Break up common functions and small refactor.

This commit is contained in:
2025-01-04 15:31:26 +02:00
parent b78fe00221
commit 4e6fad873b
16 changed files with 259 additions and 177 deletions

100
common/errors.go Normal file
View File

@@ -0,0 +1,100 @@
package common
import (
"errors"
"fmt"
)
// GeminiError is an error value that carries a Gemini status code together
// with the response header it came from. NewErrGeminiStatusCode builds it.
type GeminiError struct {
// Msg is a short label chosen from the status code range.
Msg string
// Code is the numeric status code handed to the constructor.
Code int
// Header is the header text handed to the constructor.
Header string
}
// Error renders the error as "<Msg>: <Header>". The numeric Code is not
// part of the text.
func (e *GeminiError) Error() string {
	return fmt.Sprint(e.Msg, ": ", e.Header)
}
// NewErrGeminiStatusCode wraps a status code and its header in a
// *GeminiError. The message is picked from the code's tens digit:
// 1x "needs input", 3x "redirect", 4x "bad request", 5x "server error",
// 6x "TLS error"; every other value yields "unexpected status code".
func NewErrGeminiStatusCode(code int, header string) error {
	gemErr := &GeminiError{Code: code, Header: header}
	// Integer division truncates toward zero, so only codes in the
	// two-digit ranges below can hit a named case.
	switch code / 10 {
	case 1:
		gemErr.Msg = "needs input"
	case 3:
		gemErr.Msg = "redirect"
	case 4:
		gemErr.Msg = "bad request"
	case 5:
		gemErr.Msg = "server error"
	case 6:
		gemErr.Msg = "TLS error"
	default:
		gemErr.Msg = "unexpected status code"
	}
	return gemErr
}
// Sentinel errors shared across the package. They are plain errors.New
// values, so callers can match them (also when wrapped with %w) via errors.Is.
var (
// Gemini protocol and content errors.
ErrGeminiRobotsParse = errors.New("gemini robots.txt parse error")
ErrGeminiRobotsDisallowed = errors.New("gemini robots.txt disallowed")
ErrGeminiResponseHeader = errors.New("gemini response header error")
ErrGeminiRedirect = errors.New("gemini redirection error")
ErrGeminiLinkLineParse = errors.New("gemini link line parse error")
// URL handling errors.
ErrURLParse = errors.New("URL parse error")
ErrURLNotGemini = errors.New("not a Gemini URL")
ErrURLDecode = errors.New("URL decode error")
// Text decoding errors.
ErrUTF8Parse = errors.New("UTF-8 parse error")
ErrTextParse = errors.New("text parse error")
// Network errors.
ErrNetwork = errors.New("network error")
ErrNetworkDNS = errors.New("network DNS error")
ErrNetworkTLS = errors.New("network TLS error")
ErrNetworkSetConnectionDeadline = errors.New("network error - cannot set connection deadline")
ErrNetworkCannotWrite = errors.New("network error - cannot write")
ErrNetworkResponseSizeExceededMax = errors.New("network error - response size exceeded maximum size")
// Storage errors.
ErrDatabase = errors.New("database error")
)
// We could have used a map for speed, but
// we would lose ability to check wrapped
// errors via errors.Is().

// errGemini has no initializer, so it is a nil *GeminiError.
// NOTE(review): as a nil pointer it presumably never compares equal to a
// real *GeminiError inside errors.Is; IsKnownError relies on errors.As for
// that type instead — confirm this entry is still needed.
var errGemini *GeminiError
// knownErrors is the list IsKnownError walks with errors.Is.
// NOTE(review): ErrURLNotGemini is declared with the other sentinels but is
// not listed here — confirm whether the omission is intentional.
var knownErrors = []error{ //nolint:gochecknoglobals
errGemini,
ErrGeminiLinkLineParse,
ErrGeminiRobotsParse,
ErrGeminiRobotsDisallowed,
ErrGeminiResponseHeader,
ErrGeminiRedirect,
ErrURLParse,
ErrURLDecode,
ErrUTF8Parse,
ErrTextParse,
ErrNetwork,
ErrNetworkDNS,
ErrNetworkTLS,
ErrNetworkSetConnectionDeadline,
ErrNetworkCannotWrite,
ErrNetworkResponseSizeExceededMax,
ErrDatabase,
}
func IsKnownError(err error) bool {
for _, known := range knownErrors {
if errors.Is(err, known) {
return true
}
}
return errors.As(err, new(*GeminiError))
}

24
common/errors_test.go Normal file
View File

@@ -0,0 +1,24 @@
package common
import (
"errors"
"fmt"
"testing"
)
func TestErrGemini(t *testing.T) {
t.Parallel()
err := NewErrGeminiStatusCode(50, "50 server error")
if !errors.As(err, new(*GeminiError)) {
t.Errorf("TestErrGemini fail")
}
}
func TestErrGeminiWrapped(t *testing.T) {
t.Parallel()
err := NewErrGeminiStatusCode(50, "50 server error")
errWrapped := fmt.Errorf("%w wrapped", err)
if !errors.As(errWrapped, new(*GeminiError)) {
t.Errorf("TestErrGeminiWrapped fail")
}
}

229
common/gemini_url.go Normal file
View File

@@ -0,0 +1,229 @@
package common
import (
"database/sql/driver"
"fmt"
"net/url"
"path"
"strconv"
"strings"
)
// URL is a Gemini URL broken into its parts, as produced by ParseURL and
// ParseURLNoNormalize. Empty fields are omitted from its JSON form.
type URL struct {
// Protocol is the URL scheme.
Protocol string `json:"protocol,omitempty"`
// Hostname is the host without the port.
Hostname string `json:"hostname,omitempty"`
// Port is the numeric port; the parsers fill in 1965 when none is given.
Port int `json:"port,omitempty"`
// Path is the path component.
Path string `json:"path,omitempty"`
// Descr is the free-form description passed to the parser.
Descr string `json:"descr,omitempty"`
// Full is the "protocol://hostname:port/path" string built by the parsers.
Full string `json:"full,omitempty"`
}
// Scan loads a URL from a database value. It has the signature of the
// database/sql Scanner interface.
//
// A nil value resets the receiver to the zero URL. A string or []byte value
// is parsed with ParseURLNoNormalize. Any other type is an error.
// Parse failures are wrapped with %w so callers can still match the
// underlying sentinel with errors.Is.
func (u *URL) Scan(value interface{}) error {
	var raw string
	switch v := value.(type) {
	case nil:
		// Clear the fields of the current object (not the pointer itself).
		*u = URL{}
		return nil
	case string:
		raw = v
	case []byte:
		// Some drivers deliver text columns as raw bytes.
		raw = string(v)
	default:
		return fmt.Errorf("failed to scan GeminiUrl: expected string or []byte, got %T", value)
	}
	parsedURL, err := ParseURLNoNormalize(raw, "")
	if err != nil {
		return fmt.Errorf("failed to scan GeminiUrl %s: %w", raw, err)
	}
	*u = *parsedURL
	return nil
}
// String returns the stored Full form of the URL.
func (u URL) String() string {
return u.Full
}
// StringNoDefaultPort returns the URL without a port when the port is
// 1965, built from Protocol, Hostname and Path. For any other port it
// returns Full unchanged.
func (u URL) StringNoDefaultPort() string {
	if u.Port != 1965 {
		return u.Full
	}
	return fmt.Sprintf("%s://%s%s", u.Protocol, u.Hostname, u.Path)
}
// Value returns the database representation of the URL: the Full string,
// or nil when Full is empty.
func (u URL) Value() (driver.Value, error) {
	if u.Full != "" {
		return u.Full, nil
	}
	return nil, nil
}
// ParseURLNoNormalize parses input with url.Parse, without any
// normalization, and converts the result into a URL carrying descr.
//
// It returns an error wrapping ErrURLParse when input cannot be parsed or
// its port is not a usable integer, and one wrapping ErrURLNotGemini when
// the scheme is not "gemini".
func ParseURLNoNormalize(input string, descr string) (*URL, error) {
	u, err := url.Parse(input)
	if err != nil {
		return nil, fmt.Errorf("%w: Input %s URL Parse Error: %w", ErrURLParse, input, err)
	}
	return urlFromParsed(u, input, descr)
}

// ParseURL runs input through NormalizeURL and converts the result into a
// URL carrying descr.
//
// It returns an error wrapping ErrURLParse when normalization fails or the
// port is not a usable integer, and one wrapping ErrURLNotGemini when the
// scheme is not "gemini".
func ParseURL(input string, descr string) (*URL, error) {
	u, err := NormalizeURL(input)
	if err != nil {
		return nil, fmt.Errorf("%w: Input %s URL Parse Error: %w", ErrURLParse, input, err)
	}
	return urlFromParsed(u, input, descr)
}

// urlFromParsed builds a URL from an already parsed *url.URL. input is
// only used in error messages. A missing port defaults to 1965, and Full is
// always rendered with an explicit port.
func urlFromParsed(u *url.URL, input string, descr string) (*URL, error) {
	if u.Scheme != "gemini" {
		return nil, fmt.Errorf("%w: URL scheme '%s' is not supported", ErrURLNotGemini, u.Scheme)
	}
	port := 1965
	if strPort := u.Port(); strPort != "" {
		parsedPort, err := strconv.Atoi(strPort)
		if err != nil {
			return nil, fmt.Errorf("%w: Input %s port parse error: %w", ErrURLParse, input, err)
		}
		port = parsedPort
	}
	hostname := u.Hostname()
	full := fmt.Sprintf("%s://%s:%d%s", u.Scheme, hostname, port, u.Path)
	return &URL{
		Protocol: u.Scheme,
		Hostname: hostname,
		Port:     port,
		Path:     u.Path,
		Descr:    descr,
		Full:     full,
	}, nil
}
// DeriveAbsoluteURL turns input, which may be relative, into an absolute
// URL based on currentURL. It is mainly used to compute the target of a
// redirection from a response header.
//
// Input containing "://" is treated as already absolute and parsed as is.
// Input starting with "/" replaces the current path. "." and "./" keep the
// current path. Anything else is appended to the current path.
func DeriveAbsoluteURL(currentURL URL, input string) (*URL, error) {
	if strings.Contains(input, "://") {
		return ParseURL(input, "")
	}
	// Relative input: work out the new path, covering odd forms seen in the wild.
	var resolved string
	switch {
	case strings.HasPrefix(input, "/"):
		resolved = path.Clean(input)
	case input == "." || input == "./":
		resolved = path.Join(currentURL.Path, "/")
	default:
		resolved = path.Join(currentURL.Path, "/", path.Clean(input))
	}
	absolute := fmt.Sprintf("%s://%s:%d%s", currentURL.Protocol, currentURL.Hostname, currentURL.Port, resolved)
	return ParseURL(absolute, "")
}
// NormalizeURL parses rawURL and returns a normalized *url.URL:
//   - scheme and host are lowercased
//   - the port is dropped when it is the scheme's default
//     (http 80, https 443, gemini 1965)
//   - the path is cleaned (duplicate slashes, "." and ".." segments removed)
//     while a trailing slash is kept
//   - each path segment is percent-escaped
//
// An empty path stays empty. The returned URL's Path field holds the
// escaped text, not the decoded form.
// NOTE(review): because Path is already escaped, URL.String() may escape
// '%' a second time for paths with special characters — confirm callers
// read Path directly in that case.
//
// A parse failure is returned wrapped in ErrURLParse.
func NormalizeURL(rawURL string) (*url.URL, error) {
	u, err := url.Parse(rawURL)
	if err != nil {
		return nil, fmt.Errorf("%w: %w", ErrURLParse, err)
	}
	u.Scheme = strings.ToLower(u.Scheme)
	u.Host = strings.ToLower(u.Host)

	// Strip the default port by trimming the ":port" suffix. This keeps the
	// brackets of an IPv6 literal such as "[::1]:1965" intact.
	port := u.Port()
	switch {
	case u.Scheme == "http" && port == "80",
		u.Scheme == "https" && port == "443",
		u.Scheme == "gemini" && port == "1965":
		u.Host = strings.TrimSuffix(u.Host, ":"+port)
	}

	if u.Path != "" {
		// path.Clean drops a trailing slash, so remember it first.
		hadTrailingSlash := strings.HasSuffix(u.Path, "/")
		u.Path = path.Clean(u.Path)
		switch {
		case u.Path == ".":
			// Clean yields "." when nothing is left (e.g. "./").
			u.Path = "/"
		case hadTrailingSlash && u.Path != "/":
			u.Path += "/"
		}
	}

	// Escape segment by segment so the separators themselves stay as "/".
	segments := strings.Split(u.Path, "/")
	for i, segment := range segments {
		segments[i] = url.PathEscape(segment)
	}
	u.Path = strings.Join(segments, "/")
	return u, nil
}
// EscapeURL percent-escapes the path part of a "scheme://host/path" string.
//
// The input is returned unchanged when:
//   - it contains '%' but no "% " sequence (taken as already escaped)
//   - it has no "://" separator
//   - it has no path, or the path is just "/"
//
// Escaping is done per path segment, so the "/" separators are kept
// rather than turned into "%2F".
func EscapeURL(input string) string {
	// Heuristic: a '%' that is never followed by a space means the input
	// has been escaped before.
	if strings.Contains(input, "%") && !strings.Contains(input, "% ") {
		return input
	}
	protocol, remainder, ok := strings.Cut(input, "://")
	if !ok {
		return input
	}
	host, rawPath, hasPath := strings.Cut(remainder, "/")
	if !hasPath || rawPath == "" {
		// Nothing after the host (or only the root slash): nothing to escape.
		return input
	}
	segments := strings.Split(rawPath, "/")
	for i, segment := range segments {
		segments[i] = url.PathEscape(segment)
	}
	return protocol + "://" + host + "/" + strings.Join(segments, "/")
}

223
common/gemini_url_test.go Normal file
View File

@@ -0,0 +1,223 @@
package common
import (
"reflect"
"testing"
)
// TestParseURL checks that ParseURL adds the default port to a URL that
// has none.
func TestParseURL(t *testing.T) {
	t.Parallel()
	input := "gemini://caolan.uk/cgi-bin/weather.py/wxfcs/3162"
	parsed, err := ParseURL(input, "")
	if err != nil {
		// parsed is nil on error; stop before it is dereferenced.
		t.Fatalf("fail: %v", err)
	}
	value, err := parsed.Value()
	if err != nil {
		t.Fatalf("fail: %v", err)
	}
	if value != "gemini://caolan.uk:1965/cgi-bin/weather.py/wxfcs/3162" {
		t.Errorf("fail: %s", parsed)
	}
}
// TestDeriveAbsoluteURL_abs_url_input checks that an absolute URL input
// replaces the current URL entirely.
func TestDeriveAbsoluteURL_abs_url_input(t *testing.T) {
	t.Parallel()
	want := &URL{
		Protocol: "gemini",
		Hostname: "a.b",
		Port:     1965,
		Path:     "/c",
		Descr:    "",
		Full:     "gemini://a.b:1965/c",
	}
	base := URL{
		Protocol: "gemini",
		Hostname: "smol.gr",
		Port:     1965,
		Path:     "/a/b",
		Descr:    "Nothing",
		Full:     "gemini://smol.gr:1965/a/b",
	}
	got, err := DeriveAbsoluteURL(base, "gemini://a.b/c")
	if err != nil {
		t.Errorf("fail: %v", err)
	}
	if !reflect.DeepEqual(got, want) {
		t.Errorf("fail: %#v != %#v", got, want)
	}
}
// TestDeriveAbsoluteURL_abs_path_input checks that an input starting with
// "/" replaces the path of the current URL.
func TestDeriveAbsoluteURL_abs_path_input(t *testing.T) {
	t.Parallel()
	want := &URL{
		Protocol: "gemini",
		Hostname: "smol.gr",
		Port:     1965,
		Path:     "/c",
		Descr:    "",
		Full:     "gemini://smol.gr:1965/c",
	}
	base := URL{
		Protocol: "gemini",
		Hostname: "smol.gr",
		Port:     1965,
		Path:     "/a/b",
		Descr:    "Nothing",
		Full:     "gemini://smol.gr:1965/a/b",
	}
	got, err := DeriveAbsoluteURL(base, "/c")
	if err != nil {
		t.Errorf("fail: %v", err)
	}
	if !reflect.DeepEqual(got, want) {
		t.Errorf("fail: %#v != %#v", got, want)
	}
}
// TestDeriveAbsoluteURL_rel_path_input checks that a relative input is
// appended to the path of the current URL.
func TestDeriveAbsoluteURL_rel_path_input(t *testing.T) {
	t.Parallel()
	want := &URL{
		Protocol: "gemini",
		Hostname: "smol.gr",
		Port:     1965,
		Path:     "/a/b/c/d",
		Descr:    "",
		Full:     "gemini://smol.gr:1965/a/b/c/d",
	}
	base := URL{
		Protocol: "gemini",
		Hostname: "smol.gr",
		Port:     1965,
		Path:     "/a/b",
		Descr:    "Nothing",
		Full:     "gemini://smol.gr:1965/a/b",
	}
	got, err := DeriveAbsoluteURL(base, "c/d")
	if err != nil {
		t.Errorf("fail: %v", err)
	}
	if !reflect.DeepEqual(got, want) {
		t.Errorf("fail: %#v != %#v", got, want)
	}
}
// TestNormalizeURLSlash checks that a trailing slash after a multi-segment
// path is kept.
func TestNormalizeURLSlash(t *testing.T) {
	t.Parallel()
	input := "gemini://uscoffings.net/retro-computing/magazines/"
	normalized, err := NormalizeURL(input)
	if err != nil {
		// normalized is nil on error; stop before String() is called on it.
		t.Fatalf("fail: %v", err)
	}
	output := normalized.String()
	expected := input
	if output != expected {
		t.Errorf("fail: %#v != %#v", output, expected)
	}
}
// TestNormalizeURLNoSlash checks that no trailing slash is added to a
// multi-segment path.
func TestNormalizeURLNoSlash(t *testing.T) {
	t.Parallel()
	input := "gemini://uscoffings.net/retro-computing/magazines"
	normalized, err := NormalizeURL(input)
	if err != nil {
		// normalized is nil on error; stop before String() is called on it.
		t.Fatalf("fail: %v", err)
	}
	output := normalized.String()
	expected := input
	if output != expected {
		t.Errorf("fail: %#v != %#v", output, expected)
	}
}
// TestNormalizeMultiSlash checks that runs of slashes collapse to one.
func TestNormalizeMultiSlash(t *testing.T) {
	t.Parallel()
	input := "gemini://uscoffings.net/retro-computing/////////a///magazines"
	normalized, err := NormalizeURL(input)
	if err != nil {
		// normalized is nil on error; stop before String() is called on it.
		t.Fatalf("fail: %v", err)
	}
	output := normalized.String()
	expected := "gemini://uscoffings.net/retro-computing/a/magazines"
	if output != expected {
		t.Errorf("fail: %#v != %#v", output, expected)
	}
}
// TestNormalizeTrailingSlash checks that a bare root path "/" is kept.
func TestNormalizeTrailingSlash(t *testing.T) {
	t.Parallel()
	input := "gemini://uscoffings.net/"
	normalized, err := NormalizeURL(input)
	if err != nil {
		// normalized is nil on error; stop before String() is called on it.
		t.Fatalf("fail: %v", err)
	}
	output := normalized.String()
	expected := "gemini://uscoffings.net/"
	if output != expected {
		t.Errorf("fail: %#v != %#v", output, expected)
	}
}
// TestNormalizeNoTrailingSlash checks that an empty path stays empty.
func TestNormalizeNoTrailingSlash(t *testing.T) {
	t.Parallel()
	input := "gemini://uscoffings.net"
	normalized, err := NormalizeURL(input)
	if err != nil {
		// normalized is nil on error; stop before String() is called on it.
		t.Fatalf("fail: %v", err)
	}
	output := normalized.String()
	expected := "gemini://uscoffings.net"
	if output != expected {
		t.Errorf("fail: %#v != %#v", output, expected)
	}
}
// TestNormalizeTrailingSlashPath checks that a trailing slash after a
// single segment is kept.
func TestNormalizeTrailingSlashPath(t *testing.T) {
	t.Parallel()
	input := "gemini://uscoffings.net/a/"
	normalized, err := NormalizeURL(input)
	if err != nil {
		// normalized is nil on error; stop before String() is called on it.
		t.Fatalf("fail: %v", err)
	}
	output := normalized.String()
	expected := "gemini://uscoffings.net/a/"
	if output != expected {
		t.Errorf("fail: %#v != %#v", output, expected)
	}
}
// TestNormalizeNoTrailingSlashPath checks that no trailing slash is added
// to a single-segment path.
func TestNormalizeNoTrailingSlashPath(t *testing.T) {
	t.Parallel()
	input := "gemini://uscoffings.net/a"
	normalized, err := NormalizeURL(input)
	if err != nil {
		// normalized is nil on error; stop before String() is called on it.
		t.Fatalf("fail: %v", err)
	}
	output := normalized.String()
	expected := "gemini://uscoffings.net/a"
	if output != expected {
		t.Errorf("fail: %#v != %#v", output, expected)
	}
}
// TestNormalizeDot checks that "." segments and repeated slashes are removed.
func TestNormalizeDot(t *testing.T) {
	t.Parallel()
	input := "gemini://uscoffings.net/retro-computing/./././////a///magazines"
	normalized, err := NormalizeURL(input)
	if err != nil {
		// normalized is nil on error; stop before String() is called on it.
		t.Fatalf("fail: %v", err)
	}
	output := normalized.String()
	expected := "gemini://uscoffings.net/retro-computing/a/magazines"
	if output != expected {
		t.Errorf("fail: %#v != %#v", output, expected)
	}
}
// TestNormalizePort checks that the default Gemini port 1965 is dropped.
func TestNormalizePort(t *testing.T) {
	t.Parallel()
	input := "gemini://uscoffings.net:1965/a"
	normalized, err := NormalizeURL(input)
	if err != nil {
		// normalized is nil on error; stop before String() is called on it.
		t.Fatalf("fail: %v", err)
	}
	output := normalized.String()
	expected := "gemini://uscoffings.net/a"
	if output != expected {
		t.Errorf("fail: %#v != %#v", output, expected)
	}
}
// TestNormalizeURL checks that a non-default port is kept.
func TestNormalizeURL(t *testing.T) {
	t.Parallel()
	input := "gemini://chat.gemini.lehmann.cx:11965/"
	normalized, err := NormalizeURL(input)
	if err != nil {
		// normalized is nil on error; stop before String() is called on it.
		t.Fatalf("fail: %v", err)
	}
	output := normalized.String()
	expected := "gemini://chat.gemini.lehmann.cx:11965/"
	if output != expected {
		t.Errorf("fail: %#v != %#v", output, expected)
	}
}

56
common/snapshot.go Normal file
View File

@@ -0,0 +1,56 @@
package common
import (
"database/sql/driver"
"encoding/json"
"fmt"
"time"
"github.com/guregu/null/v5"
)
// LinkList is a list of URLs. Its Value and Scan methods convert it to and
// from JSON.
type LinkList []URL
// Value returns the list encoded as JSON bytes for storage.
func (l *LinkList) Value() (driver.Value, error) {
	payload, err := json.Marshal(l)
	return payload, err
}
// Scan loads the list from a database value. A nil value sets the list to
// nil, a []byte value is decoded as JSON, and any other type is an error.
func (l *LinkList) Scan(value interface{}) error {
	switch raw := value.(type) {
	case nil:
		*l = nil
		return nil
	case []byte:
		return json.Unmarshal(raw, l)
	default:
		return fmt.Errorf("failed to scan LinkList: expected []byte, got %T", value)
	}
}
// Snapshot is one stored record for a URL. The db tags name the database
// columns and the json tags name the JSON keys; nullable columns use the
// null.* wrapper types.
type Snapshot struct {
ID int `db:"id" json:"id,omitempty"`
URL URL `db:"url" json:"url,omitempty"`
Host string `db:"host" json:"host,omitempty"`
Timestamp null.Time `db:"timestamp" json:"timestamp,omitempty"`
MimeType null.String `db:"mimetype" json:"mimetype,omitempty"`
Data null.Value[[]byte] `db:"data" json:"data,omitempty"` // For non text/gemini files.
GemText null.String `db:"gemtext" json:"gemtext,omitempty"` // For text/gemini files.
Header null.String `db:"header" json:"header,omitempty"` // Response header.
Links null.Value[LinkList] `db:"links" json:"links,omitempty"`
Lang null.String `db:"lang" json:"lang,omitempty"`
ResponseCode null.Int `db:"response_code" json:"code,omitempty"` // Gemini response status code.
Error null.String `db:"error" json:"error,omitempty"` // On network errors only
}
// SnapshotFromURL parses u with ParseURL and returns a new Snapshot with
// URL, Host and the current time filled in. It returns nil when u cannot be
// parsed.
func SnapshotFromURL(u string) *Snapshot {
	parsed, err := ParseURL(u, "")
	if err != nil {
		return nil
	}
	return &Snapshot{
		URL:       *parsed,
		Host:      parsed.Hostname,
		Timestamp: null.TimeFrom(time.Now()),
	}
}