Compare commits
5 Commits
f26acc04fb
...
63adf73ef9
| Author | SHA1 | Date | |
|---|---|---|---|
| 63adf73ef9 | |||
| b3387ce7ad | |||
| 9ade26b6e8 | |||
| 4a345a1763 | |||
| 64f98bb37c |
@@ -29,7 +29,7 @@ func NewErrGeminiStatusCode(code int, header string) error {
|
||||
case code >= 60 && code < 70:
|
||||
msg = "TLS error"
|
||||
default:
|
||||
msg = "unexpected status code"
|
||||
msg = "unexpected Status code"
|
||||
}
|
||||
return &GeminiError{
|
||||
Msg: msg,
|
||||
@@ -51,6 +51,8 @@ var (
|
||||
ErrUTF8Parse = errors.New("UTF-8 parse error")
|
||||
ErrTextParse = errors.New("text parse error")
|
||||
|
||||
ErrBlacklistMatches = errors.New("url matches blacklist")
|
||||
|
||||
ErrNetwork = errors.New("network error")
|
||||
ErrNetworkDNS = errors.New("network DNS error")
|
||||
ErrNetworkTLS = errors.New("network TLS error")
|
||||
@@ -59,6 +61,7 @@ var (
|
||||
ErrNetworkResponseSizeExceededMax = errors.New("network error - response size exceeded maximum size")
|
||||
|
||||
ErrDatabase = errors.New("database error")
|
||||
ErrDatabaseScan = errors.New("database scan error")
|
||||
)
|
||||
|
||||
// We could have used a map for speed, but
|
||||
@@ -75,6 +78,8 @@ var knownErrors = []error{ //nolint:gochecknoglobals
|
||||
ErrGeminiResponseHeader,
|
||||
ErrGeminiRedirect,
|
||||
|
||||
ErrBlacklistMatches,
|
||||
|
||||
ErrURLParse,
|
||||
ErrURLDecode,
|
||||
ErrUTF8Parse,
|
||||
@@ -88,6 +93,7 @@ var knownErrors = []error{ //nolint:gochecknoglobals
|
||||
ErrNetworkResponseSizeExceededMax,
|
||||
|
||||
ErrDatabase,
|
||||
ErrDatabaseScan,
|
||||
}
|
||||
|
||||
func IsKnownError(err error) bool {
|
||||
|
||||
@@ -1,24 +1,25 @@
|
||||
package common
|
||||
package common_test
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"gemini-grc/common"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestErrGemini(t *testing.T) {
|
||||
t.Parallel()
|
||||
err := NewErrGeminiStatusCode(50, "50 server error")
|
||||
if !errors.As(err, new(*GeminiError)) {
|
||||
err := common.NewErrGeminiStatusCode(50, "50 server error")
|
||||
if !errors.As(err, new(*common.GeminiError)) {
|
||||
t.Errorf("TestErrGemini fail")
|
||||
}
|
||||
}
|
||||
|
||||
func TestErrGeminiWrapped(t *testing.T) {
|
||||
t.Parallel()
|
||||
err := NewErrGeminiStatusCode(50, "50 server error")
|
||||
err := common.NewErrGeminiStatusCode(50, "50 server error")
|
||||
errWrapped := fmt.Errorf("%w wrapped", err)
|
||||
if !errors.As(errWrapped, new(*GeminiError)) {
|
||||
if !errors.As(errWrapped, new(*common.GeminiError)) {
|
||||
t.Errorf("TestErrGeminiWrapped fail")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,11 +26,11 @@ func (u *URL) Scan(value interface{}) error {
|
||||
}
|
||||
b, ok := value.(string)
|
||||
if !ok {
|
||||
return fmt.Errorf("failed to scan GeminiUrl: expected string, got %T", value)
|
||||
return fmt.Errorf("%w: expected string, got %T", ErrDatabaseScan, value)
|
||||
}
|
||||
parsedURL, err := ParseURLNoNormalize(b, "")
|
||||
if err != nil {
|
||||
err = fmt.Errorf("failed to scan GeminiUrl %s: %v", b, err)
|
||||
err = fmt.Errorf("%w: failed to scan GeminiUrl %s: %v", ErrDatabaseScan, b, err)
|
||||
return err
|
||||
}
|
||||
*u = *parsedURL
|
||||
@@ -75,6 +75,13 @@ func ParseURLNoNormalize(input string, descr string) (*URL, error) {
|
||||
return nil, fmt.Errorf("%w: Input %s GeminiError %w", ErrURLParse, input, err)
|
||||
}
|
||||
full := fmt.Sprintf("%s://%s:%d%s", protocol, hostname, port, urlPath)
|
||||
// full field should also contain query params and url fragments
|
||||
if u.RawQuery != "" {
|
||||
full += "?" + u.RawQuery
|
||||
}
|
||||
if u.Fragment != "" {
|
||||
full += "#" + u.Fragment
|
||||
}
|
||||
return &URL{Protocol: protocol, Hostname: hostname, Port: port, Path: urlPath, Descr: descr, Full: full}, nil
|
||||
}
|
||||
|
||||
@@ -98,6 +105,13 @@ func ParseURL(input string, descr string) (*URL, error) {
|
||||
return nil, fmt.Errorf("%w: Input %s GeminiError %w", ErrURLParse, input, err)
|
||||
}
|
||||
full := fmt.Sprintf("%s://%s:%d%s", protocol, hostname, port, urlPath)
|
||||
// full field should also contain query params and url fragments
|
||||
if u.RawQuery != "" {
|
||||
full += "?" + u.RawQuery
|
||||
}
|
||||
if u.Fragment != "" {
|
||||
full += "#" + u.Fragment
|
||||
}
|
||||
return &URL{Protocol: protocol, Hostname: hostname, Port: port, Path: urlPath, Descr: descr, Full: full}, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package common
|
||||
package common_test
|
||||
|
||||
import (
|
||||
"gemini-grc/common"
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
@@ -8,7 +9,7 @@ import (
|
||||
func TestParseURL(t *testing.T) {
|
||||
t.Parallel()
|
||||
input := "gemini://caolan.uk/cgi-bin/weather.py/wxfcs/3162"
|
||||
parsed, err := ParseURL(input, "")
|
||||
parsed, err := common.ParseURL(input, "")
|
||||
value, _ := parsed.Value()
|
||||
if err != nil || !(value == "gemini://caolan.uk:1965/cgi-bin/weather.py/wxfcs/3162") {
|
||||
t.Errorf("fail: %s", parsed)
|
||||
@@ -17,7 +18,7 @@ func TestParseURL(t *testing.T) {
|
||||
|
||||
func TestDeriveAbsoluteURL_abs_url_input(t *testing.T) {
|
||||
t.Parallel()
|
||||
currentURL := URL{
|
||||
currentURL := common.URL{
|
||||
Protocol: "gemini",
|
||||
Hostname: "smol.gr",
|
||||
Port: 1965,
|
||||
@@ -26,11 +27,11 @@ func TestDeriveAbsoluteURL_abs_url_input(t *testing.T) {
|
||||
Full: "gemini://smol.gr:1965/a/b",
|
||||
}
|
||||
input := "gemini://a.b/c"
|
||||
output, err := DeriveAbsoluteURL(currentURL, input)
|
||||
output, err := common.DeriveAbsoluteURL(currentURL, input)
|
||||
if err != nil {
|
||||
t.Errorf("fail: %v", err)
|
||||
}
|
||||
expected := &URL{
|
||||
expected := &common.URL{
|
||||
Protocol: "gemini",
|
||||
Hostname: "a.b",
|
||||
Port: 1965,
|
||||
@@ -46,7 +47,7 @@ func TestDeriveAbsoluteURL_abs_url_input(t *testing.T) {
|
||||
|
||||
func TestDeriveAbsoluteURL_abs_path_input(t *testing.T) {
|
||||
t.Parallel()
|
||||
currentURL := URL{
|
||||
currentURL := common.URL{
|
||||
Protocol: "gemini",
|
||||
Hostname: "smol.gr",
|
||||
Port: 1965,
|
||||
@@ -55,11 +56,11 @@ func TestDeriveAbsoluteURL_abs_path_input(t *testing.T) {
|
||||
Full: "gemini://smol.gr:1965/a/b",
|
||||
}
|
||||
input := "/c"
|
||||
output, err := DeriveAbsoluteURL(currentURL, input)
|
||||
output, err := common.DeriveAbsoluteURL(currentURL, input)
|
||||
if err != nil {
|
||||
t.Errorf("fail: %v", err)
|
||||
}
|
||||
expected := &URL{
|
||||
expected := &common.URL{
|
||||
Protocol: "gemini",
|
||||
Hostname: "smol.gr",
|
||||
Port: 1965,
|
||||
@@ -75,7 +76,7 @@ func TestDeriveAbsoluteURL_abs_path_input(t *testing.T) {
|
||||
|
||||
func TestDeriveAbsoluteURL_rel_path_input(t *testing.T) {
|
||||
t.Parallel()
|
||||
currentURL := URL{
|
||||
currentURL := common.URL{
|
||||
Protocol: "gemini",
|
||||
Hostname: "smol.gr",
|
||||
Port: 1965,
|
||||
@@ -84,11 +85,11 @@ func TestDeriveAbsoluteURL_rel_path_input(t *testing.T) {
|
||||
Full: "gemini://smol.gr:1965/a/b",
|
||||
}
|
||||
input := "c/d"
|
||||
output, err := DeriveAbsoluteURL(currentURL, input)
|
||||
output, err := common.DeriveAbsoluteURL(currentURL, input)
|
||||
if err != nil {
|
||||
t.Errorf("fail: %v", err)
|
||||
}
|
||||
expected := &URL{
|
||||
expected := &common.URL{
|
||||
Protocol: "gemini",
|
||||
Hostname: "smol.gr",
|
||||
Port: 1965,
|
||||
@@ -105,7 +106,7 @@ func TestDeriveAbsoluteURL_rel_path_input(t *testing.T) {
|
||||
func TestNormalizeURLSlash(t *testing.T) {
|
||||
t.Parallel()
|
||||
input := "gemini://uscoffings.net/retro-computing/magazines/"
|
||||
normalized, _ := NormalizeURL(input)
|
||||
normalized, _ := common.NormalizeURL(input)
|
||||
output := normalized.String()
|
||||
expected := input
|
||||
pass := reflect.DeepEqual(output, expected)
|
||||
@@ -117,7 +118,7 @@ func TestNormalizeURLSlash(t *testing.T) {
|
||||
func TestNormalizeURLNoSlash(t *testing.T) {
|
||||
t.Parallel()
|
||||
input := "gemini://uscoffings.net/retro-computing/magazines"
|
||||
normalized, _ := NormalizeURL(input)
|
||||
normalized, _ := common.NormalizeURL(input)
|
||||
output := normalized.String()
|
||||
expected := input
|
||||
pass := reflect.DeepEqual(output, expected)
|
||||
@@ -129,7 +130,7 @@ func TestNormalizeURLNoSlash(t *testing.T) {
|
||||
func TestNormalizeMultiSlash(t *testing.T) {
|
||||
t.Parallel()
|
||||
input := "gemini://uscoffings.net/retro-computing/////////a///magazines"
|
||||
normalized, _ := NormalizeURL(input)
|
||||
normalized, _ := common.NormalizeURL(input)
|
||||
output := normalized.String()
|
||||
expected := "gemini://uscoffings.net/retro-computing/a/magazines"
|
||||
pass := reflect.DeepEqual(output, expected)
|
||||
@@ -141,7 +142,7 @@ func TestNormalizeMultiSlash(t *testing.T) {
|
||||
func TestNormalizeTrailingSlash(t *testing.T) {
|
||||
t.Parallel()
|
||||
input := "gemini://uscoffings.net/"
|
||||
normalized, _ := NormalizeURL(input)
|
||||
normalized, _ := common.NormalizeURL(input)
|
||||
output := normalized.String()
|
||||
expected := "gemini://uscoffings.net/"
|
||||
pass := reflect.DeepEqual(output, expected)
|
||||
@@ -153,7 +154,7 @@ func TestNormalizeTrailingSlash(t *testing.T) {
|
||||
func TestNormalizeNoTrailingSlash(t *testing.T) {
|
||||
t.Parallel()
|
||||
input := "gemini://uscoffings.net"
|
||||
normalized, _ := NormalizeURL(input)
|
||||
normalized, _ := common.NormalizeURL(input)
|
||||
output := normalized.String()
|
||||
expected := "gemini://uscoffings.net"
|
||||
pass := reflect.DeepEqual(output, expected)
|
||||
@@ -165,7 +166,7 @@ func TestNormalizeNoTrailingSlash(t *testing.T) {
|
||||
func TestNormalizeTrailingSlashPath(t *testing.T) {
|
||||
t.Parallel()
|
||||
input := "gemini://uscoffings.net/a/"
|
||||
normalized, _ := NormalizeURL(input)
|
||||
normalized, _ := common.NormalizeURL(input)
|
||||
output := normalized.String()
|
||||
expected := "gemini://uscoffings.net/a/"
|
||||
pass := reflect.DeepEqual(output, expected)
|
||||
@@ -177,7 +178,7 @@ func TestNormalizeTrailingSlashPath(t *testing.T) {
|
||||
func TestNormalizeNoTrailingSlashPath(t *testing.T) {
|
||||
t.Parallel()
|
||||
input := "gemini://uscoffings.net/a"
|
||||
normalized, _ := NormalizeURL(input)
|
||||
normalized, _ := common.NormalizeURL(input)
|
||||
output := normalized.String()
|
||||
expected := "gemini://uscoffings.net/a"
|
||||
pass := reflect.DeepEqual(output, expected)
|
||||
@@ -189,7 +190,7 @@ func TestNormalizeNoTrailingSlashPath(t *testing.T) {
|
||||
func TestNormalizeDot(t *testing.T) {
|
||||
t.Parallel()
|
||||
input := "gemini://uscoffings.net/retro-computing/./././////a///magazines"
|
||||
normalized, _ := NormalizeURL(input)
|
||||
normalized, _ := common.NormalizeURL(input)
|
||||
output := normalized.String()
|
||||
expected := "gemini://uscoffings.net/retro-computing/a/magazines"
|
||||
pass := reflect.DeepEqual(output, expected)
|
||||
@@ -201,7 +202,7 @@ func TestNormalizeDot(t *testing.T) {
|
||||
func TestNormalizePort(t *testing.T) {
|
||||
t.Parallel()
|
||||
input := "gemini://uscoffings.net:1965/a"
|
||||
normalized, _ := NormalizeURL(input)
|
||||
normalized, _ := common.NormalizeURL(input)
|
||||
output := normalized.String()
|
||||
expected := "gemini://uscoffings.net/a"
|
||||
pass := reflect.DeepEqual(output, expected)
|
||||
@@ -213,11 +214,38 @@ func TestNormalizePort(t *testing.T) {
|
||||
func TestNormalizeURL(t *testing.T) {
|
||||
t.Parallel()
|
||||
input := "gemini://chat.gemini.lehmann.cx:11965/"
|
||||
normalized, _ := NormalizeURL(input)
|
||||
normalized, _ := common.NormalizeURL(input)
|
||||
output := normalized.String()
|
||||
expected := "gemini://chat.gemini.lehmann.cx:11965/"
|
||||
pass := reflect.DeepEqual(output, expected)
|
||||
if !pass {
|
||||
t.Errorf("fail: %#v != %#v", output, expected)
|
||||
}
|
||||
|
||||
input = "gemini://chat.gemini.lehmann.cx:11965/index?a=1&b=c"
|
||||
normalized, _ = common.NormalizeURL(input)
|
||||
output = normalized.String()
|
||||
expected = "gemini://chat.gemini.lehmann.cx:11965/index?a=1&b=c"
|
||||
pass = reflect.DeepEqual(output, expected)
|
||||
if !pass {
|
||||
t.Errorf("fail: %#v != %#v", output, expected)
|
||||
}
|
||||
|
||||
input = "gemini://chat.gemini.lehmann.cx:11965/index#1"
|
||||
normalized, _ = common.NormalizeURL(input)
|
||||
output = normalized.String()
|
||||
expected = "gemini://chat.gemini.lehmann.cx:11965/index#1"
|
||||
pass = reflect.DeepEqual(output, expected)
|
||||
if !pass {
|
||||
t.Errorf("fail: %#v != %#v", output, expected)
|
||||
}
|
||||
|
||||
input = "gemini://gemi.dev/cgi-bin/xkcd.cgi?1494"
|
||||
normalized, _ = common.NormalizeURL(input)
|
||||
output = normalized.String()
|
||||
expected = "gemini://gemi.dev/cgi-bin/xkcd.cgi?1494"
|
||||
pass = reflect.DeepEqual(output, expected)
|
||||
if !pass {
|
||||
t.Errorf("fail: %#v != %#v", output, expected)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ const (
|
||||
EnvPanicOnUnexpectedError = "PANIC_ON_UNEXPECTED_ERROR"
|
||||
EnvBlacklistPath = "BLACKLIST_PATH"
|
||||
EnvDryRun = "DRY_RUN"
|
||||
EnvPrintWorkerStatus = "PRINT_WORKER_STATUS"
|
||||
)
|
||||
|
||||
// Config holds the application configuration loaded from environment variables.
|
||||
@@ -30,6 +31,7 @@ type Config struct {
|
||||
PanicOnUnexpectedError bool // Panic on unexpected errors when visiting a URL
|
||||
BlacklistPath string // File that has blacklisted strings of "host:port"
|
||||
DryRun bool // If false, don't write to disk
|
||||
PrintWorkerStatus bool // If false, don't print worker status table
|
||||
}
|
||||
|
||||
var CONFIG Config //nolint:gochecknoglobals
|
||||
@@ -136,6 +138,14 @@ func GetConfig() *Config {
|
||||
config.DryRun = val
|
||||
return nil
|
||||
},
|
||||
EnvPrintWorkerStatus: func(v string) error {
|
||||
val, err := parseBool(EnvPrintWorkerStatus, v)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
config.PrintWorkerStatus = val
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
// Process each environment variable
|
||||
|
||||
@@ -1,28 +0,0 @@
|
||||
package gemini
|
||||
|
||||
import (
|
||||
"gemini-grc/logging"
|
||||
)
|
||||
|
||||
var IPPool = IpAddressPool{IPs: make(map[string]int)}
|
||||
|
||||
func AddIPsToPool(ips []string) {
|
||||
IPPool.Lock.Lock()
|
||||
for _, ip := range ips {
|
||||
logging.LogDebug("Adding %s to pool", ip)
|
||||
IPPool.IPs[ip] = 1
|
||||
}
|
||||
IPPool.Lock.Unlock()
|
||||
}
|
||||
|
||||
func RemoveIPsFromPool(IPs []string) {
|
||||
IPPool.Lock.Lock()
|
||||
for _, ip := range IPs {
|
||||
_, ok := IPPool.IPs[ip]
|
||||
if ok {
|
||||
logging.LogDebug("Removing %s from pool", ip)
|
||||
delete(IPPool.IPs, ip)
|
||||
}
|
||||
}
|
||||
IPPool.Lock.Unlock()
|
||||
}
|
||||
@@ -2,103 +2,12 @@ package gemini
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"gemini-grc/common"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strconv"
|
||||
|
||||
"gemini-grc/logging"
|
||||
"gemini-grc/common"
|
||||
)
|
||||
|
||||
func GetPageLinks(currentURL common.URL, gemtext string) common.LinkList {
|
||||
// Grab link lines
|
||||
linkLines := ExtractLinkLines(gemtext)
|
||||
if len(linkLines) == 0 {
|
||||
return nil
|
||||
}
|
||||
var linkURLs common.LinkList
|
||||
// Normalize URLs in links, and store them in snapshot
|
||||
for _, line := range linkLines {
|
||||
linkURL, err := NormalizeLink(line, currentURL.String())
|
||||
if err != nil {
|
||||
logging.LogDebug("%s: %s", common.ErrGeminiLinkLineParse, err)
|
||||
continue
|
||||
}
|
||||
linkURLs = append(linkURLs, *linkURL)
|
||||
}
|
||||
return linkURLs
|
||||
}
|
||||
|
||||
// linkLinesRe matches whole gemtext link lines: "=>" at the start of a line,
// followed by at least one space or tab and then the rest of the line.
// Compiled once at package scope instead of on every call.
var linkLinesRe = regexp.MustCompile(`(?m)^=>[ \t]+.*`)

// ExtractLinkLines takes a Gemtext document as a string and returns all lines
// that are link lines, in document order. It returns nil when the document
// contains no link lines.
func ExtractLinkLines(gemtext string) []string {
	return linkLinesRe.FindAllString(gemtext, -1)
}
|
||||
|
||||
// NormalizeLink takes a single link line and the current URL,
|
||||
// return the URL converted to an absolute URL
|
||||
// and its description.
|
||||
func NormalizeLink(linkLine string, currentURL string) (*common.URL, error) {
|
||||
// Parse the current URL
|
||||
baseURL, err := url.Parse(currentURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: %w", common.ErrURLParse, err)
|
||||
}
|
||||
|
||||
// Regular expression to extract the URL part from a link line
|
||||
re := regexp.MustCompile(`^=>[ \t]+(\S+)([ \t]+.*)?`)
|
||||
|
||||
// Use regex to extract the URL and the rest of the line
|
||||
matches := re.FindStringSubmatch(linkLine)
|
||||
if len(matches) == 0 {
|
||||
// If the line doesn't match the expected format, return it unchanged
|
||||
return nil, fmt.Errorf("%w for link line %s", common.ErrGeminiLinkLineParse, linkLine)
|
||||
}
|
||||
|
||||
originalURLStr := matches[1]
|
||||
_, err = url.QueryUnescape(originalURLStr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: %w", common.ErrURLDecode, err)
|
||||
}
|
||||
|
||||
restOfLine := ""
|
||||
if len(matches) > 2 {
|
||||
restOfLine = matches[2]
|
||||
}
|
||||
|
||||
// Parse the URL from the link line
|
||||
parsedURL, err := url.Parse(originalURLStr)
|
||||
if err != nil {
|
||||
// If URL parsing fails, return an error
|
||||
return nil, fmt.Errorf("%w: %w", common.ErrURLParse, err)
|
||||
}
|
||||
|
||||
// Resolve relative URLs against the base URL
|
||||
if !parsedURL.IsAbs() {
|
||||
parsedURL = baseURL.ResolveReference(parsedURL)
|
||||
}
|
||||
|
||||
// Remove usual first space from URL description:
|
||||
// => URL description
|
||||
// ^^^^^^^^^^^^
|
||||
if len(restOfLine) > 0 && restOfLine[0] == ' ' {
|
||||
restOfLine = restOfLine[1:]
|
||||
}
|
||||
|
||||
finalURL, err := common.ParseURL(parsedURL.String(), restOfLine)
|
||||
if err != nil {
|
||||
// If URL parsing fails, return an error
|
||||
return nil, fmt.Errorf("%w: %w", common.ErrURLParse, err)
|
||||
}
|
||||
|
||||
return finalURL, nil
|
||||
}
|
||||
|
||||
// ParseFirstTwoDigits takes a string and returns the first one or two digits as an int.
|
||||
// If no valid digits are found, it returns an error.
|
||||
func ParseFirstTwoDigits(input string) (int, error) {
|
||||
|
||||
88
gemini/geminiLinks.go
Normal file
88
gemini/geminiLinks.go
Normal file
@@ -0,0 +1,88 @@
|
||||
package gemini
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/url"
|
||||
"regexp"
|
||||
|
||||
"gemini-grc/common"
|
||||
"gemini-grc/logging"
|
||||
"gemini-grc/util"
|
||||
)
|
||||
|
||||
func GetPageLinks(currentURL common.URL, gemtext string) common.LinkList {
|
||||
linkLines := util.GetLinesMatchingRegex(gemtext, `(?m)^=>[ \t]+.*`)
|
||||
if len(linkLines) == 0 {
|
||||
return nil
|
||||
}
|
||||
var linkURLs common.LinkList
|
||||
// Normalize URLs in links
|
||||
for _, line := range linkLines {
|
||||
linkUrl, err := ParseGeminiLinkLine(line, currentURL.String())
|
||||
if err != nil {
|
||||
logging.LogDebug("%s: %s", common.ErrGeminiLinkLineParse, err)
|
||||
continue
|
||||
}
|
||||
linkURLs = append(linkURLs, *linkUrl)
|
||||
}
|
||||
return linkURLs
|
||||
}
|
||||
|
||||
// ParseGeminiLinkLine takes a single link line and the current URL,
|
||||
// return the URL converted to an absolute URL
|
||||
// and its description.
|
||||
func ParseGeminiLinkLine(linkLine string, currentURL string) (*common.URL, error) {
|
||||
// Check: currentURL is parseable
|
||||
baseURL, err := url.Parse(currentURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: %w", common.ErrURLParse, err)
|
||||
}
|
||||
|
||||
// Extract the actual URL and the description
|
||||
re := regexp.MustCompile(`^=>[ \t]+(\S+)([ \t]+.*)?`)
|
||||
matches := re.FindStringSubmatch(linkLine)
|
||||
if len(matches) == 0 {
|
||||
// If the line doesn't match the expected format, return it unchanged
|
||||
return nil, fmt.Errorf("%w could not parse gemini link %s", common.ErrGeminiLinkLineParse, linkLine)
|
||||
}
|
||||
|
||||
originalURLStr := matches[1]
|
||||
|
||||
// Check: Unescape the URL if escaped
|
||||
_, err = url.QueryUnescape(originalURLStr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: %w", common.ErrURLDecode, err)
|
||||
}
|
||||
|
||||
description := ""
|
||||
if len(matches) > 2 {
|
||||
description = matches[2]
|
||||
}
|
||||
|
||||
// Parse the URL from the link line
|
||||
parsedURL, err := url.Parse(originalURLStr)
|
||||
if err != nil {
|
||||
// If URL parsing fails, return an error
|
||||
return nil, fmt.Errorf("%w: %w", common.ErrURLParse, err)
|
||||
}
|
||||
|
||||
// If link URL is relative, resolve full URL
|
||||
if !parsedURL.IsAbs() {
|
||||
parsedURL = baseURL.ResolveReference(parsedURL)
|
||||
}
|
||||
|
||||
// Remove usual first space from URL description:
|
||||
// => URL description
|
||||
// ^^^^^^^^^^^^
|
||||
if len(description) > 0 && description[0] == ' ' {
|
||||
description = description[1:]
|
||||
}
|
||||
|
||||
finalURL, err := common.ParseURL(parsedURL.String(), description)
|
||||
if err != nil {
|
||||
// If URL parsing fails, return an error
|
||||
return nil, fmt.Errorf("%w: %w", common.ErrURLParse, err)
|
||||
}
|
||||
|
||||
return finalURL, nil
|
||||
}
|
||||
125
gemini/geminiLinks_test.go
Normal file
125
gemini/geminiLinks_test.go
Normal file
@@ -0,0 +1,125 @@
|
||||
package gemini
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"gemini-grc/common"
|
||||
)
|
||||
|
||||
type TestData struct {
|
||||
currentURL string
|
||||
link string
|
||||
value *common.URL
|
||||
error error
|
||||
}
|
||||
|
||||
var data = []TestData{
|
||||
{
|
||||
currentURL: "https://gemini.com/",
|
||||
link: "https://gemini.com/",
|
||||
value: nil,
|
||||
error: common.ErrGeminiLinkLineParse,
|
||||
},
|
||||
{
|
||||
currentURL: "gemini://gemi.dev/cgi-bin/xkcd/",
|
||||
link: "=> archive/ Complete Archive",
|
||||
value: &common.URL{
|
||||
Protocol: "gemini",
|
||||
Hostname: "gemi.dev",
|
||||
Port: 1965,
|
||||
Path: "/cgi-bin/xkcd/archive/",
|
||||
Descr: "Complete Archive",
|
||||
Full: "gemini://gemi.dev:1965/cgi-bin/xkcd/archive/",
|
||||
},
|
||||
error: nil,
|
||||
},
|
||||
{
|
||||
currentURL: "gemini://gemi.dev/cgi-bin/xkcd/",
|
||||
link: "=> /cgi-bin/xkcd.cgi?a=5&b=6 Example",
|
||||
value: &common.URL{
|
||||
Protocol: "gemini",
|
||||
Hostname: "gemi.dev",
|
||||
Port: 1965,
|
||||
Path: "/cgi-bin/xkcd.cgi",
|
||||
Descr: "Example",
|
||||
Full: "gemini://gemi.dev:1965/cgi-bin/xkcd.cgi?a=5&b=6",
|
||||
},
|
||||
error: nil,
|
||||
},
|
||||
{
|
||||
currentURL: "gemini://gemi.dev/cgi-bin/xkcd/",
|
||||
link: "=> /cgi-bin/xkcd.cgi?1494 XKCD 1494: Insurance",
|
||||
value: &common.URL{
|
||||
Protocol: "gemini",
|
||||
Hostname: "gemi.dev",
|
||||
Port: 1965,
|
||||
Path: "/cgi-bin/xkcd.cgi",
|
||||
Descr: "XKCD 1494: Insurance",
|
||||
Full: "gemini://gemi.dev:1965/cgi-bin/xkcd.cgi?1494",
|
||||
},
|
||||
error: nil,
|
||||
},
|
||||
{
|
||||
currentURL: "gemini://gemi.dev/cgi-bin/xkcd/",
|
||||
link: "=> /cgi-bin/xkcd.cgi?1494#f XKCD 1494: Insurance",
|
||||
value: &common.URL{
|
||||
Protocol: "gemini",
|
||||
Hostname: "gemi.dev",
|
||||
Port: 1965,
|
||||
Path: "/cgi-bin/xkcd.cgi",
|
||||
Descr: "XKCD 1494: Insurance",
|
||||
Full: "gemini://gemi.dev:1965/cgi-bin/xkcd.cgi?1494#f",
|
||||
},
|
||||
error: nil,
|
||||
},
|
||||
{
|
||||
currentURL: "gemini://gemi.dev/cgi-bin/xkcd/",
|
||||
link: "=> /cgi-bin/xkcd.cgi?c=5#d XKCD 1494: Insurance",
|
||||
value: &common.URL{
|
||||
Protocol: "gemini",
|
||||
Hostname: "gemi.dev",
|
||||
Port: 1965,
|
||||
Path: "/cgi-bin/xkcd.cgi",
|
||||
Descr: "XKCD 1494: Insurance",
|
||||
Full: "gemini://gemi.dev:1965/cgi-bin/xkcd.cgi?c=5#d",
|
||||
},
|
||||
error: nil,
|
||||
},
|
||||
{
|
||||
currentURL: "gemini://a.b/c#d",
|
||||
link: "=> /d/e#f",
|
||||
value: &common.URL{
|
||||
Protocol: "gemini",
|
||||
Hostname: "a.b",
|
||||
Port: 1965,
|
||||
Path: "/d/e",
|
||||
Descr: "",
|
||||
Full: "gemini://a.b:1965/d/e#f",
|
||||
},
|
||||
error: nil,
|
||||
},
|
||||
}
|
||||
|
||||
func Test(t *testing.T) {
|
||||
t.Parallel()
|
||||
for i, expected := range data {
|
||||
result, err := ParseGeminiLinkLine(expected.link, expected.currentURL)
|
||||
if err != nil { //nolint:nestif
|
||||
if expected.value != nil {
|
||||
t.Errorf("data[%d]: Expected value %v, got %v", i, nil, expected.value)
|
||||
}
|
||||
if !errors.Is(err, common.ErrGeminiLinkLineParse) {
|
||||
t.Errorf("data[%d]: expected error %v, got %v", i, expected.error, err)
|
||||
}
|
||||
} else {
|
||||
if expected.error != nil {
|
||||
t.Errorf("data[%d]: Expected error %v, got %v", i, nil, expected.error)
|
||||
}
|
||||
if !(reflect.DeepEqual(result, expected.value)) {
|
||||
t.Errorf("data[%d]: expected %#v, got %#v", i, expected.value, result)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,8 +1,9 @@
|
||||
package gemini
|
||||
|
||||
import (
|
||||
"gemini-grc/common"
|
||||
"testing"
|
||||
|
||||
"gemini-grc/common"
|
||||
)
|
||||
|
||||
func TestExtractRedirectTargetFullURL(t *testing.T) {
|
||||
|
||||
@@ -4,7 +4,6 @@ import (
|
||||
"crypto/tls"
|
||||
"errors"
|
||||
"fmt"
|
||||
"gemini-grc/common"
|
||||
"io"
|
||||
"net"
|
||||
gourl "net/url"
|
||||
@@ -14,6 +13,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gemini-grc/common"
|
||||
"gemini-grc/config"
|
||||
"gemini-grc/logging"
|
||||
"github.com/guregu/null/v5"
|
||||
@@ -28,20 +28,11 @@ type PageData struct {
|
||||
Data []byte
|
||||
}
|
||||
|
||||
// Resolve the URL hostname and
|
||||
// check if we already have an open
|
||||
// connection to this host.
|
||||
// If we can connect, return a list
|
||||
// of the resolved IPs.
|
||||
func getHostIPAddresses(hostname string) ([]string, error) {
|
||||
addrs, err := net.LookupHost(hostname)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w:%w", common.ErrNetworkDNS, err)
|
||||
}
|
||||
IPPool.Lock.RLock()
|
||||
defer func() {
|
||||
IPPool.Lock.RUnlock()
|
||||
}()
|
||||
return addrs, nil
|
||||
}
|
||||
|
||||
|
||||
6
go.mod
6
go.mod
@@ -9,16 +9,22 @@ require (
|
||||
github.com/lib/pq v1.10.9
|
||||
github.com/matoous/go-nanoid/v2 v2.1.0
|
||||
github.com/rs/zerolog v1.33.0
|
||||
github.com/stretchr/testify v1.9.0
|
||||
golang.org/x/text v0.19.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/jackc/pgpassfile v1.0.0 // indirect
|
||||
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
|
||||
github.com/jackc/puddle/v2 v2.2.2 // indirect
|
||||
github.com/kr/text v0.2.0 // indirect
|
||||
github.com/mattn/go-colorable v0.1.13 // indirect
|
||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/rogpeppe/go-internal v1.13.1 // indirect
|
||||
golang.org/x/crypto v0.27.0 // indirect
|
||||
golang.org/x/sync v0.8.0 // indirect
|
||||
golang.org/x/sys v0.25.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
)
|
||||
|
||||
9
go.sum
9
go.sum
@@ -1,6 +1,7 @@
|
||||
filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA=
|
||||
filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=
|
||||
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
|
||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
@@ -19,6 +20,10 @@ github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo
|
||||
github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
|
||||
github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o=
|
||||
github.com/jmoiron/sqlx v1.4.0/go.mod h1:ZrZ7UsYB/weZdl2Bxg6jCRO9c3YHl8r3ahlKmRT4JLY=
|
||||
github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
|
||||
github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
|
||||
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
|
||||
github.com/matoous/go-nanoid/v2 v2.1.0 h1:P64+dmq21hhWdtvZfEAofnvJULaRR1Yib0+PnU669bE=
|
||||
@@ -34,6 +39,8 @@ github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxU
|
||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
|
||||
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
|
||||
github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
|
||||
github.com/rs/zerolog v1.33.0 h1:1cU2KZkvPxNyfgEmhHAz/1A9Bz+llsdYzklWFzgp0r8=
|
||||
github.com/rs/zerolog v1.33.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
|
||||
@@ -54,6 +61,8 @@ golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM=
|
||||
golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
|
||||
54
hostPool/hostPool.go
Normal file
54
hostPool/hostPool.go
Normal file
@@ -0,0 +1,54 @@
|
||||
package hostPool
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"gemini-grc/logging"
|
||||
)
|
||||
|
||||
// hostPool is the process-wide set of hosts that are currently claimed.
var hostPool = HostPool{hostnames: make(map[string]struct{})} //nolint:gochecknoglobals

// HostPool is a set of host keys guarded by a read/write mutex. The zero
// value is an empty, ready-to-use pool. A HostPool must not be copied after
// first use because it contains a mutex.
type HostPool struct {
	hostnames map[string]struct{}
	Lock      sync.RWMutex
}

// Add inserts key into the pool. Adding a key that is already present is a
// no-op.
func (p *HostPool) Add(key string) {
	p.Lock.Lock()
	defer p.Lock.Unlock()
	// Allocate lazily so a zero-value HostPool does not panic on first write.
	if p.hostnames == nil {
		p.hostnames = make(map[string]struct{})
	}
	p.hostnames[key] = struct{}{}
}

// Get reports whether key is currently in the pool.
func (p *HostPool) Get(key string) bool {
	p.Lock.RLock()
	defer p.Lock.RUnlock()
	_, ok := p.hostnames[key]
	return ok
}

// Delete removes key from the pool. Deleting a key that is not present is a
// no-op.
func (p *HostPool) Delete(key string) {
	p.Lock.Lock()
	defer p.Lock.Unlock()
	delete(p.hostnames, key)
}
|
||||
|
||||
func AddHostToHostPool(key string) {
|
||||
for {
|
||||
// Sleep until the host doesn't exist in pool,
|
||||
// then add it.
|
||||
if hostPool.Get(key) {
|
||||
time.Sleep(1 * time.Second) // Avoid flood-retrying
|
||||
logging.LogInfo("Waiting to add %s to pool...", key)
|
||||
} else {
|
||||
hostPool.Add(key)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func RemoveHostFromHostPool(key string) {
|
||||
if hostPool.Get(key) {
|
||||
hostPool.Delete(key)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user