Break up Gemtext link parsing code and improve tests.
This commit is contained in:
@@ -2,103 +2,12 @@ package gemini
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"gemini-grc/common"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strconv"
|
||||
|
||||
"gemini-grc/logging"
|
||||
"gemini-grc/common"
|
||||
)
|
||||
|
||||
func GetPageLinks(currentURL common.URL, gemtext string) common.LinkList {
|
||||
// Grab link lines
|
||||
linkLines := ExtractLinkLines(gemtext)
|
||||
if len(linkLines) == 0 {
|
||||
return nil
|
||||
}
|
||||
var linkURLs common.LinkList
|
||||
// Normalize URLs in links, and store them in snapshot
|
||||
for _, line := range linkLines {
|
||||
linkURL, err := NormalizeLink(line, currentURL.String())
|
||||
if err != nil {
|
||||
logging.LogDebug("%s: %s", common.ErrGeminiLinkLineParse, err)
|
||||
continue
|
||||
}
|
||||
linkURLs = append(linkURLs, *linkURL)
|
||||
}
|
||||
return linkURLs
|
||||
}
|
||||
|
||||
// linkLinePattern matches a whole Gemtext link line: "=>" at the start of a
// line, at least one space or tab, then the rest of the line. It is compiled
// once at package initialization instead of on every call.
var linkLinePattern = regexp.MustCompile(`(?m)^=>[ \t]+.*`)

// ExtractLinkLines returns, in document order, every line of gemtext that is
// a link line, i.e. starts with "=>" followed by at least one space or tab.
// It returns nil when the document contains no such line.
func ExtractLinkLines(gemtext string) []string {
	return linkLinePattern.FindAllString(gemtext, -1)
}
|
||||
|
||||
// NormalizeLink takes a single link line and the current URL,
|
||||
// return the URL converted to an absolute URL
|
||||
// and its description.
|
||||
func NormalizeLink(linkLine string, currentURL string) (*common.URL, error) {
|
||||
// Parse the current URL
|
||||
baseURL, err := url.Parse(currentURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: %w", common.ErrURLParse, err)
|
||||
}
|
||||
|
||||
// Regular expression to extract the URL part from a link line
|
||||
re := regexp.MustCompile(`^=>[ \t]+(\S+)([ \t]+.*)?`)
|
||||
|
||||
// Use regex to extract the URL and the rest of the line
|
||||
matches := re.FindStringSubmatch(linkLine)
|
||||
if len(matches) == 0 {
|
||||
// If the line doesn't match the expected format, return it unchanged
|
||||
return nil, fmt.Errorf("%w for link line %s", common.ErrGeminiLinkLineParse, linkLine)
|
||||
}
|
||||
|
||||
originalURLStr := matches[1]
|
||||
_, err = url.QueryUnescape(originalURLStr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: %w", common.ErrURLDecode, err)
|
||||
}
|
||||
|
||||
restOfLine := ""
|
||||
if len(matches) > 2 {
|
||||
restOfLine = matches[2]
|
||||
}
|
||||
|
||||
// Parse the URL from the link line
|
||||
parsedURL, err := url.Parse(originalURLStr)
|
||||
if err != nil {
|
||||
// If URL parsing fails, return an error
|
||||
return nil, fmt.Errorf("%w: %w", common.ErrURLParse, err)
|
||||
}
|
||||
|
||||
// Resolve relative URLs against the base URL
|
||||
if !parsedURL.IsAbs() {
|
||||
parsedURL = baseURL.ResolveReference(parsedURL)
|
||||
}
|
||||
|
||||
// Remove usual first space from URL description:
|
||||
// => URL description
|
||||
// ^^^^^^^^^^^^
|
||||
if len(restOfLine) > 0 && restOfLine[0] == ' ' {
|
||||
restOfLine = restOfLine[1:]
|
||||
}
|
||||
|
||||
finalURL, err := common.ParseURL(parsedURL.String(), restOfLine)
|
||||
if err != nil {
|
||||
// If URL parsing fails, return an error
|
||||
return nil, fmt.Errorf("%w: %w", common.ErrURLParse, err)
|
||||
}
|
||||
|
||||
return finalURL, nil
|
||||
}
|
||||
|
||||
// ParseFirstTwoDigits takes a string and returns the first one or two digits as an int.
|
||||
// If no valid digits are found, it returns an error.
|
||||
func ParseFirstTwoDigits(input string) (int, error) {
|
||||
|
||||
88
gemini/geminiLinks.go
Normal file
88
gemini/geminiLinks.go
Normal file
@@ -0,0 +1,88 @@
|
||||
package gemini
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/url"
|
||||
"regexp"
|
||||
|
||||
"gemini-grc/common"
|
||||
"gemini-grc/logging"
|
||||
"gemini-grc/util"
|
||||
)
|
||||
|
||||
func GetPageLinks(currentURL common.URL, gemtext string) common.LinkList {
|
||||
linkLines := util.GetLinesMatchingRegex(gemtext, `(?m)^=>[ \t]+.*`)
|
||||
if len(linkLines) == 0 {
|
||||
return nil
|
||||
}
|
||||
var linkURLs common.LinkList
|
||||
// Normalize URLs in links
|
||||
for _, line := range linkLines {
|
||||
linkUrl, err := ParseGeminiLinkLine(line, currentURL.String())
|
||||
if err != nil {
|
||||
logging.LogDebug("%s: %s", common.ErrGeminiLinkLineParse, err)
|
||||
continue
|
||||
}
|
||||
linkURLs = append(linkURLs, *linkUrl)
|
||||
}
|
||||
return linkURLs
|
||||
}
|
||||
|
||||
// ParseGeminiLinkLine takes a single link line and the current URL,
|
||||
// return the URL converted to an absolute URL
|
||||
// and its description.
|
||||
func ParseGeminiLinkLine(linkLine string, currentURL string) (*common.URL, error) {
|
||||
// Check: currentURL is parseable
|
||||
baseURL, err := url.Parse(currentURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: %w", common.ErrURLParse, err)
|
||||
}
|
||||
|
||||
// Extract the actual URL and the description
|
||||
re := regexp.MustCompile(`^=>[ \t]+(\S+)([ \t]+.*)?`)
|
||||
matches := re.FindStringSubmatch(linkLine)
|
||||
if len(matches) == 0 {
|
||||
// If the line doesn't match the expected format, return it unchanged
|
||||
return nil, fmt.Errorf("%w could not parse gemini link %s", common.ErrGeminiLinkLineParse, linkLine)
|
||||
}
|
||||
|
||||
originalURLStr := matches[1]
|
||||
|
||||
// Check: Unescape the URL if escaped
|
||||
_, err = url.QueryUnescape(originalURLStr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: %w", common.ErrURLDecode, err)
|
||||
}
|
||||
|
||||
description := ""
|
||||
if len(matches) > 2 {
|
||||
description = matches[2]
|
||||
}
|
||||
|
||||
// Parse the URL from the link line
|
||||
parsedURL, err := url.Parse(originalURLStr)
|
||||
if err != nil {
|
||||
// If URL parsing fails, return an error
|
||||
return nil, fmt.Errorf("%w: %w", common.ErrURLParse, err)
|
||||
}
|
||||
|
||||
// If link URL is relative, resolve full URL
|
||||
if !parsedURL.IsAbs() {
|
||||
parsedURL = baseURL.ResolveReference(parsedURL)
|
||||
}
|
||||
|
||||
// Remove usual first space from URL description:
|
||||
// => URL description
|
||||
// ^^^^^^^^^^^^
|
||||
if len(description) > 0 && description[0] == ' ' {
|
||||
description = description[1:]
|
||||
}
|
||||
|
||||
finalURL, err := common.ParseURL(parsedURL.String(), description)
|
||||
if err != nil {
|
||||
// If URL parsing fails, return an error
|
||||
return nil, fmt.Errorf("%w: %w", common.ErrURLParse, err)
|
||||
}
|
||||
|
||||
return finalURL, nil
|
||||
}
|
||||
125
gemini/geminiLinks_test.go
Normal file
125
gemini/geminiLinks_test.go
Normal file
@@ -0,0 +1,125 @@
|
||||
package gemini
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"gemini-grc/common"
|
||||
)
|
||||
|
||||
type TestData struct {
|
||||
currentURL string
|
||||
link string
|
||||
value *common.URL
|
||||
error error
|
||||
}
|
||||
|
||||
var data = []TestData{
|
||||
{
|
||||
currentURL: "https://gemini.com/",
|
||||
link: "https://gemini.com/",
|
||||
value: nil,
|
||||
error: common.ErrGeminiLinkLineParse,
|
||||
},
|
||||
{
|
||||
currentURL: "gemini://gemi.dev/cgi-bin/xkcd/",
|
||||
link: "=> archive/ Complete Archive",
|
||||
value: &common.URL{
|
||||
Protocol: "gemini",
|
||||
Hostname: "gemi.dev",
|
||||
Port: 1965,
|
||||
Path: "/cgi-bin/xkcd/archive/",
|
||||
Descr: "Complete Archive",
|
||||
Full: "gemini://gemi.dev:1965/cgi-bin/xkcd/archive/",
|
||||
},
|
||||
error: nil,
|
||||
},
|
||||
{
|
||||
currentURL: "gemini://gemi.dev/cgi-bin/xkcd/",
|
||||
link: "=> /cgi-bin/xkcd.cgi?a=5&b=6 Example",
|
||||
value: &common.URL{
|
||||
Protocol: "gemini",
|
||||
Hostname: "gemi.dev",
|
||||
Port: 1965,
|
||||
Path: "/cgi-bin/xkcd.cgi",
|
||||
Descr: "Example",
|
||||
Full: "gemini://gemi.dev:1965/cgi-bin/xkcd.cgi?a=5&b=6",
|
||||
},
|
||||
error: nil,
|
||||
},
|
||||
{
|
||||
currentURL: "gemini://gemi.dev/cgi-bin/xkcd/",
|
||||
link: "=> /cgi-bin/xkcd.cgi?1494 XKCD 1494: Insurance",
|
||||
value: &common.URL{
|
||||
Protocol: "gemini",
|
||||
Hostname: "gemi.dev",
|
||||
Port: 1965,
|
||||
Path: "/cgi-bin/xkcd.cgi",
|
||||
Descr: "XKCD 1494: Insurance",
|
||||
Full: "gemini://gemi.dev:1965/cgi-bin/xkcd.cgi?1494",
|
||||
},
|
||||
error: nil,
|
||||
},
|
||||
{
|
||||
currentURL: "gemini://gemi.dev/cgi-bin/xkcd/",
|
||||
link: "=> /cgi-bin/xkcd.cgi?1494#f XKCD 1494: Insurance",
|
||||
value: &common.URL{
|
||||
Protocol: "gemini",
|
||||
Hostname: "gemi.dev",
|
||||
Port: 1965,
|
||||
Path: "/cgi-bin/xkcd.cgi",
|
||||
Descr: "XKCD 1494: Insurance",
|
||||
Full: "gemini://gemi.dev:1965/cgi-bin/xkcd.cgi?1494#f",
|
||||
},
|
||||
error: nil,
|
||||
},
|
||||
{
|
||||
currentURL: "gemini://gemi.dev/cgi-bin/xkcd/",
|
||||
link: "=> /cgi-bin/xkcd.cgi?c=5#d XKCD 1494: Insurance",
|
||||
value: &common.URL{
|
||||
Protocol: "gemini",
|
||||
Hostname: "gemi.dev",
|
||||
Port: 1965,
|
||||
Path: "/cgi-bin/xkcd.cgi",
|
||||
Descr: "XKCD 1494: Insurance",
|
||||
Full: "gemini://gemi.dev:1965/cgi-bin/xkcd.cgi?c=5#d",
|
||||
},
|
||||
error: nil,
|
||||
},
|
||||
{
|
||||
currentURL: "gemini://a.b/c#d",
|
||||
link: "=> /d/e#f",
|
||||
value: &common.URL{
|
||||
Protocol: "gemini",
|
||||
Hostname: "a.b",
|
||||
Port: 1965,
|
||||
Path: "/d/e",
|
||||
Descr: "",
|
||||
Full: "gemini://a.b:1965/d/e#f",
|
||||
},
|
||||
error: nil,
|
||||
},
|
||||
}
|
||||
|
||||
func Test(t *testing.T) {
|
||||
t.Parallel()
|
||||
for i, expected := range data {
|
||||
result, err := ParseGeminiLinkLine(expected.link, expected.currentURL)
|
||||
if err != nil { //nolint:nestif
|
||||
if expected.value != nil {
|
||||
t.Errorf("data[%d]: Expected value %v, got %v", i, nil, expected.value)
|
||||
}
|
||||
if !errors.Is(err, common.ErrGeminiLinkLineParse) {
|
||||
t.Errorf("data[%d]: expected error %v, got %v", i, expected.error, err)
|
||||
}
|
||||
} else {
|
||||
if expected.error != nil {
|
||||
t.Errorf("data[%d]: Expected error %v, got %v", i, nil, expected.error)
|
||||
}
|
||||
if !(reflect.DeepEqual(result, expected.value)) {
|
||||
t.Errorf("data[%d]: expected %#v, got %#v", i, expected.value, result)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,8 +1,9 @@
|
||||
package gemini
|
||||
|
||||
import (
|
||||
"gemini-grc/common"
|
||||
"testing"
|
||||
|
||||
"gemini-grc/common"
|
||||
)
|
||||
|
||||
func TestExtractRedirectTargetFullURL(t *testing.T) {
|
||||
|
||||
Reference in New Issue
Block a user