Break up Gemtext link parsing code and improve tests.

2025-01-16 09:38:28 +02:00
parent 982fb75bd2
commit ea92b90c62
6 changed files with 231 additions and 93 deletions
--- a/gemini/gemini.go
+++ b/gemini/gemini.go
@@ -2,103 +2,12 @@ package gemini

 import (
 	"fmt"
-	"gemini-grc/common"
-	"net/url"
 	"regexp"
 	"strconv"

-	"gemini-grc/logging"
+	"gemini-grc/common"
 )

-func GetPageLinks(currentURL common.URL, gemtext string) common.LinkList {
-	// Grab link lines
-	linkLines := ExtractLinkLines(gemtext)
-	if len(linkLines) == 0 {
-		return nil
-	}
-	var linkURLs common.LinkList
-	// Normalize URLs in links, and store them in snapshot
-	for _, line := range linkLines {
-		linkURL, err := NormalizeLink(line, currentURL.String())
-		if err != nil {
-			logging.LogDebug("%s: %s", common.ErrGeminiLinkLineParse, err)
-			continue
-		}
-		linkURLs = append(linkURLs, *linkURL)
-	}
-	return linkURLs
-}
-
-// ExtractLinkLines takes a Gemtext document as a string and returns all lines that are link lines
-func ExtractLinkLines(gemtext string) []string {
-	// Define the regular expression pattern to match link lines
-	re := regexp.MustCompile(`(?m)^=>[ \t]+.*`)
-
-	// Find all matches using the regular expression
-	matches := re.FindAllString(gemtext, -1)
-
-	return matches
-}
-
-// NormalizeLink takes a single link line and the current URL,
-// return the URL converted to an absolute URL
-// and its description.
-func NormalizeLink(linkLine string, currentURL string) (*common.URL, error) {
-	// Parse the current URL
-	baseURL, err := url.Parse(currentURL)
-	if err != nil {
-		return nil, fmt.Errorf("%w: %w", common.ErrURLParse, err)
-	}
-
-	// Regular expression to extract the URL part from a link line
-	re := regexp.MustCompile(`^=>[ \t]+(\S+)([ \t]+.*)?`)
-
-	// Use regex to extract the URL and the rest of the line
-	matches := re.FindStringSubmatch(linkLine)
-	if len(matches) == 0 {
-		// If the line doesn't match the expected format, return it unchanged
-		return nil, fmt.Errorf("%w for link line %s", common.ErrGeminiLinkLineParse, linkLine)
-	}
-
-	originalURLStr := matches[1]
-	_, err = url.QueryUnescape(originalURLStr)
-	if err != nil {
-		return nil, fmt.Errorf("%w: %w", common.ErrURLDecode, err)
-	}
-
-	restOfLine := ""
-	if len(matches) > 2 {
-		restOfLine = matches[2]
-	}
-
-	// Parse the URL from the link line
-	parsedURL, err := url.Parse(originalURLStr)
-	if err != nil {
-		// If URL parsing fails, return an error
-		return nil, fmt.Errorf("%w: %w", common.ErrURLParse, err)
-	}
-
-	// Resolve relative URLs against the base URL
-	if !parsedURL.IsAbs() {
-		parsedURL = baseURL.ResolveReference(parsedURL)
-	}
-
-	// Remove usual first space from URL description:
-	// => URL description
-	//       ^^^^^^^^^^^^
-	if len(restOfLine) > 0 && restOfLine[0] == ' ' {
-		restOfLine = restOfLine[1:]
-	}
-
-	finalURL, err := common.ParseURL(parsedURL.String(), restOfLine)
-	if err != nil {
-		// If URL parsing fails, return an error
-		return nil, fmt.Errorf("%w: %w", common.ErrURLParse, err)
-	}
-
-	return finalURL, nil
-}
-
 // ParseFirstTwoDigits takes a string and returns the first one or two digits as an int.
 // If no valid digits are found, it returns an error.
 func ParseFirstTwoDigits(input string) (int, error) {