Add seedList module for URL initialization, comprehensive SQL utilities for database analysis, and update project configuration.
88 lines
2.5 KiB
Go
88 lines
2.5 KiB
Go
package gemini
|
|
|
|
import (
|
|
"fmt"
|
|
"net/url"
|
|
"regexp"
|
|
|
|
"gemini-grc/common/linkList"
|
|
url2 "gemini-grc/common/url"
|
|
"gemini-grc/util"
|
|
"git.antanst.com/antanst/logging"
|
|
"git.antanst.com/antanst/xerrors"
|
|
)
|
|
|
|
func GetPageLinks(currentURL url2.URL, gemtext string) linkList.LinkList {
|
|
linkLines := util.GetLinesMatchingRegex(gemtext, `(?m)^=>[ \t]+.*`)
|
|
if len(linkLines) == 0 {
|
|
return nil
|
|
}
|
|
var linkURLs linkList.LinkList
|
|
// Normalize URLs in links
|
|
for _, line := range linkLines {
|
|
linkUrl, err := ParseGeminiLinkLine(line, currentURL.String())
|
|
if err != nil {
|
|
logging.LogDebug("error parsing gemini link line: %s", err)
|
|
continue
|
|
}
|
|
linkURLs = append(linkURLs, *linkUrl)
|
|
}
|
|
return linkURLs
|
|
}
|
|
|
|
// ParseGeminiLinkLine takes a single link line and the current URL,
|
|
// return the URL converted to an absolute URL
|
|
// and its description.
|
|
func ParseGeminiLinkLine(linkLine string, currentURL string) (*url2.URL, error) {
|
|
// Check: currentURL is parseable
|
|
baseURL, err := url.Parse(currentURL)
|
|
if err != nil {
|
|
return nil, xerrors.NewError(fmt.Errorf("error parsing link line: %w input '%s'", err, linkLine), 0, "", false)
|
|
}
|
|
|
|
// Extract the actual URL and the description
|
|
re := regexp.MustCompile(`^=>[ \t]+(\S+)([ \t]+.*)?`)
|
|
matches := re.FindStringSubmatch(linkLine)
|
|
if len(matches) == 0 {
|
|
return nil, xerrors.NewError(fmt.Errorf("error parsing link line: no regexp match for line %s", linkLine), 0, "", false)
|
|
}
|
|
|
|
originalURLStr := matches[1]
|
|
|
|
// Check: Unescape the URL if escaped
|
|
_, err = url.QueryUnescape(originalURLStr)
|
|
if err != nil {
|
|
return nil, xerrors.NewError(fmt.Errorf("error parsing link line: %w input '%s'", err, linkLine), 0, "", false)
|
|
}
|
|
|
|
description := ""
|
|
if len(matches) > 2 {
|
|
description = matches[2]
|
|
}
|
|
|
|
// Parse the URL from the link line
|
|
parsedURL, err := url.Parse(originalURLStr)
|
|
if err != nil {
|
|
return nil, xerrors.NewError(fmt.Errorf("error parsing link line: %w input '%s'", err, linkLine), 0, "", false)
|
|
}
|
|
|
|
// If link URL is relative, resolve full URL
|
|
if !parsedURL.IsAbs() {
|
|
parsedURL = baseURL.ResolveReference(parsedURL)
|
|
}
|
|
|
|
// remove usual first space from URL description:
|
|
// => URL description
|
|
// ^^^^^^^^^^^^
|
|
if len(description) > 0 && description[0] == ' ' {
|
|
description = description[1:]
|
|
}
|
|
|
|
finalURL, err := url2.ParseURL(parsedURL.String(), description, true)
|
|
if err != nil {
|
|
return nil, xerrors.NewError(fmt.Errorf("error parsing link line: %w input '%s'", err, linkLine), 0, "", false)
|
|
}
|
|
|
|
return finalURL, nil
|
|
}
|