Better error handling, many fixes all around

This commit is contained in:
2024-12-09 19:53:15 +02:00
parent b52d4f6532
commit 7a36614232
15 changed files with 520 additions and 233 deletions

View File

@@ -9,14 +9,16 @@ import (
"gemini-grc/logging"
)
func ProcessGemini(snapshot *Snapshot) *Snapshot {
func GetPageLinks(currentURL URL, gemtext string) LinkList {
// Grab link lines
linkLines := ExtractLinkLines(snapshot.GemText.String)
logging.LogDebug("[%s] Found %d links", snapshot.URL.String(), len(linkLines))
linkLines := ExtractLinkLines(gemtext)
if len(linkLines) == 0 {
return nil
}
var linkURLs LinkList
// Normalize URLs in links, and store them in snapshot
for _, line := range linkLines {
normalizedLink, descr, err := NormalizeLink(line, snapshot.URL.String())
normalizedLink, descr, err := NormalizeLink(line, currentURL.String())
if err != nil {
logging.LogDebug("Cannot normalize URL in line '%s': %v", line, err)
continue
@@ -26,13 +28,10 @@ func ProcessGemini(snapshot *Snapshot) *Snapshot {
logging.LogDebug("Cannot parse URL in link '%s': %v", line, err)
continue
}
if snapshot.Links == nil {
snapshot.Links = &LinkList{*geminiUrl}
} else {
*snapshot.Links = append(*snapshot.Links, *geminiUrl)
}
logging.LogDebug(geminiUrl.String())
linkURLs = append(linkURLs, *geminiUrl)
}
return snapshot
return linkURLs
}
// ExtractLinkLines takes a Gemtext document as a string and returns all lines that are link lines
@@ -124,3 +123,22 @@ func ParseFirstTwoDigits(input string) (int, error) {
return snapshot, nil
}
// redirectTargetRe locates a numeric status followed by whitespace and
// captures the text that comes after it:
//
//	\d+      - one or more digits
//	\s+      - one or more whitespace characters
//	([^\r]+) - everything up to the next \r (or the end of the string)
//
// The pattern is not anchored, so it matches the first such occurrence
// anywhere in the input. It is compiled once at package initialization
// instead of on every call.
var redirectTargetRe = regexp.MustCompile(`\d+\s+([^\r]+)`)

// extractRedirectTarget returns the redirection URL by parsing the
// header (or an error message that contains one).
//
// The captured target is passed to DeriveAbsoluteURL together with
// currentURL. If the input contains no match, or DeriveAbsoluteURL fails,
// a nil URL is returned along with an error wrapping
// ErrGeminiResponseHeader (and, in the second case, the underlying error).
func extractRedirectTarget(currentURL URL, input string) (*URL, error) {
	matches := redirectTargetRe.FindStringSubmatch(input)
	// A successful match yields the full match plus one capture group.
	if len(matches) < 2 {
		return nil, fmt.Errorf("%w: Cannot find redirect target from header %s", ErrGeminiResponseHeader, input)
	}
	newURL, err := DeriveAbsoluteURL(currentURL, matches[1])
	if err != nil {
		return nil, fmt.Errorf("%w: Cannot find redirect target from header: %w", ErrGeminiResponseHeader, err)
	}
	return newURL, nil
}