package gemini import ( "fmt" "gemini-grc/common" "net/url" "regexp" "strconv" "gemini-grc/logging" ) func GetPageLinks(currentURL common.URL, gemtext string) common.LinkList { // Grab link lines linkLines := ExtractLinkLines(gemtext) if len(linkLines) == 0 { return nil } var linkURLs common.LinkList // Normalize URLs in links, and store them in snapshot for _, line := range linkLines { linkURL, err := NormalizeLink(line, currentURL.String()) if err != nil { logging.LogDebug("%s: %s", common.ErrGeminiLinkLineParse, err) continue } linkURLs = append(linkURLs, *linkURL) } return linkURLs } // ExtractLinkLines takes a Gemtext document as a string and returns all lines that are link lines func ExtractLinkLines(gemtext string) []string { // Define the regular expression pattern to match link lines re := regexp.MustCompile(`(?m)^=>[ \t]+.*`) // Find all matches using the regular expression matches := re.FindAllString(gemtext, -1) return matches } // NormalizeLink takes a single link line and the current URL, // return the URL converted to an absolute URL // and its description. func NormalizeLink(linkLine string, currentURL string) (*common.URL, error) { // Parse the current URL baseURL, err := url.Parse(currentURL) if err != nil { return nil, fmt.Errorf("%w: %w", common.ErrURLParse, err) } // Regular expression to extract the URL part from a link line re := regexp.MustCompile(`^=>[ \t]+(\S+)([ \t]+.*)?`) // Use regex to extract the URL and the rest of the line matches := re.FindStringSubmatch(linkLine) if len(matches) == 0 { // If the line doesn't match the expected format, return it unchanged return nil, fmt.Errorf("%w for link line %s", common.ErrGeminiLinkLineParse, linkLine) } originalURLStr := matches[1] _, err = url.QueryUnescape(originalURLStr) if err != nil { return nil, fmt.Errorf("%w: %w", common.ErrURLDecode, err) } restOfLine := "" if len(matches) > 2 { restOfLine = matches[2] } // Parse the URL from the link line parsedURL, err := url.Parse(originalURLStr) if err != nil { // If URL parsing fails, return an error return nil, fmt.Errorf("%w: %w", common.ErrURLParse, err) } // Resolve relative URLs against the base URL if !parsedURL.IsAbs() { parsedURL = baseURL.ResolveReference(parsedURL) } // Remove usual first space from URL description: // => URL description // ^^^^^^^^^^^^ if len(restOfLine) > 0 && restOfLine[0] == ' ' { restOfLine = restOfLine[1:] } finalURL, err := common.ParseURL(parsedURL.String(), restOfLine) if err != nil { // If URL parsing fails, return an error return nil, fmt.Errorf("%w: %w", common.ErrURLParse, err) } return finalURL, nil } // ParseFirstTwoDigits takes a string and returns the first one or two digits as an int. // If no valid digits are found, it returns an error. func ParseFirstTwoDigits(input string) (int, error) { // Define the regular expression pattern to match one or two leading digits re := regexp.MustCompile(`^(\d{1,2})`) // Find the first match in the string matches := re.FindStringSubmatch(input) if len(matches) == 0 { return 0, fmt.Errorf("%w", common.ErrGeminiResponseHeader) } // Parse the captured match as an integer snapshot, err := strconv.Atoi(matches[1]) if err != nil { return 0, fmt.Errorf("%w: %w", common.ErrTextParse, err) } return snapshot, nil } // extractRedirectTarget returns the redirection // URL by parsing the header (or error message) func extractRedirectTarget(currentURL common.URL, input string) (*common.URL, error) { // \d+ - matches one or more digits // \s+ - matches one or more whitespace // ([^\r]+) - captures everything until it hits a \r (or end of string) pattern := `\d+\s+([^\r]+)` re := regexp.MustCompile(pattern) matches := re.FindStringSubmatch(input) if len(matches) < 2 { return nil, fmt.Errorf("%w: %s", common.ErrGeminiRedirect, input) } newURL, err := common.DeriveAbsoluteURL(currentURL, matches[1]) if err != nil { return nil, fmt.Errorf("%w: %w: %s", common.ErrGeminiRedirect, err, input) } return newURL, nil }