.
This commit is contained in:
@@ -2,25 +2,27 @@ package gemini
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"errors"
|
||||
"fmt"
|
||||
"gemini-grc/config"
|
||||
"io"
|
||||
"net"
|
||||
go_url "net/url"
|
||||
gourl "net/url"
|
||||
"regexp"
|
||||
"slices"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"gemini-grc/config"
|
||||
"github.com/guregu/null/v5"
|
||||
)
|
||||
|
||||
// PageData holds the parsed parts of a single Gemini response.
type PageData struct {
	ResponseCode   int    // numeric status code parsed from the header line
	ResponseHeader string // header line of the response (text before the first '\n')
	MimeType       string // mime type parsed from the header line
	Lang           string // optional lang parameter parsed from the header line
	GemText        string // body as text, populated for text/gemini responses
	Data           []byte // raw body bytes for responses that are not text/gemini
}

// GeminiPageData is the previous name of PageData.
//
// Deprecated: use PageData.
type GeminiPageData = PageData
|
||||
|
||||
// Resolve the URL hostname and
|
||||
@@ -31,7 +33,7 @@ type GeminiPageData struct {
|
||||
func getHostIPAddresses(hostname string) ([]string, error) {
|
||||
addrs, err := net.LookupHost(hostname)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("%w:%w", ErrNetworkDNS, err)
|
||||
}
|
||||
IpPool.Lock.RLock()
|
||||
defer func() {
|
||||
@@ -41,12 +43,12 @@ func getHostIPAddresses(hostname string) ([]string, error) {
|
||||
}
|
||||
|
||||
func ConnectAndGetData(url string) ([]byte, error) {
|
||||
parsedUrl, err := go_url.Parse(url)
|
||||
parsedURL, err := gourl.Parse(url)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Could not parse URL, error %w", err)
|
||||
return nil, fmt.Errorf("%w: %w", ErrURLParse, err)
|
||||
}
|
||||
hostname := parsedUrl.Hostname()
|
||||
port := parsedUrl.Port()
|
||||
hostname := parsedURL.Hostname()
|
||||
port := parsedURL.Port()
|
||||
if port == "" {
|
||||
port = "1965"
|
||||
}
|
||||
@@ -58,34 +60,34 @@ func ConnectAndGetData(url string) ([]byte, error) {
|
||||
}
|
||||
conn, err := dialer.Dial("tcp", host)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("TCP connection failed: %w", err)
|
||||
return nil, fmt.Errorf("%w: %w", ErrNetwork, err)
|
||||
}
|
||||
// Make sure we always close the connection.
|
||||
defer func() {
|
||||
// No need to handle error:
|
||||
// Connection will timeout eventually if still open somehow.
|
||||
conn.Close()
|
||||
// Connection will time out eventually if still open somehow.
|
||||
_ = conn.Close()
|
||||
}()
|
||||
|
||||
// Set read and write timeouts on the TCP connection.
|
||||
err = conn.SetReadDeadline(time.Now().Add(time.Duration(config.CONFIG.ResponseTimeout) * time.Second))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Error setting connection deadline: %w", err)
|
||||
return nil, fmt.Errorf("%w: %w", ErrNetworkSetConnectionDeadline, err)
|
||||
}
|
||||
err = conn.SetWriteDeadline(time.Now().Add(time.Duration(config.CONFIG.ResponseTimeout) * time.Second))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Error setting connection deadline: %w", err)
|
||||
return nil, fmt.Errorf("%w: %w", ErrNetworkSetConnectionDeadline, err)
|
||||
}
|
||||
|
||||
// Perform the TLS handshake
|
||||
tlsConfig := &tls.Config{
|
||||
InsecureSkipVerify: true, // Accept all TLS certs, even if insecure.
|
||||
ServerName: parsedUrl.Hostname(), // SNI should not include port
|
||||
InsecureSkipVerify: true, //nolint:gosec // Accept all TLS certs, even if insecure.
|
||||
ServerName: parsedURL.Hostname(), // SNI should not include port
|
||||
// MinVersion: tls.VersionTLS12, // Use a minimum TLS version. Warning breaks a lot of sites.
|
||||
}
|
||||
tlsConn := tls.Client(conn, tlsConfig)
|
||||
if err := tlsConn.Handshake(); err != nil {
|
||||
return nil, fmt.Errorf("TLS handshake error: %w", err)
|
||||
return nil, fmt.Errorf("%w: %w", ErrNetworkTLS, err)
|
||||
}
|
||||
|
||||
// We read `buf`-sized chunks and add data to `data`.
|
||||
@@ -95,7 +97,7 @@ func ConnectAndGetData(url string) ([]byte, error) {
|
||||
// Send Gemini request to trigger server response.
|
||||
_, err = tlsConn.Write([]byte(fmt.Sprintf("%s\r\n", url)))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Error sending network request: %w", err)
|
||||
return nil, fmt.Errorf("%w: %w", ErrNetworkCannotWrite, err)
|
||||
}
|
||||
// Read response bytes in len(buf) byte chunks
|
||||
for {
|
||||
@@ -104,68 +106,72 @@ func ConnectAndGetData(url string) ([]byte, error) {
|
||||
data = append(data, buf[:n]...)
|
||||
}
|
||||
if len(data) > config.CONFIG.MaxResponseSize {
|
||||
data = []byte{}
|
||||
return nil, fmt.Errorf("Response size exceeded maximum of %d bytes", config.CONFIG.MaxResponseSize)
|
||||
return nil, fmt.Errorf("%w: %v", ErrNetworkResponseSizeExceededMax, config.CONFIG.MaxResponseSize)
|
||||
}
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
} else {
|
||||
return nil, fmt.Errorf("Network error: %s", err)
|
||||
return nil, fmt.Errorf("%w: %w", ErrNetwork, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// Connect to given URL, using the Gemini protocol.
|
||||
// Mutate given Snapshot with the data or the error.
|
||||
func Visit(s *Snapshot) {
|
||||
// Visit given URL, using the Gemini protocol.
|
||||
// Mutates given Snapshot with the data.
|
||||
func Visit(s *Snapshot) error {
|
||||
data, err := ConnectAndGetData(s.URL.String())
|
||||
if err != nil {
|
||||
s.Error = null.StringFrom(err.Error())
|
||||
return
|
||||
return err
|
||||
}
|
||||
pageData, err := processData(data)
|
||||
if err != nil {
|
||||
s.Error = null.StringFrom(err.Error())
|
||||
return
|
||||
return err
|
||||
}
|
||||
s.ResponseCode = null.IntFrom(int64(pageData.ResponseCode))
|
||||
s.MimeType = null.StringFrom(pageData.MimeType)
|
||||
s.Lang = null.StringFrom(pageData.Lang)
|
||||
if pageData.GemText != "" {
|
||||
s.GemText = null.StringFrom(string(pageData.GemText))
|
||||
s.GemText = null.StringFrom(pageData.GemText)
|
||||
}
|
||||
if pageData.Data != nil {
|
||||
s.Data = null.ValueFrom(pageData.Data)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Parse a raw Gemini response into
|
||||
// page data: response code, mime type,
|
||||
// lang (optional) and the body.
|
||||
func processData(data []byte) (*GeminiPageData, error) {
|
||||
headers, body, err := getHeadersAndData(data)
|
||||
func processData(data []byte) (*PageData, error) {
|
||||
header, body, err := getHeadersAndData(data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
code, mimeType, lang := getMimeTypeAndLang(headers)
|
||||
geminiError := checkGeminiStatusCode(code)
|
||||
code, mimeType, lang := getMimeTypeAndLang(header)
|
||||
var geminiError error
|
||||
if code != 20 {
|
||||
geminiError = NewErrGeminiStatusCode(code, header)
|
||||
}
|
||||
fmt.Printf("%v\n", header)
|
||||
|
||||
if geminiError != nil {
|
||||
return nil, geminiError
|
||||
}
|
||||
pageData := GeminiPageData{
|
||||
ResponseCode: code,
|
||||
MimeType: mimeType,
|
||||
Lang: lang,
|
||||
pageData := PageData{
|
||||
ResponseCode: code,
|
||||
ResponseHeader: header,
|
||||
MimeType: mimeType,
|
||||
Lang: lang,
|
||||
}
|
||||
// If we've got a Gemini document, populate
|
||||
// `GemText` field, otherwise raw data goes to `Data`.
|
||||
if mimeType == "text/gemini" {
|
||||
validBody, err := EnsureValidUTF8(body)
|
||||
validBody, err := BytesToValidUTF8(body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("UTF-8 error: %w", err)
|
||||
return nil, fmt.Errorf("%w: %w", ErrUTF8Parse, err)
|
||||
}
|
||||
pageData.GemText = validBody
|
||||
} else {
|
||||
@@ -178,14 +184,14 @@ func processData(data []byte) (*GeminiPageData, error) {
|
||||
// basically the first line of the response
|
||||
// and should contain the response code,
|
||||
// mimeType and language.
|
||||
func getHeadersAndData(data []byte) (firstLine string, rest []byte, err error) {
|
||||
func getHeadersAndData(data []byte) (string, []byte, error) {
|
||||
firstLineEnds := slices.Index(data, '\n')
|
||||
if firstLineEnds == -1 {
|
||||
return "", nil, fmt.Errorf("Could not parse response header")
|
||||
return "", nil, ErrGeminiResponseHeader
|
||||
}
|
||||
firstLine = string(data[:firstLineEnds])
|
||||
rest = data[firstLineEnds+1:]
|
||||
return string(firstLine), rest, nil
|
||||
firstLine := string(data[:firstLineEnds])
|
||||
rest := data[firstLineEnds+1:]
|
||||
return firstLine, rest, nil
|
||||
}
|
||||
|
||||
// Parses code, mime type and language
|
||||
@@ -194,7 +200,7 @@ func getHeadersAndData(data []byte) (firstLine string, rest []byte, err error) {
|
||||
// `20 text/gemini lang=en` (code, mimetype, lang)
|
||||
// `20 text/gemini` (code, mimetype)
|
||||
// `31 gemini://redirected.to/other/site` (code)
|
||||
func getMimeTypeAndLang(headers string) (code int, mimeType string, lang string) {
|
||||
func getMimeTypeAndLang(headers string) (int, string, string) {
|
||||
// Regex that parses code, mimetype & lang
|
||||
re := regexp.MustCompile(`^(\d+)\s+([a-zA-Z0-9/\-+]+)(?:[;\s]+(lang=([a-zA-Z0-9-]+)))?\s*$`)
|
||||
matches := re.FindStringSubmatch(headers)
|
||||
@@ -215,7 +221,7 @@ func getMimeTypeAndLang(headers string) (code int, mimeType string, lang string)
|
||||
if err != nil {
|
||||
return 0, "", ""
|
||||
}
|
||||
mimeType = matches[2]
|
||||
lang = matches[4]
|
||||
mimeType := matches[2]
|
||||
lang := matches[4]
|
||||
return code, mimeType, lang
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user