245 lines
6.8 KiB
Go
245 lines
6.8 KiB
Go
package gemini
|
|
|
|
import (
|
|
"crypto/tls"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net"
|
|
gourl "net/url"
|
|
"regexp"
|
|
"slices"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"gemini-grc/config"
|
|
"gemini-grc/logging"
|
|
"github.com/guregu/null/v5"
|
|
)
|
|
|
|
// PageData is the parsed form of a single Gemini response, as produced by
// processData.
type PageData struct {
	// ResponseCode is the numeric status parsed from the header line.
	ResponseCode int
	// ResponseHeader is the first line of the response, without the
	// terminating '\n'.
	ResponseHeader string
	// MimeType is the media type named in the header; empty when the header
	// could not be parsed beyond its status code.
	MimeType string
	// Lang holds the value of the header's charset= or lang= parameter, if
	// one was present.
	Lang string
	// GemText is the response body as valid UTF-8; set only when MimeType is
	// "text/gemini".
	GemText string
	// Data is the raw response body; set for every other MIME type.
	Data []byte
}
|
|
|
|
// Resolve the URL hostname and
|
|
// check if we already have an open
|
|
// connection to this host.
|
|
// If we can connect, return a list
|
|
// of the resolved IPs.
|
|
func getHostIPAddresses(hostname string) ([]string, error) {
|
|
addrs, err := net.LookupHost(hostname)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("%w:%w", ErrNetworkDNS, err)
|
|
}
|
|
IPPool.Lock.RLock()
|
|
defer func() {
|
|
IPPool.Lock.RUnlock()
|
|
}()
|
|
return addrs, nil
|
|
}
|
|
|
|
func ConnectAndGetData(url string) ([]byte, error) {
|
|
parsedURL, err := gourl.Parse(url)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("%w: %w", ErrURLParse, err)
|
|
}
|
|
hostname := parsedURL.Hostname()
|
|
port := parsedURL.Port()
|
|
if port == "" {
|
|
port = "1965"
|
|
}
|
|
host := fmt.Sprintf("%s:%s", hostname, port)
|
|
// Establish the underlying TCP connection.
|
|
dialer := &net.Dialer{
|
|
Timeout: time.Duration(config.CONFIG.ResponseTimeout) * time.Second,
|
|
}
|
|
conn, err := dialer.Dial("tcp", host)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("%w: %w", ErrNetwork, err)
|
|
}
|
|
// Make sure we always close the connection.
|
|
defer func() {
|
|
// No need to handle error:
|
|
// Connection will time out eventually if still open somehow.
|
|
_ = conn.Close()
|
|
}()
|
|
|
|
// Set read and write timeouts on the TCP connection.
|
|
err = conn.SetReadDeadline(time.Now().Add(time.Duration(config.CONFIG.ResponseTimeout) * time.Second))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("%w: %w", ErrNetworkSetConnectionDeadline, err)
|
|
}
|
|
err = conn.SetWriteDeadline(time.Now().Add(time.Duration(config.CONFIG.ResponseTimeout) * time.Second))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("%w: %w", ErrNetworkSetConnectionDeadline, err)
|
|
}
|
|
|
|
// Perform the TLS handshake
|
|
tlsConfig := &tls.Config{
|
|
InsecureSkipVerify: true, //nolint:gosec // Accept all TLS certs, even if insecure.
|
|
ServerName: parsedURL.Hostname(), // SNI says we should not include port in hostname
|
|
// MinVersion: tls.VersionTLS12, // Use a minimum TLS version. Warning breaks a lot of sites.
|
|
}
|
|
tlsConn := tls.Client(conn, tlsConfig)
|
|
if err := tlsConn.Handshake(); err != nil {
|
|
return nil, fmt.Errorf("%w: %w", ErrNetworkTLS, err)
|
|
}
|
|
|
|
// We read `buf`-sized chunks and add data to `data`.
|
|
buf := make([]byte, 4096)
|
|
var data []byte
|
|
|
|
// Send Gemini request to trigger server response.
|
|
// Fix for stupid server bug:
|
|
// Some servers return 'Header: 53 No proxying to other hosts or ports!'
|
|
// when the port is 1965 and is still specified explicitly in the URL.
|
|
_url, _ := ParseURL(url, "")
|
|
_, err = tlsConn.Write([]byte(fmt.Sprintf("%s\r\n", _url.StringNoDefaultPort())))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("%w: %w", ErrNetworkCannotWrite, err)
|
|
}
|
|
// Read response bytes in len(buf) byte chunks
|
|
for {
|
|
n, err := tlsConn.Read(buf)
|
|
if n > 0 {
|
|
data = append(data, buf[:n]...)
|
|
}
|
|
if len(data) > config.CONFIG.MaxResponseSize {
|
|
return nil, fmt.Errorf("%w: %v", ErrNetworkResponseSizeExceededMax, config.CONFIG.MaxResponseSize)
|
|
}
|
|
if err != nil {
|
|
if errors.Is(err, io.EOF) {
|
|
break
|
|
}
|
|
return nil, fmt.Errorf("%w: %w", ErrNetwork, err)
|
|
}
|
|
}
|
|
return data, nil
|
|
}
|
|
|
|
// Visit given URL, using the Gemini protocol.
|
|
// Mutates given Snapshot with the data.
|
|
// In case of error, we store the error string
|
|
// inside snapshot and return the error.
|
|
func Visit(s *Snapshot) (err error) {
|
|
// Don't forget to also store error
|
|
// response code (if we have one)
|
|
// and header
|
|
defer func() {
|
|
if err != nil {
|
|
s.Error = null.StringFrom(err.Error())
|
|
if errors.As(err, new(*GeminiError)) {
|
|
s.Header = null.StringFrom(err.(*GeminiError).Header)
|
|
s.ResponseCode = null.IntFrom(int64(err.(*GeminiError).Code))
|
|
}
|
|
}
|
|
}()
|
|
s.Timestamp = null.TimeFrom(time.Now())
|
|
data, err := ConnectAndGetData(s.URL.String())
|
|
if err != nil {
|
|
return err
|
|
}
|
|
pageData, err := processData(data)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
s.Header = null.StringFrom(pageData.ResponseHeader)
|
|
s.ResponseCode = null.IntFrom(int64(pageData.ResponseCode))
|
|
s.MimeType = null.StringFrom(pageData.MimeType)
|
|
s.Lang = null.StringFrom(pageData.Lang)
|
|
if pageData.GemText != "" {
|
|
s.GemText = null.StringFrom(pageData.GemText)
|
|
}
|
|
if pageData.Data != nil {
|
|
s.Data = null.ValueFrom(pageData.Data)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// processData returne results from
|
|
// parsing Gemini header data:
|
|
// Code, mime type and lang (optional)
|
|
// Returns error if header was invalid
|
|
func processData(data []byte) (*PageData, error) {
|
|
header, body, err := getHeadersAndData(data)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
code, mimeType, lang := getMimeTypeAndLang(header)
|
|
logging.LogDebug("Header: %s", strings.TrimSpace(header))
|
|
if code != 20 {
|
|
return nil, NewErrGeminiStatusCode(code, header)
|
|
}
|
|
|
|
pageData := PageData{
|
|
ResponseCode: code,
|
|
ResponseHeader: header,
|
|
MimeType: mimeType,
|
|
Lang: lang,
|
|
}
|
|
// If we've got a Gemini document, populate
|
|
// `GemText` field, otherwise raw data goes to `Data`.
|
|
if mimeType == "text/gemini" {
|
|
validBody, err := BytesToValidUTF8(body)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("%w: %w", ErrUTF8Parse, err)
|
|
}
|
|
pageData.GemText = validBody
|
|
} else {
|
|
pageData.Data = body
|
|
}
|
|
return &pageData, nil
|
|
}
|
|
|
|
// Checks for a Gemini header, which is
|
|
// basically the first line of the response
|
|
// and should contain the response code,
|
|
// mimeType and language.
|
|
func getHeadersAndData(data []byte) (string, []byte, error) {
|
|
firstLineEnds := slices.Index(data, '\n')
|
|
if firstLineEnds == -1 {
|
|
return "", nil, ErrGeminiResponseHeader
|
|
}
|
|
firstLine := string(data[:firstLineEnds])
|
|
rest := data[firstLineEnds+1:]
|
|
return firstLine, rest, nil
|
|
}
|
|
|
|
// Header patterns are compiled once at package initialisation instead of on
// every call.
var (
	// geminiHeaderRe matches a status code, a MIME type and at most one
	// optional charset= or lang= parameter.
	geminiHeaderRe = regexp.MustCompile(`^(\d+)\s+([a-zA-Z0-9/\-+]+)(?:[;\s]+(?:(?:charset|lang)=([a-zA-Z0-9-]+)))?\s*$`)
	// geminiStatusCodeRe matches only a leading status code followed by
	// whitespace.
	geminiStatusCodeRe = regexp.MustCompile(`^(\d+)\s+`)
)

// getMimeTypeAndLang parses the status code, MIME type and optional
// parameter value from a Gemini header line.
//
// Examples:
//
//	`20 text/gemini lang=en`                (code, mimetype, lang)
//	`20 text/gemini`                        (code, mimetype)
//	`31 gemini://redirected.to/other/site`  (code)
//
// The third result is the value of whichever single charset= or lang=
// parameter the header carries. When the header does not match the full
// pattern, only the code is returned and both strings are empty. When no
// code can be parsed at all, the result is 0, "", "".
func getMimeTypeAndLang(headers string) (int, string, string) {
	if m := geminiHeaderRe.FindStringSubmatch(headers); m != nil {
		code, err := strconv.Atoi(m[1])
		if err != nil {
			return 0, "", ""
		}
		return code, m[2], m[3]
	}

	// Try to get the code at least.
	m := geminiStatusCodeRe.FindStringSubmatch(headers)
	if m == nil {
		return 0, "", ""
	}
	code, err := strconv.Atoi(m[1])
	if err != nil {
		return 0, "", ""
	}
	return code, "", ""
}
|