Update and refactor core functionality
- Update common package utilities
- Refactor network code for better error handling
- Remove deprecated files and functionality
- Enhance blacklist and filtering capabilities
- Improve snapshot handling and processing
This commit is contained in:
@@ -1,166 +1,23 @@
|
||||
package gemini
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
stdurl "net/url"
|
||||
"regexp"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
errors2 "gemini-grc/common/errors"
|
||||
commonErrors "gemini-grc/common/errors"
|
||||
"gemini-grc/common/snapshot"
|
||||
_url "gemini-grc/common/url"
|
||||
"gemini-grc/config"
|
||||
"gemini-grc/logging"
|
||||
"github.com/antanst/go_errors"
|
||||
"github.com/guregu/null/v5"
|
||||
)
|
||||
|
||||
// Visit given URL, using the Gemini protocol.
|
||||
// Mutates given Snapshot with the data.
|
||||
// In case of error, we store the error string
|
||||
// inside snapshot and return the error.
|
||||
func Visit(url string) (s *snapshot.Snapshot, err error) {
|
||||
s, err = snapshot.SnapshotFromURL(url, true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if err != nil {
|
||||
// GeminiError and HostError should
|
||||
// be stored in the snapshot. Other
|
||||
// errors are returned.
|
||||
if errors2.IsHostError(err) {
|
||||
s.Error = null.StringFrom(err.Error())
|
||||
err = nil
|
||||
} else if IsGeminiError(err) {
|
||||
s.Error = null.StringFrom(err.Error())
|
||||
s.Header = null.StringFrom(go_errors.Unwrap(err).(*GeminiError).Header)
|
||||
s.ResponseCode = null.IntFrom(int64(go_errors.Unwrap(err).(*GeminiError).Code))
|
||||
err = nil
|
||||
} else {
|
||||
s = nil
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
data, err := ConnectAndGetData(s.URL.String())
|
||||
if err != nil {
|
||||
return s, err
|
||||
}
|
||||
|
||||
s, err = processData(*s, data)
|
||||
if err != nil {
|
||||
return s, err
|
||||
}
|
||||
|
||||
if isGeminiCapsule(s) {
|
||||
links := GetPageLinks(s.URL, s.GemText.String)
|
||||
if len(links) > 0 {
|
||||
logging.LogDebug("Found %d links", len(links))
|
||||
s.Links = null.ValueFrom(links)
|
||||
}
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
func ConnectAndGetData(url string) ([]byte, error) {
|
||||
parsedURL, err := stdurl.Parse(url)
|
||||
if err != nil {
|
||||
return nil, go_errors.NewError(err)
|
||||
}
|
||||
hostname := parsedURL.Hostname()
|
||||
port := parsedURL.Port()
|
||||
if port == "" {
|
||||
port = "1965"
|
||||
}
|
||||
host := fmt.Sprintf("%s:%s", hostname, port)
|
||||
timeoutDuration := time.Duration(config.CONFIG.ResponseTimeout) * time.Second
|
||||
// Establish the underlying TCP connection.
|
||||
dialer := &net.Dialer{
|
||||
Timeout: timeoutDuration,
|
||||
}
|
||||
conn, err := dialer.Dial("tcp", host)
|
||||
if err != nil {
|
||||
return nil, errors2.NewHostError(err)
|
||||
}
|
||||
// Make sure we always close the connection.
|
||||
defer func() {
|
||||
_ = conn.Close()
|
||||
}()
|
||||
|
||||
// Set read and write timeouts on the TCP connection.
|
||||
err = conn.SetReadDeadline(time.Now().Add(timeoutDuration))
|
||||
if err != nil {
|
||||
return nil, errors2.NewHostError(err)
|
||||
}
|
||||
err = conn.SetWriteDeadline(time.Now().Add(timeoutDuration))
|
||||
if err != nil {
|
||||
return nil, errors2.NewHostError(err)
|
||||
}
|
||||
|
||||
// Perform the TLS handshake
|
||||
tlsConfig := &tls.Config{
|
||||
InsecureSkipVerify: true, //nolint:gosec // Accept all TLS certs, even if insecure.
|
||||
ServerName: parsedURL.Hostname(), // SNI says we should not include port in hostname
|
||||
// MinVersion: tls.VersionTLS12, // Use a minimum TLS version. Warning breaks a lot of sites.
|
||||
}
|
||||
tlsConn := tls.Client(conn, tlsConfig)
|
||||
err = tlsConn.SetReadDeadline(time.Now().Add(timeoutDuration))
|
||||
if err != nil {
|
||||
return nil, errors2.NewHostError(err)
|
||||
}
|
||||
err = tlsConn.SetWriteDeadline(time.Now().Add(timeoutDuration))
|
||||
if err != nil {
|
||||
return nil, errors2.NewHostError(err)
|
||||
}
|
||||
err = tlsConn.Handshake()
|
||||
if err != nil {
|
||||
return nil, errors2.NewHostError(err)
|
||||
}
|
||||
|
||||
// We read `buf`-sized chunks and add data to `data`.
|
||||
buf := make([]byte, 4096)
|
||||
var data []byte
|
||||
|
||||
// Send Gemini request to trigger server response.
|
||||
// Fix for stupid server bug:
|
||||
// Some servers return 'Header: 53 No proxying to other hosts or ports!'
|
||||
// when the port is 1965 and is still specified explicitly in the URL.
|
||||
url2, _ := _url.ParseURL(url, "", true)
|
||||
_, err = tlsConn.Write([]byte(fmt.Sprintf("%s\r\n", url2.StringNoDefaultPort())))
|
||||
if err != nil {
|
||||
return nil, errors2.NewHostError(err)
|
||||
}
|
||||
// Read response bytes in len(buf) byte chunks
|
||||
for {
|
||||
n, err := tlsConn.Read(buf)
|
||||
if n > 0 {
|
||||
data = append(data, buf[:n]...)
|
||||
}
|
||||
if len(data) > config.CONFIG.MaxResponseSize {
|
||||
return nil, errors2.NewHostError(err)
|
||||
}
|
||||
if err != nil {
|
||||
if go_errors.Is(err, io.EOF) {
|
||||
break
|
||||
}
|
||||
return nil, errors2.NewHostError(err)
|
||||
}
|
||||
}
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func processData(s snapshot.Snapshot, data []byte) (*snapshot.Snapshot, error) {
|
||||
// ProcessData processes the raw data from a Gemini response and populates the Snapshot.
|
||||
// This function is exported for use by the robotsMatch package.
|
||||
func ProcessData(s snapshot.Snapshot, data []byte) (*snapshot.Snapshot, error) {
|
||||
header, body, err := getHeadersAndData(data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return &s, err
|
||||
}
|
||||
code, mimeType, lang := getMimeTypeAndLang(header)
|
||||
|
||||
@@ -198,7 +55,7 @@ func processData(s snapshot.Snapshot, data []byte) (*snapshot.Snapshot, error) {
|
||||
func getHeadersAndData(data []byte) (string, []byte, error) {
|
||||
firstLineEnds := slices.Index(data, '\n')
|
||||
if firstLineEnds == -1 {
|
||||
return "", nil, errors2.NewHostError(fmt.Errorf("error parsing header"))
|
||||
return "", nil, commonErrors.NewHostError(fmt.Errorf("error parsing header"))
|
||||
}
|
||||
firstLine := string(data[:firstLineEnds])
|
||||
rest := data[firstLineEnds+1:]
|
||||
@@ -252,4 +109,4 @@ func getMimeTypeAndLang(headers string) (int, string, string) {
|
||||
|
||||
func isGeminiCapsule(s *snapshot.Snapshot) bool {
|
||||
return !s.Error.Valid && s.MimeType.Valid && s.MimeType.String == "text/gemini"
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user