Proper mimetype parsing, refactoring
This commit is contained in:
11
config.go
11
config.go
@@ -13,6 +13,7 @@ type Config struct {
|
|||||||
rootPath string
|
rootPath string
|
||||||
numOfWorkers int
|
numOfWorkers int
|
||||||
maxResponseSize int
|
maxResponseSize int
|
||||||
|
responseTimeout int
|
||||||
}
|
}
|
||||||
|
|
||||||
func getConfig() *Config {
|
func getConfig() *Config {
|
||||||
@@ -22,6 +23,7 @@ func getConfig() *Config {
|
|||||||
"ROOT_PATH",
|
"ROOT_PATH",
|
||||||
"NUM_OF_WORKERS",
|
"NUM_OF_WORKERS",
|
||||||
"MAX_RESPONSE_SIZE",
|
"MAX_RESPONSE_SIZE",
|
||||||
|
"RESPONSE_TIMEOUT",
|
||||||
} {
|
} {
|
||||||
if env, ok := os.LookupEnv(envVar); !ok {
|
if env, ok := os.LookupEnv(envVar); !ok {
|
||||||
fmt.Fprintf(os.Stderr, "Missing env var %s\n", envVar)
|
fmt.Fprintf(os.Stderr, "Missing env var %s\n", envVar)
|
||||||
@@ -59,6 +61,15 @@ func getConfig() *Config {
|
|||||||
config.maxResponseSize = maxResponseSize
|
config.maxResponseSize = maxResponseSize
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
case "RESPONSE_TIMEOUT":
|
||||||
|
{
|
||||||
|
if val, err := strconv.Atoi(env); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Invalid RESPONSE_TIMEOUT value\n")
|
||||||
|
os.Exit(1)
|
||||||
|
} else {
|
||||||
|
config.responseTimeout = val
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
32
fs.go
32
fs.go
@@ -3,6 +3,8 @@ package main
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/url"
|
"net/url"
|
||||||
|
"os"
|
||||||
|
"path"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
@@ -58,3 +60,33 @@ func calcFilePath(rootPath, urlPath string) (string, error) {
|
|||||||
|
|
||||||
return finalPath, nil
|
return finalPath, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func SaveSnapshot(rootPath string, s *Snapshot) {
|
||||||
|
parentPath := path.Join(rootPath, s.URL.Hostname)
|
||||||
|
urlPath := s.URL.Path
|
||||||
|
// If path is empty, add `index.gmi` as the file to save
|
||||||
|
if urlPath == "" || urlPath == "." {
|
||||||
|
urlPath = fmt.Sprintf("index.gmi")
|
||||||
|
}
|
||||||
|
// If path ends with '/' then add index.gmi for the
|
||||||
|
// directory to be created.
|
||||||
|
if strings.HasSuffix(urlPath, "/") {
|
||||||
|
urlPath = strings.Join([]string{urlPath, "index.gmi"}, "")
|
||||||
|
}
|
||||||
|
|
||||||
|
finalPath, err := calcFilePath(parentPath, urlPath)
|
||||||
|
if err != nil {
|
||||||
|
LogError("Error saving %s: %w", s.URL, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Ensure the directory exists
|
||||||
|
dir := filepath.Dir(finalPath)
|
||||||
|
if err := os.MkdirAll(dir, os.ModePerm); err != nil {
|
||||||
|
LogError("Failed to create directory: %w", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
err = os.WriteFile(finalPath, []byte((*s).Data), 0666)
|
||||||
|
if err != nil {
|
||||||
|
LogError("Error saving %s: %w", s.URL.Full, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
72
gemini.go
72
gemini.go
@@ -4,12 +4,8 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
|
||||||
"path"
|
|
||||||
"path/filepath"
|
|
||||||
"regexp"
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func checkGeminiStatusCode(code int) error {
|
func checkGeminiStatusCode(code int) error {
|
||||||
@@ -31,43 +27,9 @@ func checkGeminiStatusCode(code int) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// responseHeaderRe matches "<digits> <media type>" at the start of a response,
// followed by at least one ';' or whitespace character and an optional
// "lang=<tag>" parameter. It is compiled once at package scope instead of on
// every call.
var responseHeaderRe = regexp.MustCompile(`^\d+\s+([a-zA-Z0-9/\-+]+)[;\s]+(lang=([a-zA-Z0-9-]+))?`)

// parseHeaders extracts the media type and the optional lang parameter from
// the start of data. It returns two empty strings when data does not begin
// with a header in the expected form; lang is empty when no lang parameter
// directly follows the media type.
func parseHeaders(data string) (string, string) {
	matches := responseHeaderRe.FindStringSubmatch(data)
	if matches == nil {
		return "", ""
	}
	// Group 1 is the media type, group 3 the bare language tag.
	return matches[1], matches[3]
}
|
|
||||||
|
|
||||||
func ProcessHeaders(snapshot *Snapshot) *Snapshot {
|
|
||||||
LogDebug("[%s] Processing snapshot", snapshot.URL.String())
|
|
||||||
mimetype, lang := parseHeaders(snapshot.Data)
|
|
||||||
if mimetype != "" {
|
|
||||||
snapshot.MimeType = mimetype
|
|
||||||
}
|
|
||||||
if lang != "" {
|
|
||||||
snapshot.Lang = lang
|
|
||||||
}
|
|
||||||
return snapshot
|
|
||||||
}
|
|
||||||
|
|
||||||
func ProcessGemini(snapshot *Snapshot) *Snapshot {
|
func ProcessGemini(snapshot *Snapshot) *Snapshot {
|
||||||
code, err := ParseFirstTwoDigits(snapshot.Data)
|
// Grab link lines
|
||||||
if err != nil {
|
linkLines := ExtractLinkLines(snapshot.GemText)
|
||||||
snapshot.Error = fmt.Errorf("[%s] No/invalid gemini response code", snapshot.URL.String())
|
|
||||||
return snapshot
|
|
||||||
}
|
|
||||||
snapshot.ResponseCode = code
|
|
||||||
|
|
||||||
// Remove response headers from body (first line)
|
|
||||||
index := strings.Index(snapshot.Data, "\n")
|
|
||||||
if index != -1 {
|
|
||||||
snapshot.Data = snapshot.Data[index+1:]
|
|
||||||
}
|
|
||||||
|
|
||||||
// Grab any link lines
|
|
||||||
linkLines := ExtractLinkLines(snapshot.Data)
|
|
||||||
LogDebug("[%s] Found %d links", snapshot.URL.String(), len(linkLines))
|
LogDebug("[%s] Found %d links", snapshot.URL.String(), len(linkLines))
|
||||||
|
|
||||||
// Normalize URLs in links, and store them in snapshot
|
// Normalize URLs in links, and store them in snapshot
|
||||||
@@ -86,36 +48,6 @@ func ProcessGemini(snapshot *Snapshot) *Snapshot {
|
|||||||
return snapshot
|
return snapshot
|
||||||
}
|
}
|
||||||
|
|
||||||
func SaveResult(rootPath string, s *Snapshot) {
|
|
||||||
parentPath := path.Join(rootPath, s.URL.Hostname)
|
|
||||||
urlPath := s.URL.Path
|
|
||||||
// If path is empty, add `index.gmi` as the file to save
|
|
||||||
if urlPath == "" || urlPath == "." {
|
|
||||||
urlPath = fmt.Sprintf("index.gmi")
|
|
||||||
}
|
|
||||||
// If path ends with '/' then add index.gmi for the
|
|
||||||
// directory to be created.
|
|
||||||
if strings.HasSuffix(urlPath, "/") {
|
|
||||||
urlPath = strings.Join([]string{urlPath, "index.gmi"}, "")
|
|
||||||
}
|
|
||||||
|
|
||||||
finalPath, err := calcFilePath(parentPath, urlPath)
|
|
||||||
if err != nil {
|
|
||||||
LogError("Error saving %s: %w", s.URL, err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
// Ensure the directory exists
|
|
||||||
dir := filepath.Dir(finalPath)
|
|
||||||
if err := os.MkdirAll(dir, os.ModePerm); err != nil {
|
|
||||||
LogError("Failed to create directory: %w", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
err = os.WriteFile(finalPath, []byte((*s).Data), 0666)
|
|
||||||
if err != nil {
|
|
||||||
LogError("Error saving %s: %w", s.URL.Full, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func ParseUrl(input string, descr string) (*GeminiUrl, error) {
|
func ParseUrl(input string, descr string) (*GeminiUrl, error) {
|
||||||
u, err := url.Parse(input)
|
u, err := url.Parse(input)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
2
go.mod
2
go.mod
@@ -10,5 +10,7 @@ require (
|
|||||||
require (
|
require (
|
||||||
github.com/mattn/go-colorable v0.1.13 // indirect
|
github.com/mattn/go-colorable v0.1.13 // indirect
|
||||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||||
|
golang.org/x/exp v0.0.0-20241004190924-225e2abe05e6 // indirect
|
||||||
|
golang.org/x/net v0.27.0 // indirect
|
||||||
golang.org/x/sys v0.25.0 // indirect
|
golang.org/x/sys v0.25.0 // indirect
|
||||||
)
|
)
|
||||||
|
|||||||
6
go.sum
6
go.sum
@@ -1,4 +1,6 @@
|
|||||||
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
|
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
|
||||||
|
github.com/gabriel-vasile/mimetype v1.4.5 h1:J7wGKdGu33ocBOhGy0z653k/lFKLFDPJMG8Gql0kxn4=
|
||||||
|
github.com/gabriel-vasile/mimetype v1.4.5/go.mod h1:ibHel+/kbxn9x2407k1izTA1S81ku1z/DlgOW2QE0M4=
|
||||||
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||||
github.com/jaevor/go-nanoid v1.4.0 h1:mPz0oi3CrQyEtRxeRq927HHtZCJAAtZ7zdy7vOkrvWs=
|
github.com/jaevor/go-nanoid v1.4.0 h1:mPz0oi3CrQyEtRxeRq927HHtZCJAAtZ7zdy7vOkrvWs=
|
||||||
github.com/jaevor/go-nanoid v1.4.0/go.mod h1:GIpPtsvl3eSBsjjIEFQdzzgpi50+Bo1Luk+aYlbJzlc=
|
github.com/jaevor/go-nanoid v1.4.0/go.mod h1:GIpPtsvl3eSBsjjIEFQdzzgpi50+Bo1Luk+aYlbJzlc=
|
||||||
@@ -12,6 +14,10 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE
|
|||||||
github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
|
github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
|
||||||
github.com/rs/zerolog v1.33.0 h1:1cU2KZkvPxNyfgEmhHAz/1A9Bz+llsdYzklWFzgp0r8=
|
github.com/rs/zerolog v1.33.0 h1:1cU2KZkvPxNyfgEmhHAz/1A9Bz+llsdYzklWFzgp0r8=
|
||||||
github.com/rs/zerolog v1.33.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
|
github.com/rs/zerolog v1.33.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
|
||||||
|
golang.org/x/exp v0.0.0-20241004190924-225e2abe05e6 h1:1wqE9dj9NpSm04INVsJhhEUzhuDVjbcyKH91sVyPATw=
|
||||||
|
golang.org/x/exp v0.0.0-20241004190924-225e2abe05e6/go.mod h1:NQtJDoLvd6faHhE7m4T/1IY708gDefGGjR/iUW8yQQ8=
|
||||||
|
golang.org/x/net v0.27.0 h1:5K3Njcw06/l2y9vpGCSdcxWOYHOUk3dVNGDXN+FvAys=
|
||||||
|
golang.org/x/net v0.27.0/go.mod h1:dDi0PyhWNoiUOrAS8uXv/vnScO4wnHQO4mj9fn/RytE=
|
||||||
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
|||||||
42
main.go
42
main.go
@@ -1,7 +1,6 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"math/rand/v2"
|
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@@ -25,13 +24,17 @@ func main() {
|
|||||||
|
|
||||||
func runApp() error {
|
func runApp() error {
|
||||||
// urls := []string{"gemini://smol.gr"}
|
// urls := []string{"gemini://smol.gr"}
|
||||||
urls := []string{"gemini://gmi.noulin.net/", "gemini://warmedal.se/~antenna/"}
|
// urls := []string{"gemini://gemini.circumlunar.space/users/solderpunk/gemlog/orphans-of-netscape.gmi"} // Test 31 redirect
|
||||||
|
// urls := []string{"gemini://zaibatsu.circumlunar.space/~solderpunk/gemlog/orphans-of-netscape.gmi"}
|
||||||
|
// urls := []string{"gemini://farcaster.net/berlin/dared.jpg"}
|
||||||
|
// urls := []string{"gemini://smol.gr/media/amstrad_cpc_6128.jpg", "https://go.dev/blog/go-brand/Go-Logo/PNG/Go-Logo_Blue.png"}
|
||||||
|
urls := []string{"gemini://tlgs.one/", "gemini://gmi.noulin.net/", "gemini://warmedal.se/~antenna/"}
|
||||||
|
|
||||||
queue := make(chan string, 1000)
|
queue := make(chan string, 1000)
|
||||||
results := make(chan Snapshot, 100)
|
results := make(chan Snapshot, 100)
|
||||||
done := make(chan struct{})
|
done := make(chan struct{})
|
||||||
|
|
||||||
go spawnStats(queue, results)
|
go spawnStatsReport(queue, results)
|
||||||
go resultsHandler(queue, results)
|
go resultsHandler(queue, results)
|
||||||
spawnWorkers(CONFIG.numOfWorkers, queue, results)
|
spawnWorkers(CONFIG.numOfWorkers, queue, results)
|
||||||
|
|
||||||
@@ -42,7 +45,7 @@ func runApp() error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func spawnStats(queue chan string, results chan Snapshot) {
|
func spawnStatsReport(queue chan string, results chan Snapshot) {
|
||||||
ticker := time.NewTicker(time.Duration(time.Second * 10))
|
ticker := time.NewTicker(time.Duration(time.Second * 10))
|
||||||
defer ticker.Stop()
|
defer ticker.Stop()
|
||||||
for range ticker.C {
|
for range ticker.C {
|
||||||
@@ -53,7 +56,6 @@ func spawnStats(queue chan string, results chan Snapshot) {
|
|||||||
|
|
||||||
func spawnWorkers(numOfWorkers int, queue <-chan string, results chan Snapshot) {
|
func spawnWorkers(numOfWorkers int, queue <-chan string, results chan Snapshot) {
|
||||||
LogInfo("Spawning %d workers", numOfWorkers)
|
LogInfo("Spawning %d workers", numOfWorkers)
|
||||||
// Start worker goroutines
|
|
||||||
for i := 0; i < numOfWorkers; i++ {
|
for i := 0; i < numOfWorkers; i++ {
|
||||||
go func(i int) {
|
go func(i int) {
|
||||||
worker(i, queue, results)
|
worker(i, queue, results)
|
||||||
@@ -85,8 +87,16 @@ func resultsHandler(queue chan string, results <-chan Snapshot) {
|
|||||||
|
|
||||||
func worker(id int, queue <-chan string, results chan Snapshot) {
|
func worker(id int, queue <-chan string, results chan Snapshot) {
|
||||||
for url := range queue {
|
for url := range queue {
|
||||||
LogDebug("Worker %d visiting %s", id, url)
|
if !shouldVisit(url) {
|
||||||
result := Visit(url)
|
LogInfo("Skipping %s", url)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
LogInfo("Worker %d visiting %s", id, url)
|
||||||
|
result, err := Visit(url)
|
||||||
|
if err != nil {
|
||||||
|
LogError("[%s] %w", url, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
// If we encountered an error when
|
// If we encountered an error when
|
||||||
// visiting, skip processing
|
// visiting, skip processing
|
||||||
if result.Error != nil {
|
if result.Error != nil {
|
||||||
@@ -94,23 +104,25 @@ func worker(id int, queue <-chan string, results chan Snapshot) {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
LogDebug("Worker %d processing %s", id, url)
|
LogDebug("Worker %d processing %s", id, url)
|
||||||
result = ProcessHeaders(result)
|
|
||||||
if result.Error != nil {
|
|
||||||
results <- *result
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if result.MimeType == "text/gemini" {
|
if result.MimeType == "text/gemini" {
|
||||||
result = ProcessGemini(result)
|
result = ProcessGemini(result)
|
||||||
}
|
}
|
||||||
if shouldPersist(result) {
|
if shouldPersist(result) {
|
||||||
LogInfo("Worker %d saving %s", id, url)
|
LogDebug("Worker %d saving %s", id, url)
|
||||||
SaveResult(CONFIG.rootPath, result)
|
SaveSnapshot(CONFIG.rootPath, result)
|
||||||
}
|
}
|
||||||
results <- *result
|
results <- *result
|
||||||
time.Sleep(time.Duration(rand.IntN(5)) * time.Second)
|
// time.Sleep(time.Duration(rand.IntN(5)) * time.Second)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// shouldVisit reports whether url should be fetched. Only URLs that start
// with the literal, case-sensitive prefix "gemini://" are accepted.
func shouldVisit(url string) bool {
	return strings.HasPrefix(url, "gemini://")
}
|
||||||
|
|
||||||
func shouldPersist(result *Snapshot) bool {
|
func shouldPersist(result *Snapshot) bool {
|
||||||
if result.MimeType == "text/gemini" ||
|
if result.MimeType == "text/gemini" ||
|
||||||
strings.HasPrefix(result.MimeType, "image/") ||
|
strings.HasPrefix(result.MimeType, "image/") ||
|
||||||
|
|||||||
91
network.go
91
network.go
@@ -4,25 +4,21 @@ import (
|
|||||||
"crypto/tls"
|
"crypto/tls"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"regexp"
|
||||||
|
"slices"
|
||||||
|
"strconv"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
func Visit(url string) (result *Snapshot) {
|
func Visit(url string) (snapshot *Snapshot, err error) {
|
||||||
result = &Snapshot{Timestamp: time.Now(), UID: UID()}
|
snapshot = &Snapshot{Timestamp: time.Now(), UID: UID()}
|
||||||
|
|
||||||
// Wrap error with additional information
|
|
||||||
defer func() {
|
|
||||||
if result.Error != nil {
|
|
||||||
result.Error = fmt.Errorf("[%s] Error: %w", result.URL, result.Error)
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
geminiUrl, err := ParseUrl(url, "")
|
geminiUrl, err := ParseUrl(url, "")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
result.Error = err
|
snapshot.Error = fmt.Errorf("[%s] %w", url, err)
|
||||||
return result
|
return snapshot, nil
|
||||||
}
|
}
|
||||||
result.URL = *geminiUrl
|
snapshot.URL = *geminiUrl
|
||||||
|
|
||||||
LogDebug("[%s] Connecting", geminiUrl)
|
LogDebug("[%s] Connecting", geminiUrl)
|
||||||
|
|
||||||
@@ -32,25 +28,29 @@ func Visit(url string) (result *Snapshot) {
|
|||||||
}
|
}
|
||||||
conn, err := tls.Dial("tcp", fmt.Sprintf("%s:%d", geminiUrl.Hostname, geminiUrl.Port), tlsConfig)
|
conn, err := tls.Dial("tcp", fmt.Sprintf("%s:%d", geminiUrl.Hostname, geminiUrl.Port), tlsConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
result.Error = err
|
snapshot.Error = err
|
||||||
return result
|
return snapshot, nil
|
||||||
}
|
}
|
||||||
// Defer properly: Also handle possible
|
// Defer properly: Also handle possible
|
||||||
// error of conn.Close()
|
// error of conn.Close()
|
||||||
defer func() {
|
defer func() {
|
||||||
err := conn.Close()
|
err := conn.Close()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
result.Error = fmt.Errorf("[%s] Closing connection error, ignoring: %w", result.URL.String(), err)
|
snapshot.Error = fmt.Errorf("[%s] Closing connection error, ignoring: %w", snapshot.URL.String(), err)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// Read data from the connection
|
// Read data from the connection
|
||||||
// TODO make timeout configurable
|
conn.SetReadDeadline(time.Now().Add(time.Duration(CONFIG.responseTimeout) * time.Second))
|
||||||
conn.SetReadDeadline(time.Now().Add(5 * time.Second))
|
buf := make([]byte, 4096)
|
||||||
buf := make([]byte, 1024)
|
|
||||||
var data []byte
|
var data []byte
|
||||||
|
|
||||||
// Write Gemini request to get response.
|
// Write Gemini request to get response.
|
||||||
|
// paths := []string{"/", ".", ""}
|
||||||
|
// if slices.Contains(paths, geminiUrl.Path) || strings.HasSuffix(geminiUrl.Path, "gmi") {
|
||||||
conn.Write([]byte(fmt.Sprintf("%s\r\n", geminiUrl.String())))
|
conn.Write([]byte(fmt.Sprintf("%s\r\n", geminiUrl.String())))
|
||||||
|
// }
|
||||||
|
|
||||||
// Read response bytes in len(buf) byte chunks
|
// Read response bytes in len(buf) byte chunks
|
||||||
for {
|
for {
|
||||||
n, err := conn.Read(buf)
|
n, err := conn.Read(buf)
|
||||||
@@ -58,21 +58,60 @@ func Visit(url string) (result *Snapshot) {
|
|||||||
data = append(data, buf[:n]...)
|
data = append(data, buf[:n]...)
|
||||||
}
|
}
|
||||||
if len(data) > CONFIG.maxResponseSize {
|
if len(data) > CONFIG.maxResponseSize {
|
||||||
result.Error = fmt.Errorf("Response size exceeded maximum of %d bytes", CONFIG.maxResponseSize)
|
snapshot.Error = fmt.Errorf("[%s] Response size exceeded maximum of %d bytes", url, CONFIG.maxResponseSize)
|
||||||
return result
|
return snapshot, nil
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if err == io.EOF {
|
if err == io.EOF {
|
||||||
break
|
break
|
||||||
} else {
|
} else {
|
||||||
result.Error = err
|
snapshot.Error = fmt.Errorf("[%s] %w", url, err)
|
||||||
return result
|
return snapshot, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
LogDebug("[%s] Received %d bytes", geminiUrl.String(), len(data))
|
LogDebug("[%s] Received %d bytes", geminiUrl.String(), len(data))
|
||||||
// time.Sleep(time.Duration(time.Second * 2))
|
err = processResponse(snapshot, data)
|
||||||
// LogDebug("[%s] Visitor finished", geminiUrl.String())
|
if err != nil {
|
||||||
result.Data = string(data)
|
snapshot.Error = fmt.Errorf("%w", err)
|
||||||
return result
|
}
|
||||||
|
return snapshot, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func processResponse(snapshot *Snapshot, data []byte) error {
|
||||||
|
headers, body, err := getHeadersAndData(data)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
code, mimeType, lang := getMimeTypeAndLang(headers)
|
||||||
|
snapshot.ResponseCode, snapshot.MimeType, snapshot.Lang, snapshot.Data = code, mimeType, lang, body
|
||||||
|
if mimeType == "text/gemini" {
|
||||||
|
snapshot.GemText = string(body)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// getHeadersAndData splits a raw response at its first '\n'.
//
// It returns everything before that newline as a string (a preceding '\r',
// if any, is kept) and everything after it as a sub-slice of data. When data
// contains no newline at all, an error is returned together with an empty
// header and a nil body.
func getHeadersAndData(data []byte) (string, []byte, error) {
	if nl := slices.Index(data, '\n'); nl >= 0 {
		return string(data[:nl]), data[nl+1:], nil
	}
	return "", nil, fmt.Errorf("Could not parse response header")
}
|
||||||
|
|
||||||
|
// statusAndTypeRe matches "<status> <media type>" at the start of a header
// line. The media type must be followed by a run of ';' / whitespace or by
// the end of the string, so a header without a trailing "\r" is accepted.
var statusAndTypeRe = regexp.MustCompile(`^(\d+)\s+([a-zA-Z0-9/\-+]+)(?:[;\s]+|$)`)

// langParamRe finds a "lang=<tag>" parameter that starts the parameter list
// or follows a ';' or whitespace separator.
var langParamRe = regexp.MustCompile(`(?:^|[;\s])lang=([a-zA-Z0-9-]+)`)

// getMimeTypeAndLang parses a response header line of the form
// "<status> <media type>[; param=value ...]".
//
// It returns the numeric status code, the media type and the value of the
// lang parameter (empty when absent). The lang parameter is found wherever
// it appears in the parameter list, not only directly after the media type.
// When the line does not start with a status code followed by a media type,
// or the status code does not fit in an int, it returns (0, "", "").
func getMimeTypeAndLang(headers string) (int, string, string) {
	loc := statusAndTypeRe.FindStringSubmatchIndex(headers)
	if loc == nil {
		return 0, "", ""
	}

	code, err := strconv.Atoi(headers[loc[2]:loc[3]])
	if err != nil {
		return 0, "", ""
	}
	mimeType := headers[loc[4]:loc[5]]

	// Look for lang only in what follows the media type and its separators.
	lang := ""
	if m := langParamRe.FindStringSubmatch(headers[loc[1]:]); m != nil {
		lang = m[1]
	}
	return code, mimeType, lang
}
|
||||||
|
|||||||
@@ -11,7 +11,8 @@ type Snapshot struct {
|
|||||||
URL GeminiUrl `json:"url,omitempty"`
|
URL GeminiUrl `json:"url,omitempty"`
|
||||||
Timestamp time.Time `json:"timestamp,omitempty"`
|
Timestamp time.Time `json:"timestamp,omitempty"`
|
||||||
MimeType string `json:"mimetype,omitempty"`
|
MimeType string `json:"mimetype,omitempty"`
|
||||||
Data string `json:"data,omitempty"`
|
Data []byte `json:"data,omitempty"`
|
||||||
|
GemText string `json:"gemtext,omitempty"`
|
||||||
Links []GeminiUrl `json:"links,omitempty"`
|
Links []GeminiUrl `json:"links,omitempty"`
|
||||||
Lang string `json:"lang,omitempty"`
|
Lang string `json:"lang,omitempty"`
|
||||||
// Gemini status code
|
// Gemini status code
|
||||||
|
|||||||
Reference in New Issue
Block a user