diff --git a/gopher/errors.go b/gopher/errors.go
new file mode 100644
index 0000000..2d59fef
--- /dev/null
+++ b/gopher/errors.go
@@ -0,0 +1,41 @@
+package gopher
+
+import (
+	"gemini-grc/errors"
+)
+
+// GopherError is an error encountered while
+// visiting a Gopher host, and is only for
+// Gopher errors (item type indicator 3).
+type GopherError struct {
+	Err error
+}
+
+// Error returns the message of the wrapped error, or a generic
+// message when no underlying error was recorded.
+func (e *GopherError) Error() string {
+	if e.Err == nil {
+		return "gopher error"
+	}
+	return e.Err.Error()
+}
+
+// Unwrap exposes the wrapped error so callers can inspect the chain.
+func (e *GopherError) Unwrap() error {
+	return e.Err
+}
+
+// NewGopherError wraps err in a *GopherError.
+func NewGopherError(err error) error {
+	return &GopherError{Err: err}
+}
+
+// IsGopherError reports whether errors.As finds a *GopherError in
+// err. A nil err is never a GopherError.
+func IsGopherError(err error) bool {
+	if err == nil {
+		return false
+	}
+	var asError *GopherError
+	return errors.As(err, &asError)
+}
diff --git a/gopher/network.go b/gopher/network.go
new file mode 100644
index 0000000..745c6e4
--- /dev/null
+++ b/gopher/network.go
@@ -0,0 +1,326 @@
+package gopher
+
+import (
+	"fmt"
+	"io"
+	"net"
+	stdurl "net/url"
+	"regexp"
+	"strings"
+	"time"
+	"unicode/utf8"
+
+	errors2 "gemini-grc/common/errors"
+	"gemini-grc/common/linkList"
+	"gemini-grc/common/snapshot"
+	_url "gemini-grc/common/url"
+	"gemini-grc/config"
+	"gemini-grc/errors"
+	"gemini-grc/logging"
+	"github.com/guregu/null/v5"
+)
+
+// References:
+// RFC 1436 https://www.rfc-editor.org/rfc/rfc1436.html
+
+// The default port for Gopher is 70.
+// Originally Gopher used ASCII or
+// ISO-8859-1; many servers now use UTF-8.
+// No transcoding is done here: a response that
+// is valid UTF-8 is stored as text, anything
+// else is stored as raw bytes and not parsed.
+
+// Commonly seen Gopher item type indicators (prefixes):
+//
+// `0` - Plain Text File
+// `1` - Directory/Menu
+// `2` - CSO Phone Book Server
+// `3` - Error Message
+// `4` - BinHexed Macintosh File
+// `5` - DOS Binary Archive
+// `6` - UNIX uuencoded File
+// `7` - Index/Search Server
+// `8` - Telnet Session
+// `9` - Binary File
+// `+` - Mirror/Redundant Server
+// `g` - GIF Image
+// `I` - Image File (non-GIF)
+// `T` - TN3270 Session
+// `i` - Informational Message (menu line)
+// `h` - HTML File
+// `s` - Sound/Music File
+// `d` - Document File
+// `w` - WHOIS Service
+// `;` - Movie/Video File (Gopher+)
+// `<` - Sound File (Gopher+)
+// `M` - MIME File (mail message or similar)
+// `:` - Bitmap Image
+// `c` - Calendar File
+// `p` - PostScript File
+
+// The most commonly used ones are `0` (text), `1` (directory), `i` (info), and `3` (error).
+// The original Gopher protocol only specified types 0-9, `+`, `g`, `I`, and `T`.
+// The others were added by various implementations and extensions over time.
+
+// Error methodology:
+// HostError for DNS/network errors
+// GopherError for errors reported by the Gopher server (item type 3)
+// NewError for other errors
+// NewFatalError for other fatal errors
+
+// Visit fetches the Gopher resource at url and records the outcome in a
+// snapshot. Errors recognised by IsGopherError or errors2.IsHostError are
+// stored in the snapshot's Error field and Visit returns a nil error for them;
+// any other failure is returned to the caller. A response that is valid UTF-8
+// is stored as text (with NUL characters removed) and scanned for an error
+// line and for links; any other response is stored as raw bytes.
+func Visit(url string) (*snapshot.Snapshot, error) {
+	s, err := snapshot.SnapshotFromURL(url, false)
+	if err != nil {
+		return nil, err
+	}
+
+	data, err := connectAndGetData(url)
+	if err != nil {
+		logging.LogDebug("Error: %s", err.Error())
+		if IsGopherError(err) || errors2.IsHostError(err) {
+			s.Error = null.StringFrom(err.Error())
+			return s, nil
+		}
+		return nil, err
+	}
+
+	text := string(data)
+	if !utf8.ValidString(text) {
+		// Binary (non-UTF-8) payloads are kept verbatim and not parsed.
+		s.Data = null.ValueFrom(data)
+		return s, nil
+	}
+	s.GemText = null.StringFrom(removeNullChars(text))
+
+	if responseError := checkForError(text); responseError != nil {
+		s.Error = null.StringFrom(responseError.Error())
+		return s, nil
+	}
+
+	// Collect only the links that parse. Pre-sizing the list to len(links)
+	// and assigning by index would leave zero-value URLs for the failures.
+	links := getGopherPageLinks(text)
+	linkURLs := linkList.LinkList(make([]_url.URL, 0, len(links)))
+	for _, link := range links {
+		linkURL, err := _url.ParseURL(link, "", true)
+		if err != nil {
+			continue
+		}
+		linkURLs = append(linkURLs, *linkURL)
+	}
+	if len(linkURLs) != 0 {
+		s.Links = null.ValueFrom(linkURLs)
+	}
+
+	return s, nil
+}
+
+// connectAndGetData opens a TCP connection to the host named in url (port 70
+// when the URL has none), sends the selector derived from the URL path
+// followed by CRLF, and reads until the server closes the connection.
+//
+// Read and write deadlines are set once, config.CONFIG.ResponseTimeout seconds
+// from connection time, so they bound the whole exchange. An unparsable URL is
+// reported through errors.NewError; dial, deadline, write and read failures,
+// and responses larger than config.CONFIG.MaxResponseSize bytes, are wrapped
+// with errors2.NewHostError.
+func connectAndGetData(url string) ([]byte, error) {
+	parsedURL, err := stdurl.Parse(url)
+	if err != nil {
+		return nil, errors.NewError(err)
+	}
+
+	port := parsedURL.Port()
+	if port == "" {
+		port = "70"
+	}
+	// JoinHostPort restores the brackets Hostname strips from IPv6 literals;
+	// plain "%s:%s" formatting would produce an address Dial cannot parse.
+	host := net.JoinHostPort(parsedURL.Hostname(), port)
+	timeoutDuration := time.Duration(config.CONFIG.ResponseTimeout) * time.Second
+
+	// Establish the underlying TCP connection.
+	dialer := &net.Dialer{
+		Timeout: timeoutDuration,
+	}
+	logging.LogDebug("Dialing %s", host)
+	conn, err := dialer.Dial("tcp", host)
+	if err != nil {
+		return nil, errors2.NewHostError(err)
+	}
+	// Make sure we always close the connection.
+	defer func() {
+		_ = conn.Close()
+	}()
+
+	// Set read and write timeouts on the TCP connection.
+	err = conn.SetReadDeadline(time.Now().Add(timeoutDuration))
+	if err != nil {
+		return nil, errors2.NewHostError(err)
+	}
+	err = conn.SetWriteDeadline(time.Now().Add(timeoutDuration))
+	if err != nil {
+		return nil, errors2.NewHostError(err)
+	}
+
+	// Send Gopher request to trigger server response.
+	payload := constructPayloadFromPath(parsedURL.Path)
+	_, err = conn.Write([]byte(payload + "\r\n"))
+	if err != nil {
+		return nil, errors2.NewHostError(err)
+	}
+
+	// We read `buf`-sized chunks and add data to `data`.
+	buf := make([]byte, 4096)
+	var data []byte
+	for {
+		n, err := conn.Read(buf)
+		if n > 0 {
+			data = append(data, buf[:n]...)
+		}
+		// Enforce the limit before looking at err so a final chunk that
+		// arrives together with io.EOF cannot slip past it.
+		if len(data) > config.CONFIG.MaxResponseSize {
+			return nil, errors2.NewHostError(fmt.Errorf("response exceeded max"))
+		}
+		if err != nil {
+			if errors.Is(err, io.EOF) {
+				break
+			}
+			return nil, errors2.NewHostError(err)
+		}
+	}
+	logging.LogDebug("Got %d bytes", len(data))
+	return data, nil
+}
+
+// itemTypePrefix matches a URL path that begins with a one-character item
+// type segment such as "/1/". It is compiled once here instead of on every
+// constructPayloadFromPath call.
+var itemTypePrefix = regexp.MustCompile(`^/[\w]/.*`)
+
+// constructPayloadFromPath turns a URL path into the selector sent to the
+// server. A leading "/<type>/" segment, where <type> is a single word
+// character, is removed, and the result always starts with "/":
+//
+//	"/1/path/to/resource" -> "/path/to/resource"
+//	"/1"                  -> "/1"
+//	""                    -> "/"
+//
+// NOTE(review): RFC 4266 places the selector directly after the type
+// character, and getGopherPageLinks inserts a "/" before selectors that lack
+// one, so such selectors are requested with an added leading slash. Confirm
+// that target servers accept this.
+func constructPayloadFromPath(urlpath string) string {
+	selector := urlpath
+	if itemTypePrefix.MatchString(urlpath) {
+		// The match guarantees the path starts with "/", one byte, "/";
+		// everything after those three bytes is the selector body.
+		selector = urlpath[3:]
+	}
+	if !strings.HasPrefix(selector, "/") {
+		selector = "/" + selector
+	}
+	return selector
+}
+
+// checkForError reports a Gopher error when the first line of the response,
+// after trimming surrounding whitespace, starts with item type '3'. The
+// error text is everything before the first tab of that line, including the
+// leading type character. Error lines further down are ignored. It returns
+// nil when the first line is not an error line.
+func checkForError(utfData string) error {
+	// strings.Split always returns at least one element, so [0] is safe even
+	// for empty input.
+	firstLine := strings.Split(strings.TrimSpace(utfData), "\n")[0]
+	if !strings.HasPrefix(firstLine, "3") {
+		return nil
+	}
+	display := strings.Split(firstLine, "\t")[0]
+	return NewGopherError(fmt.Errorf("gopher error: %s", strings.TrimSpace(display)))
+}
+
+// getGopherPageLinks extracts link targets from a Gopher menu whose lines
+// have the form
+//
+//	<type><display>\t<selector>\t<host>\t<port>[\t<extra>...]
+//
+// Informational ('i') lines, the "." terminator, lines with fewer than three
+// fields and lines with an empty host are skipped. An 'h' item whose selector
+// starts with "URL:" yields the embedded URL as is. Every other item yields
+// "gopher://<host>:<port>/<type><selector>", with a "/" inserted before a
+// selector that lacks one and port 70 used when the port field is absent or
+// blank.
+func getGopherPageLinks(content string) []string {
+	var links []string
+
+	for _, line := range strings.Split(strings.TrimSpace(content), "\n") {
+		// Menu lines normally end in CRLF; drop the CR left by the split.
+		line = strings.TrimSuffix(line, "\r")
+		if line == "" || line == "." {
+			continue
+		}
+
+		itemType := line[0]
+		if itemType == 'i' {
+			continue
+		}
+
+		// Split on every tab rather than capping at four fields: Gopher+
+		// servers append extra fields (e.g. "\t+") after the port, and a
+		// capped split would fold them into the port.
+		parts := strings.Split(line[1:], "\t")
+		if len(parts) < 3 {
+			continue
+		}
+
+		selector := strings.TrimSpace(parts[1])
+		host := strings.TrimSpace(parts[2])
+		if host == "" {
+			continue
+		}
+
+		// Handle HTML links first
+		if itemType == 'h' && strings.HasPrefix(selector, "URL:") {
+			if url := strings.TrimSpace(selector[4:]); url != "" {
+				links = append(links, url)
+			}
+			continue
+		}
+
+		port := "70"
+		if len(parts) > 3 {
+			if p := strings.TrimSpace(parts[3]); p != "" {
+				port = p
+			}
+		}
+
+		// Path: always /type + selector
+		var b strings.Builder
+		b.WriteString("gopher://")
+		b.WriteString(host)
+		b.WriteString(":")
+		b.WriteString(port)
+		b.WriteString("/")
+		b.WriteString(string(itemType))
+		if !strings.HasPrefix(selector, "/") {
+			b.WriteString("/")
+		}
+		b.WriteString(selector)
+
+		links = append(links, b.String())
+	}
+
+	return links
+}
+
+// removeNullChars returns input with every NUL (U+0000) character removed.
+func removeNullChars(input string) string {
+	return strings.ReplaceAll(input, "\u0000", "")
+}
diff --git a/gopher/network_test.go b/gopher/network_test.go
new file mode 100644
index 0000000..8bfe32e
--- /dev/null
+++ b/gopher/network_test.go
@@ -0,0 +1,291 @@
+package gopher
+
+import (
+	"net"
+	"testing"
+
+	"gemini-grc/common/errors"
+	"gemini-grc/config"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestConstructPayloadFromPath(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{
+			name:     "Path with Gopher item type",
+			input:    "/1/path/to/resource",
+			expected: "/path/to/resource",
+		},
+		{
+			name:     "Path with different item type",
+			input:    "/0/another/path",
+			expected: "/another/path",
+		},
+		{
+			name:     "Path without item type but with leading slash",
+			input:    "/simple/path",
+			expected: "/simple/path",
+		},
+		{
+			name:     "Path without item type and without leading slash",
+			input:    "no/leading/slash",
+			expected: "/no/leading/slash",
+		},
+		{
+			name:     "Empty path",
+			input:    "",
+			expected: "/",
+		},
+		{
+			name:     "Single character item type",
+			input:    "/h/homepage",
+			expected: "/homepage",
+		},
+		{
+			name:     "Single slash",
+			input:    "/",
+			expected: "/",
+		},
+		{
+			name:     "Item type-looking path",
+			input:    "/1",
+			expected: "/1",
+		},
+		{
+			name:     "Item type with empty selector",
+			input:    "/1/",
+			expected: "/",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := constructPayloadFromPath(tt.input)
+			if result != tt.expected {
+				t.Errorf("constructPayloadFromPath(%q) = %q, want %q",
+					tt.input, result, tt.expected)
+			}
+		})
+	}
+}
+
+func TestParseLinks(t *testing.T) {
+	tests := []struct {
+		name  string
+		input string
+		want  int // number of expected links
+	}{
+		{
+			name:  "Empty input",
+			input: "",
+			want:  0,
+		},
+		{
+			name:  "Single directory link",
+			input: "1About Us\t/about\texample.com",
+			want:  1,
+		},
+		{
+			name:  "Single text file link",
+			input: "0README\t/readme.txt\texample.com",
+			want:  1,
+		},
+		{
+			name:  "Multiple links of different types",
+			input: "1About Us\t/about\texample.com\n0README\t/readme.txt\texample.com\n1Contact\t/contact\texample.com",
+			want:  3,
+		},
+		{
+			name:  "Ignore non-linkable types",
+			input: "iInfo line\t/info\texample.com\n1Directory\t/dir\texample.com\n0Text\t/text.txt\texample.com",
+			want:  2,
+		},
+		{
+			name:  "Malformed lines",
+			input: "1Incomplete line\n0No tabs\n1Missing parts\t",
+			want:  0,
+		},
+		{
+			name:  "Mixed valid and invalid lines",
+			input: "1Valid link\t/valid\texample.com\n1Incomplete\t\n0Text file\t/text.txt\texample.com\n1Another valid\t/another\texample.com",
+			want:  3,
+		},
+		{
+			name:  "Absolute URLs",
+			input: "1External link\tgopher://external.com/path\texternal.com\n0Document\tgopher://other.com/doc.txt\tother.com",
+			want:  2,
+		},
+		{
+			name:  "With whitespace",
+			input: " 1Padded line \t/padded\texample.com\n0Text file \t/doc.txt\texample.com",
+			want:  2,
+		},
+		{
+			name:  "Special characters in paths",
+			input: "1Special chars\t/path with spaces\texample.com\n0Doc\t/über/päth.txt\texample.com",
+			want:  2,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := getGopherPageLinks(tt.input)
+			assert.Equal(t, tt.want, len(got), "expected %d links, got %d", tt.want, len(got))
+		})
+	}
+}
+
+// TestGopherPageLinksGopherPlusFields checks that fields following the port
+// (as sent by Gopher+ servers) and CRLF line endings do not leak into the URL.
+func TestGopherPageLinksGopherPlusFields(t *testing.T) {
+	got := getGopherPageLinks("1Docs\t/docs\texample.com\t7070\t+\r\n.\r\n")
+	assert.Equal(t, []string{"gopher://example.com:7070/1/docs"}, got)
+}
+
+// Inputs use interpreted string literals so that \t and \n are real tab and
+// newline characters; inside raw (backquoted) literals they would be a
+// literal backslash followed by a letter.
+func TestCheckForError(t *testing.T) {
+	tests := []struct {
+		name        string
+		input       string
+		wantError   bool
+		errorPrefix string
+	}{
+		{
+			name:        "No error",
+			input:       "1Directory\t/dir\texample.com\n0Text\t/text.txt\texample.com",
+			wantError:   false,
+			errorPrefix: "",
+		},
+		{
+			name:        "Simple error message",
+			input:       "3Error: File not found\t\texample.com",
+			wantError:   true,
+			errorPrefix: "gopher error: 3Error: File not found",
+		},
+		{
+			name:        "Error with multiple tabs",
+			input:       "3File not found\t/error\texample.com\t70",
+			wantError:   true,
+			errorPrefix: "gopher error: 3File not found",
+		},
+		{
+			name:        "Error among valid entries",
+			input:       "1Welcome\t/welcome\texample.com\n3Access denied\t\texample.com\n0README\t/readme.txt\texample.com",
+			wantError:   false,
+			errorPrefix: "",
+		},
+		{
+			name:        "Error with no tabs",
+			input:       "3Server is down for maintenance",
+			wantError:   true,
+			errorPrefix: "gopher error: 3Server is down for maintenance",
+		},
+		{
+			name:        "Multiple errors (should return first)",
+			input:       "3First error\t\texample.com\n3Second error\t\texample.com",
+			wantError:   true,
+			errorPrefix: "gopher error: 3First error",
+		},
+		{
+			name:        "Error with whitespace",
+			input:       " 3 Error with spaces \t\texample.com",
+			wantError:   true,
+			errorPrefix: "gopher error: 3 Error with spaces",
+		},
+		{
+			name:        "Empty input",
+			input:       "",
+			wantError:   false,
+			errorPrefix: "",
+		},
+		{
+			name:        "Just newlines",
+			input:       "\n\n\n",
+			wantError:   false,
+			errorPrefix: "",
+		},
+		{
+			name:        "Error after empty lines",
+			input:       "\n\n3Error after empty lines\t\texample.com",
+			wantError:   true,
+			errorPrefix: "gopher error: 3Error after empty lines",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			err := checkForError(tt.input)
+
+			if !tt.wantError {
+				assert.NoError(t, err)
+				return
+			}
+
+			if assert.Error(t, err) {
+				assert.Contains(t, err.Error(), tt.errorPrefix)
+			}
+		})
+	}
+}
+
+func TestConnectAndGetDataTimeout(t *testing.T) {
+	// Start a test server that doesn't respond
+	listener, err := net.Listen("tcp", "localhost:0")
+	if err != nil {
+		t.Fatalf("Failed to start listener: %v", err)
+	}
+	defer listener.Close()
+
+	// done is closed when the test returns, releasing the server goroutine
+	// instead of leaving it blocked forever.
+	done := make(chan struct{})
+	defer close(done)
+
+	// Accept the connection but don't respond
+	go func() {
+		conn, err := listener.Accept()
+		if err != nil {
+			// Not reported through t: this goroutine may outlive the test.
+			return
+		}
+		defer conn.Close()
+
+		// Keep the connection open without sending any data to simulate a timeout
+		<-done
+	}()
+
+	// Construct the URL of our test server
+	address := listener.Addr().String()
+	testURL := "gopher://" + address + "/testpath"
+
+	// Save the original config values and restore them however the test exits
+	originalTimeout := config.CONFIG.ResponseTimeout
+	originalMaxSize := config.CONFIG.MaxResponseSize
+	defer func() {
+		config.CONFIG.ResponseTimeout = originalTimeout
+		config.CONFIG.MaxResponseSize = originalMaxSize
+	}()
+
+	// Set test config values
+	config.CONFIG.ResponseTimeout = 1    // Set a very short timeout for this test
+	config.CONFIG.MaxResponseSize = 1024 // Just for consistency, we won't reach this
+
+	// Test the function
+	_, err = connectAndGetData(testURL)
+
+	// The read deadline should surface as a HostError
+	switch {
+	case err == nil:
+		t.Error("Expected an error due to timeout, but got no error")
+	case !errors.IsHostError(err):
+		t.Errorf("Expected a HostError, but got: %v", err)
+	default:
+		t.Logf("Successfully timed out: %v", err)
+	}
+}