- Create separate robotsMatch package for robots.txt handling - Implement robots.txt parsing with support for different directives - Add support for both Allow and Disallow patterns - Include robots.txt matching with efficient pattern matching - Add test cases for robots matching
58 lines
1.3 KiB
Go
58 lines
1.3 KiB
Go
package robotsMatch
|
|
|
|
import (
|
|
"context"
|
|
"reflect"
|
|
"testing"
|
|
)
|
|
|
|
func TestParseRobotsTxt(t *testing.T) {
|
|
t.Parallel()
|
|
input := `User-agent: *
|
|
Disallow: /cgi-bin/wp.cgi/view
|
|
Disallow: /cgi-bin/wp.cgi/media
|
|
User-agent: googlebot
|
|
Disallow: /admin/`
|
|
|
|
expected := []string{
|
|
"gemini://example.com/cgi-bin/wp.cgi/view",
|
|
"gemini://example.com/cgi-bin/wp.cgi/media",
|
|
"gemini://example.com/admin/",
|
|
}
|
|
|
|
result := ParseRobotsTxt(input, "example.com")
|
|
|
|
if !reflect.DeepEqual(result, expected) {
|
|
t.Errorf("ParseRobotsTxt() = %v, want %v", result, expected)
|
|
}
|
|
}
|
|
|
|
func TestParseRobotsTxtEmpty(t *testing.T) {
|
|
t.Parallel()
|
|
input := ``
|
|
|
|
result := ParseRobotsTxt(input, "example.com")
|
|
|
|
if len(result) != 0 {
|
|
t.Errorf("ParseRobotsTxt() = %v, want empty []string", result)
|
|
}
|
|
}
|
|
|
|
func TestIsURLblocked(t *testing.T) {
|
|
t.Parallel()
|
|
disallowedURLs := []string{
|
|
"gemini://example.com/cgi-bin/wp.cgi/view",
|
|
"gemini://example.com/cgi-bin/wp.cgi/media",
|
|
"gemini://example.com/admin/",
|
|
}
|
|
ctx := context.Background()
|
|
url := "gemini://example.com/admin/index.html"
|
|
if !isURLblocked(ctx, disallowedURLs, url) {
|
|
t.Errorf("Expected %s to be blocked", url)
|
|
}
|
|
url = "gemini://example1.com/admin/index.html"
|
|
if isURLblocked(ctx, disallowedURLs, url) {
|
|
t.Errorf("expected %s to not be blocked", url)
|
|
}
|
|
}
|