Add robots.txt parsing and matching functionality
- Create separate robotsMatch package for robots.txt handling - Implement robots.txt parsing with support for different directives - Add support for both Allow and Disallow patterns - Include robots.txt matching with efficient pattern matching - Add test cases for robots matching
This commit is contained in:
57
robotsMatch/robots_test.go
Normal file
57
robotsMatch/robots_test.go
Normal file
@@ -0,0 +1,57 @@
|
||||
package robotsMatch
|
||||
|
||||
import (
|
||||
"context"
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseRobotsTxt(t *testing.T) {
|
||||
t.Parallel()
|
||||
input := `User-agent: *
|
||||
Disallow: /cgi-bin/wp.cgi/view
|
||||
Disallow: /cgi-bin/wp.cgi/media
|
||||
User-agent: googlebot
|
||||
Disallow: /admin/`
|
||||
|
||||
expected := []string{
|
||||
"gemini://example.com/cgi-bin/wp.cgi/view",
|
||||
"gemini://example.com/cgi-bin/wp.cgi/media",
|
||||
"gemini://example.com/admin/",
|
||||
}
|
||||
|
||||
result := ParseRobotsTxt(input, "example.com")
|
||||
|
||||
if !reflect.DeepEqual(result, expected) {
|
||||
t.Errorf("ParseRobotsTxt() = %v, want %v", result, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseRobotsTxtEmpty(t *testing.T) {
|
||||
t.Parallel()
|
||||
input := ``
|
||||
|
||||
result := ParseRobotsTxt(input, "example.com")
|
||||
|
||||
if len(result) != 0 {
|
||||
t.Errorf("ParseRobotsTxt() = %v, want empty []string", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsURLblocked(t *testing.T) {
|
||||
t.Parallel()
|
||||
disallowedURLs := []string{
|
||||
"gemini://example.com/cgi-bin/wp.cgi/view",
|
||||
"gemini://example.com/cgi-bin/wp.cgi/media",
|
||||
"gemini://example.com/admin/",
|
||||
}
|
||||
ctx := context.Background()
|
||||
url := "gemini://example.com/admin/index.html"
|
||||
if !isURLblocked(ctx, disallowedURLs, url) {
|
||||
t.Errorf("Expected %s to be blocked", url)
|
||||
}
|
||||
url = "gemini://example1.com/admin/index.html"
|
||||
if isURLblocked(ctx, disallowedURLs, url) {
|
||||
t.Errorf("expected %s to not be blocked", url)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user