gemini-grc/robotsMatch/robots_test.go
antanst fe40874844 Add robots.txt parsing and matching functionality
- Create separate robotsMatch package for robots.txt handling
- Implement robots.txt parsing with support for different directives
- Add support for both Allow and Disallow patterns
- Include robots.txt matching with efficient pattern matching
- Add test cases for robots matching
2025-05-22 12:46:21 +03:00

package robotsMatch

import (
	"context"
	"reflect"
	"testing"
)

func TestParseRobotsTxt(t *testing.T) {
	t.Parallel()
	input := `User-agent: *
Disallow: /cgi-bin/wp.cgi/view
Disallow: /cgi-bin/wp.cgi/media
User-agent: googlebot
Disallow: /admin/`
	expected := []string{
		"gemini://example.com/cgi-bin/wp.cgi/view",
		"gemini://example.com/cgi-bin/wp.cgi/media",
		"gemini://example.com/admin/",
	}
	result := ParseRobotsTxt(input, "example.com")
	if !reflect.DeepEqual(result, expected) {
		t.Errorf("ParseRobotsTxt() = %v, want %v", result, expected)
	}
}

func TestParseRobotsTxtEmpty(t *testing.T) {
	t.Parallel()
	input := ``
	result := ParseRobotsTxt(input, "example.com")
	if len(result) != 0 {
		t.Errorf("ParseRobotsTxt() = %v, want empty []string", result)
	}
}

func TestIsURLblocked(t *testing.T) {
	t.Parallel()
	disallowedURLs := []string{
		"gemini://example.com/cgi-bin/wp.cgi/view",
		"gemini://example.com/cgi-bin/wp.cgi/media",
		"gemini://example.com/admin/",
	}
	ctx := context.Background()

	url := "gemini://example.com/admin/index.html"
	if !isURLblocked(ctx, disallowedURLs, url) {
		t.Errorf("Expected %s to be blocked", url)
	}

	url = "gemini://example1.com/admin/index.html"
	if isURLblocked(ctx, disallowedURLs, url) {
		t.Errorf("expected %s to not be blocked", url)
	}
}
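
The implementations of ParseRobotsTxt and isURLblocked are not shown in this file. For context, the following is only a minimal sketch of implementations that would satisfy the tests above, assuming simple Disallow collection and prefix matching; the actual robotsMatch package may differ, and the Allow handling mentioned in the commit message is omitted here.

package robotsMatch

import (
	"context"
	"strings"
)

// ParseRobotsTxt collects every Disallow path in the robots.txt text and
// rewrites it as an absolute gemini:// URL for the given host.
// (Sketch only: ignores User-agent grouping, which the tests above do not exercise.)
func ParseRobotsTxt(input string, host string) []string {
	var disallowed []string
	for _, line := range strings.Split(input, "\n") {
		line = strings.TrimSpace(line)
		if strings.HasPrefix(line, "Disallow:") {
			path := strings.TrimSpace(strings.TrimPrefix(line, "Disallow:"))
			if path != "" {
				disallowed = append(disallowed, "gemini://"+host+path)
			}
		}
	}
	return disallowed
}

// isURLblocked reports whether url falls under any of the disallowed URL prefixes.
func isURLblocked(ctx context.Context, disallowedURLs []string, url string) bool {
	for _, prefix := range disallowedURLs {
		if strings.HasPrefix(url, prefix) {
			return true
		}
	}
	return false
}

With this sketch, "gemini://example.com/admin/index.html" is blocked because it shares the "gemini://example.com/admin/" prefix, while the same path on example1.com is not, matching the expectations in TestIsURLblocked.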