Add robots.txt parsing and matching functionality
- Create separate robotsMatch package for robots.txt handling
- Implement robots.txt parsing with support for different directives
- Add support for both Allow and Disallow patterns
- Include robots.txt matching with efficient pattern matching
- Add test cases for robots matching
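For orientation, here is a minimal sketch of the Allow/Disallow prefix matching the commit message describes. The actual robotsMatch implementation is not part of this diff, so the `rule` type and the `parseRules`/`matchRules` helpers below are illustrative assumptions, not the package's real code:

package robotsMatch

import "strings"

// rule is a hypothetical representation of one Allow:/Disallow: line.
type rule struct {
	allow   bool
	pattern string // path prefix taken from the directive
}

// parseRules extracts Allow/Disallow path prefixes from robots.txt text,
// skipping empty patterns (an empty Disallow means "allow everything").
func parseRules(body string) []rule {
	var rules []rule
	for _, line := range strings.Split(body, "\n") {
		line = strings.TrimSpace(line)
		var r rule
		switch {
		case strings.HasPrefix(line, "Disallow:"):
			r = rule{allow: false, pattern: strings.TrimSpace(strings.TrimPrefix(line, "Disallow:"))}
		case strings.HasPrefix(line, "Allow:"):
			r = rule{allow: true, pattern: strings.TrimSpace(strings.TrimPrefix(line, "Allow:"))}
		default:
			continue
		}
		if r.pattern != "" {
			rules = append(rules, r)
		}
	}
	return rules
}

// matchRules reports whether path is blocked, letting the longest matching
// prefix win, as common robots.txt matchers do.
func matchRules(rules []rule, path string) (blocked bool) {
	longest := -1
	for _, r := range rules {
		if strings.HasPrefix(path, r.pattern) && len(r.pattern) > longest {
			longest = len(r.pattern)
			blocked = !r.allow
		}
	}
	return blocked
}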
robotsMatch/robotsMatch_test.go (new file, 49 lines)
@@ -0,0 +1,49 @@
package robotsMatch

import (
	"context"
	"errors"
	"sync"
	"testing"

	"gemini-grc/config"
)

func TestInitializeShutdown(t *testing.T) {
	err := Initialize()
	if err != nil {
		t.Errorf("Initialize() failed: %v", err)
	}

	err = Shutdown()
	if err != nil {
		t.Errorf("Shutdown() failed: %v", err)
	}
}

func TestRobotMatch_EmptyCache(t *testing.T) {
	// This test doesn't actually connect to gemini URLs due to the complexity
	// of mocking the gemini client, but tests the caching behavior when no
	// robots.txt is found (empty cache case)
	config.CONFIG.ResponseTimeout = 5

	// Clear the cache before testing
	RobotsCache = sync.Map{}

	// For empty cache or DNS errors, RobotMatch should return false (allow the URL) without an error
	ctx := context.Background()
	blocked, err := RobotMatch(ctx, "gemini://nonexistent.example.com/")
	// We expect no error for non-existent host because we changed our error handling
	// to be more tolerant of DNS/connectivity issues
	if err != nil {
		// The only errors we should get are context-related (timeout, cancellation)
		if !errors.Is(err, context.DeadlineExceeded) && !errors.Is(err, context.Canceled) {
			t.Errorf("Expected nil error for non-existent host, got: %v", err)
		}
	}

	// The URL should be allowed (not blocked) when robots.txt can't be fetched
	if blocked {
		t.Errorf("Expected URL to be allowed when robots.txt can't be fetched")
	}
}
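As a usage note, the fail-open behavior the second test checks could look roughly like this from a caller's side. Only Initialize, Shutdown, RobotMatch, and the context handling come from the tests above; the crawler wrapper and the "gemini-grc/robotsMatch" import path (inferred from the "gemini-grc/config" import) are assumptions:

package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"gemini-grc/robotsMatch"
)

func main() {
	// Set up the package-level robots.txt cache and client state.
	if err := robotsMatch.Initialize(); err != nil {
		log.Fatalf("robotsMatch.Initialize: %v", err)
	}
	defer func() {
		if err := robotsMatch.Shutdown(); err != nil {
			log.Printf("robotsMatch.Shutdown: %v", err)
		}
	}()

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	url := "gemini://example.com/some/page"
	blocked, err := robotsMatch.RobotMatch(ctx, url)
	if err != nil {
		// Per the test above, only context errors (deadline, cancellation)
		// are expected; DNS or connection failures fail open and leave the
		// URL allowed.
		log.Printf("RobotMatch: %v", err)
	}
	if blocked {
		fmt.Println("skipping", url, "(disallowed by robots.txt)")
		return
	}
	fmt.Println("fetching", url)
}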