Revert "refactor: Improve robots.txt parsing and caching"
This reverts commit 6a96fb26cc.
This commit is contained in:
@@ -5,49 +5,27 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
func ParseRobotsTxt(content string) *RobotsData {
|
// Takes robots.txt content and a host, and
|
||||||
data := &RobotsData{}
|
// returns a list of full URLs that shouldn't
|
||||||
var currentUserAgent string
|
// be visited.
|
||||||
|
// TODO Also take into account the user agent?
|
||||||
for _, line := range strings.Split(content, "\n") {
|
// Check gemini://geminiprotocol.net/docs/companion/robots.gmi
|
||||||
line = strings.TrimSpace(line)
|
func ParseRobotsTxt(content string, host string) []string {
|
||||||
if line == "" || strings.HasPrefix(line, "#") {
|
var disallowedPaths []string
|
||||||
continue
|
for _, line := range strings.Split(content, "\n") {
|
||||||
}
|
line = strings.TrimSpace(line)
|
||||||
|
line = strings.ToLower(line)
|
||||||
parts := strings.SplitN(line, ":", 2)
|
if strings.HasPrefix(line, "disallow:") {
|
||||||
if len(parts) != 2 {
|
parts := strings.SplitN(line, ":", 2)
|
||||||
continue
|
if len(parts) == 2 {
|
||||||
}
|
path := strings.TrimSpace(parts[1])
|
||||||
|
if path != "" {
|
||||||
directive := strings.TrimSpace(strings.ToLower(parts[0]))
|
// Construct full Gemini URL
|
||||||
value := strings.TrimSpace(parts[1])
|
disallowedPaths = append(disallowedPaths,
|
||||||
|
fmt.Sprintf("gemini://%s%s", host, path))
|
||||||
switch directive {
|
}
|
||||||
case "user-agent":
|
}
|
||||||
currentUserAgent = value
|
}
|
||||||
case "allow":
|
}
|
||||||
if value != "" {
|
return disallowedPaths
|
||||||
data.Rules = append(data.Rules, RobotRule{
|
|
||||||
UserAgent: currentUserAgent,
|
|
||||||
Allow: true,
|
|
||||||
Path: value,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
case "disallow":
|
|
||||||
if value != "" {
|
|
||||||
data.Rules = append(data.Rules, RobotRule{
|
|
||||||
UserAgent: currentUserAgent,
|
|
||||||
Allow: false,
|
|
||||||
Path: value,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
case "crawl-delay":
|
|
||||||
if delay, err := strconv.Atoi(value); err == nil {
|
|
||||||
data.CrawlDelay = delay
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return data
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user