package robotsMatch

import (
	"context"
	"fmt"
	"strings"

	"gemini-grc/common/contextlog"
	"gemini-grc/contextutil"
	"gemini-grc/logging"
)

// ParseRobotsTxt takes robots.txt content and a host, and
// returns a list of full URLs that shouldn't be visited.
// This is the legacy version without context support.
// TODO Also take into account the user agent?
// Check gemini://geminiprotocol.net/docs/companion/robots.gmi
func ParseRobotsTxt(content string, host string) []string {
	// Call the context-aware version with a background context.
	return ParseRobotsTxtWithContext(context.Background(), content, host)
}

// ParseRobotsTxtWithContext takes robots.txt content and a host, and
// returns a list of full URLs that shouldn't be visited.
// This version supports context for logging.
// TODO Also take into account the user agent?
// Check gemini://geminiprotocol.net/docs/companion/robots.gmi
func ParseRobotsTxtWithContext(ctx context.Context, content string, host string) []string {
	// Create a context for robots.txt parsing.
	parseCtx := contextutil.ContextWithComponent(ctx, "robotsMatch.parser")

	var disallowedPaths []string
	for _, line := range strings.Split(content, "\n") {
		line = strings.TrimSpace(line)
		line = strings.ToLower(line)
		if !strings.HasPrefix(line, "disallow:") {
			continue
		}
		parts := strings.SplitN(line, ":", 2)
		if len(parts) != 2 {
			continue
		}
		path := strings.TrimSpace(parts[1])
		if path == "" {
			continue
		}
		// Construct the full Gemini URL for the disallowed path.
		var fullURL string
		if strings.HasPrefix(path, "gemini://") {
			// The rule already contains a full URL; extract the path
			// (everything after the domain) and rebuild it against host.
			urlParts := strings.SplitN(path, "/", 4)
			if len(urlParts) >= 4 {
				pathPart := "/" + urlParts[3]
				fullURL = fmt.Sprintf("gemini://%s%s", host, pathPart)
			} else {
				// A bare domain without a path: fall back to the root path.
				fullURL = fmt.Sprintf("gemini://%s/", host)
			}
		} else {
			// A relative path: prepend it to the host.
			if !strings.HasPrefix(path, "/") {
				path = "/" + path
			}
			fullURL = fmt.Sprintf("gemini://%s%s", host, path)
		}
		disallowedPaths = append(disallowedPaths, fullURL)
		// Log the parsed rule to help debug robots.txt handling.
		contextlog.LogDebugWithContext(parseCtx, logging.GetSlogger(),
			"Added robots.txt disallow rule: %s from original: %s", fullURL, path)
	}
	return disallowedPaths
}
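
// Usage sketch (illustrative only; the robots.txt body and host below are
// hypothetical, not taken from this project's tests): a single Disallow rule
// is turned into a full Gemini URL rooted at the given host.
//
//	disallowed := ParseRobotsTxt("User-agent: *\nDisallow: /private/", "example.org")
//	// disallowed == []string{"gemini://example.org/private/"}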