Files
gemini-grc/common/seedList/seedlist.go
antanst 8588414b14 Enhance crawler with seed list and SQL utilities
Add seedList module for URL initialization, comprehensive SQL utilities for database analysis, and update project configuration.
2025-06-29 22:38:38 +03:00

68 lines
1.2 KiB
Go

package seedList
import (
"fmt"
"os"
"strings"
"git.antanst.com/antanst/logging"
"git.antanst.com/antanst/xerrors"
)
var seedlist []string //nolint:gochecknoglobals
func Initialize() error {
var err error
// Initialize seedlist from fixed path
if err = loadSeedlist("seed_urls.txt"); err != nil {
return err
}
return nil
}
func loadSeedlist(filePath string) error {
if seedlist != nil {
return nil
}
data, err := os.ReadFile(filePath)
if err != nil {
seedlist = []string{}
return xerrors.NewError(fmt.Errorf("could not load seedlist file: %w", err), 0, "", true)
}
lines := strings.Split(string(data), "\n")
seedlist = []string{}
for _, line := range lines {
line = strings.TrimSpace(line)
if line == "" || strings.HasPrefix(line, "#") {
continue
}
seedlist = append(seedlist, line)
}
if len(seedlist) > 0 {
logging.LogInfo("Loaded %d seed URLs", len(seedlist))
}
return nil
}
func Shutdown() error {
return nil
}
// GetSeedURLs returns the list of seed URLs
func GetSeedURLs() []string {
if seedlist == nil {
return []string{}
}
// Return a copy to prevent external modification
result := make([]string, len(seedlist))
copy(result, seedlist)
return result
}