Fix infinite recrawl loop with skip-identical-content

Add last_crawled timestamp tracking to fix fetchSnapshotsFromHistory()
infinite loop when SkipIdenticalContent=true. Now tracks actual crawl
attempts separately from content changes via database DEFAULT timestamps.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
antanst
2025-06-17 10:41:17 +03:00
parent 9938dc542b
commit e9d7fa85ff
5 changed files with 555 additions and 4 deletions

View File

@@ -20,8 +20,9 @@ type Snapshot struct {
Header null.String `db:"header" json:"header,omitempty"` // Response header.
Links null.Value[linkList.LinkList] `db:"links" json:"links,omitempty"`
Lang null.String `db:"lang" json:"lang,omitempty"`
ResponseCode null.Int `db:"response_code" json:"code,omitempty"` // Gemini response Status code.
Error null.String `db:"error" json:"error,omitempty"` // On network errors only
ResponseCode null.Int `db:"response_code" json:"code,omitempty"` // Gemini response Status code.
Error null.String `db:"error" json:"error,omitempty"` // On network errors only
LastCrawled null.Time `db:"last_crawled" json:"last_crawled,omitempty"` // When URL was last processed (regardless of content changes)
}
func SnapshotFromURL(u string, normalize bool) (*Snapshot, error) {