Enhance crawler with seed list and SQL utilities

Add seedList module for URL initialization, comprehensive SQL utilities for database analysis, and update project configuration.
This commit is contained in:
antanst
2025-06-16 12:29:33 +03:00
parent dfb050588c
commit 37d5e7cd78
37 changed files with 742 additions and 682 deletions

View File

@@ -0,0 +1,14 @@
-- File: snapshots_date_range.sql
-- Shows snapshot count with date range information for each URL
-- Usage: \i misc/sql/snapshots_date_range.sql
SELECT
url,
COUNT(*) as snapshot_count,
MIN(timestamp) as first_snapshot,
MAX(timestamp) as last_snapshot,
MAX(timestamp) - MIN(timestamp) as time_span
FROM snapshots
GROUP BY url
HAVING COUNT(*) > 1
ORDER BY snapshot_count DESC;