Enhance crawler with seed list and SQL utilities

Add seedList module for URL initialization, comprehensive SQL utilities for database analysis, and update project configuration.
This commit is contained in:
antanst
2025-06-16 12:29:33 +03:00
parent 5e6dabf1e7
commit 8588414b14
37 changed files with 742 additions and 682 deletions

View File

@@ -0,0 +1,13 @@
-- File: recent_snapshot_activity.sql
-- Shows URLs with most snapshots in the last 7 days
-- Usage: \i misc/sql/recent_snapshot_activity.sql
SELECT
url,
COUNT(*) as snapshot_count
FROM snapshots
WHERE timestamp > NOW() - INTERVAL '7 days'
GROUP BY url
HAVING COUNT(*) > 1
ORDER BY snapshot_count DESC
LIMIT 20;