Files
gemini-grc/misc/sql/host_snapshot_stats.sql
antanst 37d5e7cd78 Enhance crawler with seed list and SQL utilities
Add seedList module for URL initialization, comprehensive SQL utilities for database analysis, and update project configuration.
2025-06-16 12:29:33 +03:00

20 lines
568 B
SQL

-- File: host_snapshot_stats.sql
-- Per-host snapshot statistics, busiest hosts first.
-- Usage: \i misc/sql/host_snapshot_stats.sql
--
-- Output columns:
--   host                          host the snapshots are grouped under
--   unique_urls                   number of distinct non-NULL URLs for the host
--   urls_with_multiple_snapshots  number of those URLs having more than one snapshot
--   total_snapshots               total snapshot rows for the host
WITH url_snapshot_counts AS (
    -- One row per (host, url) with the number of snapshot rows it has.
    SELECT
        host,
        url,
        COUNT(*) AS snapshot_count
    FROM snapshots
    GROUP BY host, url
)
SELECT
    host,
    -- Each (host, url) pair appears once in the CTE, so counting non-NULL
    -- urls is equivalent to COUNT(DISTINCT url) here.
    COUNT(url) AS unique_urls,
    -- Must test snapshot_count: a window COUNT(*) OVER (PARTITION BY url)
    -- placed next to GROUP BY runs after grouping and would count how many
    -- hosts share the URL, not how many snapshots the URL has.
    COUNT(*) FILTER (WHERE snapshot_count > 1) AS urls_with_multiple_snapshots,
    SUM(snapshot_count) AS total_snapshots
FROM url_snapshot_counts
GROUP BY host
-- host is a tie-breaker so the output order is deterministic.
ORDER BY total_snapshots DESC, host;