Files
gemini-grc/misc/sql/snapshot_distribution.sql
antanst 8588414b14 Enhance crawler with seed list and SQL utilities
Add seedList module for URL initialization, comprehensive SQL utilities for database analysis, and update project configuration.
2025-06-29 22:38:38 +03:00

16 lines
458 B
SQL

-- File: snapshot_distribution.sql
-- Shows the distribution of snapshots per URL: for each snapshot count (1, 2, 3, ...),
-- how many URLs have exactly that many snapshots and what percentage of all URLs that is.
-- Usage: \i misc/sql/snapshot_distribution.sql
-- Step 1: count how many rows in snapshots exist for each distinct url value.
WITH per_url AS (
    SELECT
        url,
        COUNT(*) AS snapshot_count
    FROM snapshots
    GROUP BY url
),
-- Step 2: bucket the urls by how many snapshots each one has.
distribution AS (
    SELECT
        snapshot_count,
        COUNT(*) AS url_count
    FROM per_url
    GROUP BY snapshot_count
)
-- Step 3: express each bucket as a share of all urls. The empty OVER ()
-- window sums url_count across every bucket, giving the total url count;
-- multiplying by 100.0 first keeps the division out of integer arithmetic.
SELECT
    snapshot_count,
    url_count,
    ROUND(url_count * 100.0 / SUM(url_count) OVER (), 2) AS percentage
FROM distribution
ORDER BY snapshot_count;