Add seedList module for URL initialization, comprehensive SQL utilities for database analysis, and update project configuration.
20 lines
729 B
SQL
20 lines
729 B
SQL
-- File: storage_efficiency.sql
|
|
-- Shows potential storage savings from deduplication
|
|
-- Usage: \i misc/sql/storage_efficiency.sql
|
|
|
|
WITH duplicate_stats AS (
|
|
SELECT
|
|
url,
|
|
COUNT(*) as snapshot_count,
|
|
COUNT(DISTINCT gemtext) as unique_gemtexts,
|
|
COUNT(DISTINCT data) as unique_datas
|
|
FROM snapshots
|
|
GROUP BY url
|
|
HAVING COUNT(*) > 1
|
|
)
|
|
SELECT
|
|
SUM(snapshot_count) as total_snapshots,
|
|
SUM(unique_gemtexts + unique_datas) as unique_contents,
|
|
SUM(snapshot_count) - SUM(unique_gemtexts + unique_datas) as duplicate_content_count,
|
|
ROUND((SUM(snapshot_count) - SUM(unique_gemtexts + unique_datas)) * 100.0 / SUM(snapshot_count), 2) as duplicate_percentage
|
|
FROM duplicate_stats; |