This commit is contained in:
2024-11-18 16:28:45 +02:00
parent f0452ff9f7
commit 825c7e3391
34 changed files with 624 additions and 426 deletions

View File

@@ -0,0 +1,18 @@
-- Step 1: Remove duplicate snapshots so that exactly one row per url remains.
-- The surviving row is the one with the most recent non-NULL timestamp:
--   * NULLS LAST: PostgreSQL sorts NULLs first under DESC by default, so
--     without it a row with a NULL timestamp would outrank every dated row
--     and be the one that is kept.
--   * id DESC: deterministic tie-breaker when several rows for the same url
--     share a timestamp (assumes id uniquely identifies a row; it is already
--     the key the DELETE joins on -- TODO confirm it is the primary key).
-- NOTE(review): PARTITION BY puts all NULL urls in one group, so if url is
-- nullable only one NULL-url row survives -- confirm that is intended.
-- DELETE ... USING is PostgreSQL-specific syntax.
WITH ranked_snapshots AS (
    -- Rank the rows of each url from newest (row_num = 1) to oldest.
    SELECT
        id,
        ROW_NUMBER() OVER (
            PARTITION BY url
            ORDER BY timestamp DESC NULLS LAST, id DESC
        ) AS row_num
    FROM snapshots
)
DELETE FROM snapshots
USING ranked_snapshots
WHERE snapshots.id = ranked_snapshots.id
    AND ranked_snapshots.row_num > 1;  -- everything except the newest row per url
-- Step 2: Enforce at most one snapshot row per url from now on.
-- The statement fails if the table still contains two rows with the same url.
ALTER TABLE snapshots
    ADD CONSTRAINT unique_url
    UNIQUE (url);