Files
gemini-grs/db/migrate2_unique_urls.sql
2024-11-18 16:28:45 +02:00

19 lines
575 B
SQL

-- Step 1: Remove duplicate snapshots so that each url is left with one row.
-- Rows are ranked per url from newest to oldest; every row ranked below the
-- first is deleted.
--   * NULLS LAST: PostgreSQL sorts NULLs first under DESC by default, which
--     would let a row with no timestamp win over rows that have one.
--   * id DESC: tiebreaker so the surviving row is deterministic when several
--     rows for the same url share a timestamp.
-- NOTE(review): rows whose url is NULL fall into a single partition and are
-- collapsed to one row as well, although a UNIQUE constraint would accept
-- several NULLs -- confirm url is never NULL in this table.
WITH ranked_snapshots AS (
    SELECT
        id,
        ROW_NUMBER() OVER (
            PARTITION BY url
            ORDER BY timestamp DESC NULLS LAST, id DESC
        ) AS row_num
    FROM snapshots
)
DELETE FROM snapshots
USING ranked_snapshots
WHERE snapshots.id = ranked_snapshots.id
    AND ranked_snapshots.row_num > 1;
-- Step 2: Enforce uniqueness of url from now on.
-- Once the named constraint unique_url exists, any INSERT or UPDATE that
-- would give two rows the same url is rejected. The statement itself fails
-- if rows sharing a url are still present, so it has to run after the
-- duplicates have been removed.
ALTER TABLE snapshots ADD CONSTRAINT unique_url UNIQUE (url);