-- File viewer metadata (not part of the script): 19 lines, 575 B, SQL
-- Step 1: Delete duplicate entries, keeping the most recent row for each url.
--
-- ranked_snapshots numbers the rows of every url from newest to oldest, so
-- row_num = 1 is the row to keep and every row_num > 1 is a duplicate.
--   * NULLS LAST: PostgreSQL sorts NULLs first under DESC, which would make a
--     row with a missing timestamp win over rows that have a real one.
--   * id DESC: tiebreaker so the surviving row is deterministic when two rows
--     of the same url share a timestamp.
--   * url IS NOT NULL: a UNIQUE constraint treats NULLs as distinct, so rows
--     without a url do not violate it and must not be deleted as duplicates.
-- Assumes id uniquely identifies a row (e.g. it is the primary key) -- TODO confirm.
-- NOTE(review): run both steps in a single transaction if the migration
-- runner does not already do so, so that no duplicate can be inserted between
-- the cleanup and the constraint, and a failed Step 2 rolls back Step 1.
WITH ranked_snapshots AS (
    SELECT
        id,
        ROW_NUMBER() OVER (
            PARTITION BY url
            ORDER BY timestamp DESC NULLS LAST, id DESC
        ) AS row_num
    FROM snapshots
    WHERE url IS NOT NULL
)
DELETE FROM snapshots
USING ranked_snapshots
WHERE snapshots.id = ranked_snapshots.id
    AND ranked_snapshots.row_num > 1;

-- Step 2: Add a unique constraint on the url column to prevent future duplicates.
-- This fails if any duplicate non-NULL url is still present, so it must run
-- after Step 1.
ALTER TABLE snapshots
    ADD CONSTRAINT unique_url UNIQUE (url);