-- Migration: de-duplicate snapshots by url, then enforce uniqueness on url.
--
-- Dialect: PostgreSQL (uses DELETE ... USING and a CTE attached to DML).
--
-- Both steps run in one transaction so a failure while adding the constraint
-- rolls back the delete as well, instead of leaving the table de-duplicated
-- but unconstrained.
-- NOTE(review): if your migration runner already opens a transaction for each
-- script, remove the BEGIN/COMMIT pair below.
BEGIN;

-- Step 1: Delete duplicate entries, keeping one row per url.
-- The row kept is the one with the latest timestamp:
--   * NULLS LAST  - PostgreSQL sorts NULLs first under DESC by default, which
--                   would otherwise make a row with no timestamp the "latest".
--   * id DESC     - deterministic tie-breaker when timestamps are equal.
-- Rows with a NULL url are skipped: a UNIQUE constraint treats NULLs as
-- distinct, so they never conflict and do not need to be removed.
WITH ranked_snapshots AS (
    SELECT
        s.id,
        ROW_NUMBER() OVER (
            PARTITION BY s.url
            ORDER BY s.timestamp DESC NULLS LAST, s.id DESC
        ) AS row_num
    FROM snapshots AS s
    WHERE s.url IS NOT NULL
)
DELETE FROM snapshots
USING ranked_snapshots
WHERE snapshots.id = ranked_snapshots.id
  AND ranked_snapshots.row_num > 1;

-- Step 2: Add a unique constraint on the url column to prevent future duplicates.
ALTER TABLE snapshots
    ADD CONSTRAINT unique_url UNIQUE (url);

COMMIT;