Implement context-aware database operations
- Add context support to database operations - Implement versioned snapshots for URL history - Update database queries to support URL timestamps - Improve transaction handling with context - Add utility functions for snapshot history
This commit is contained in:
117
db/db_queries.go
117
db/db_queries.go
@@ -8,7 +8,7 @@ WHERE u.id IN (
|
||||
SELECT id FROM (
|
||||
SELECT id, ROW_NUMBER() OVER (PARTITION BY host ORDER BY id) as rn
|
||||
FROM urls
|
||||
) t
|
||||
) t
|
||||
WHERE rn <= 3
|
||||
)
|
||||
ORDER BY RANDOM()
|
||||
@@ -18,30 +18,71 @@ LIMIT $1
|
||||
SQL_SELECT_RANDOM_URLS = `
|
||||
SELECT url
|
||||
FROM urls u
|
||||
WHERE u.being_processed IS NOT TRUE
|
||||
ORDER BY RANDOM()
|
||||
FOR UPDATE SKIP LOCKED
|
||||
LIMIT $1
|
||||
`
|
||||
SQL_INSERT_SNAPSHOT_IF_NEW = `
|
||||
INSERT INTO snapshots (url, host, timestamp, mimetype, data, gemtext, links, lang, response_code, error)
|
||||
VALUES (:url, :host, :timestamp, :mimetype, :data, :gemtext, :links, :lang, :response_code, :error)
|
||||
ON CONFLICT (url) DO NOTHING
|
||||
`
|
||||
SQL_UPSERT_SNAPSHOT = `INSERT INTO snapshots (url, host, timestamp, mimetype, data, gemtext, links, lang, response_code, error)
|
||||
VALUES (:url, :host, :timestamp, :mimetype, :data, :gemtext, :links, :lang, :response_code, :error)
|
||||
ON CONFLICT (url) DO UPDATE SET
|
||||
url = EXCLUDED.url,
|
||||
host = EXCLUDED.host,
|
||||
timestamp = EXCLUDED.timestamp,
|
||||
mimetype = EXCLUDED.mimetype,
|
||||
data = EXCLUDED.data,
|
||||
gemtext = EXCLUDED.gemtext,
|
||||
links = EXCLUDED.links,
|
||||
lang = EXCLUDED.lang,
|
||||
response_code = EXCLUDED.response_code,
|
||||
error = EXCLUDED.error
|
||||
RETURNING id
|
||||
SQL_MARK_URLS_BEING_PROCESSED = `UPDATE urls SET being_processed = true WHERE url IN (%s)`
|
||||
SQL_SELECT_RANDOM_URLS_GEMINI_ONLY = `
|
||||
SELECT url
|
||||
FROM urls u
|
||||
WHERE u.url like 'gemini://%'
|
||||
AND u.being_processed IS NOT TRUE
|
||||
ORDER BY RANDOM()
|
||||
FOR UPDATE SKIP LOCKED
|
||||
LIMIT $1
|
||||
`
|
||||
SQL_SELECT_RANDOM_URLS_GEMINI_ONLY_2 = `
|
||||
WITH RankedUrls AS (
|
||||
-- Step 1: Assign a random rank to each URL within its host group
|
||||
SELECT
|
||||
url,
|
||||
host,
|
||||
ROW_NUMBER() OVER (PARTITION BY host ORDER BY RANDOM()) as rn
|
||||
FROM
|
||||
urls
|
||||
WHERE url like 'gemini://%'
|
||||
AND being_processed IS NOT TRUE
|
||||
),
|
||||
OneUrlPerHost AS (
|
||||
-- Step 2: Filter to keep only the first-ranked (random) URL per host
|
||||
SELECT
|
||||
url,
|
||||
host
|
||||
FROM
|
||||
RankedUrls
|
||||
WHERE
|
||||
rn = 1
|
||||
)
|
||||
-- Step 3: From the set of one URL per host, randomly select X
|
||||
SELECT
|
||||
url
|
||||
FROM
|
||||
OneUrlPerHost
|
||||
ORDER BY
|
||||
RANDOM()
|
||||
FOR UPDATE SKIP LOCKED
|
||||
LIMIT $1
|
||||
`
|
||||
// New query - always insert a new snapshot without conflict handling
|
||||
SQL_INSERT_SNAPSHOT = `
|
||||
INSERT INTO snapshots (url, host, timestamp, mimetype, data, gemtext, links, lang, response_code, error, header)
|
||||
VALUES (:url, :host, :timestamp, :mimetype, :data, :gemtext, :links, :lang, :response_code, :error, :header)
|
||||
RETURNING id
|
||||
`
|
||||
// Keep for backward compatibility, but should be phased out
|
||||
SQL_INSERT_SNAPSHOT_IF_NEW = `
|
||||
INSERT INTO snapshots (url, host, timestamp, mimetype, data, gemtext, links, lang, response_code, error, header)
|
||||
VALUES (:url, :host, :timestamp, :mimetype, :data, :gemtext, :links, :lang, :response_code, :error, :header)
|
||||
ON CONFLICT DO NOTHING
|
||||
`
|
||||
// Update to match the SQL_INSERT_SNAPSHOT - we no longer want to upsert, just insert new versions
|
||||
SQL_UPSERT_SNAPSHOT = `
|
||||
INSERT INTO snapshots (url, host, timestamp, mimetype, data, gemtext, links, lang, response_code, error, header)
|
||||
VALUES (:url, :host, :timestamp, :mimetype, :data, :gemtext, :links, :lang, :response_code, :error, :header)
|
||||
RETURNING id
|
||||
`
|
||||
SQL_UPDATE_SNAPSHOT = `UPDATE snapshots
|
||||
SET url = :url,
|
||||
host = :host,
|
||||
@@ -52,7 +93,8 @@ gemtext = :gemtext,
|
||||
links = :links,
|
||||
lang = :lang,
|
||||
response_code = :response_code,
|
||||
error = :error
|
||||
error = :error,
|
||||
header = :header
|
||||
WHERE id = :id
|
||||
RETURNING id
|
||||
`
|
||||
@@ -60,8 +102,41 @@ RETURNING id
|
||||
INSERT INTO urls (url, host, timestamp)
|
||||
VALUES (:url, :host, :timestamp)
|
||||
ON CONFLICT (url) DO NOTHING
|
||||
`
|
||||
SQL_UPDATE_URL = `
|
||||
UPDATE urls
|
||||
SET url = :NormalizedURL
|
||||
WHERE url = :Url
|
||||
AND NOT EXISTS (
|
||||
SELECT 1 FROM urls WHERE url = :NormalizedURL
|
||||
)
|
||||
`
|
||||
SQL_DELETE_URL = `
|
||||
DELETE FROM urls WHERE url=$1
|
||||
`
|
||||
// New queries for retrieving snapshots
|
||||
SQL_GET_LATEST_SNAPSHOT = `
|
||||
SELECT * FROM snapshots
|
||||
WHERE url = $1
|
||||
ORDER BY timestamp DESC
|
||||
LIMIT 1
|
||||
`
|
||||
SQL_GET_SNAPSHOT_AT_TIMESTAMP = `
|
||||
SELECT * FROM snapshots
|
||||
WHERE url = $1
|
||||
AND timestamp <= $2
|
||||
ORDER BY timestamp DESC
|
||||
LIMIT 1
|
||||
`
|
||||
SQL_GET_ALL_SNAPSHOTS_FOR_URL = `
|
||||
SELECT * FROM snapshots
|
||||
WHERE url = $1
|
||||
ORDER BY timestamp DESC
|
||||
`
|
||||
SQL_GET_SNAPSHOTS_BY_DATE_RANGE = `
|
||||
SELECT * FROM snapshots
|
||||
WHERE url = $1
|
||||
AND timestamp BETWEEN $2 AND $3
|
||||
ORDER BY timestamp DESC
|
||||
`
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user