Files
gemini-grc/bin/normalizeSnapshot/main.go

117 lines
2.9 KiB
Go

package main
import (
"fmt"
"os"
"gemini-grc/gemini"
_ "github.com/jackc/pgx/v5/stdlib" // PGX driver for PostgreSQL
"github.com/jmoiron/sqlx"
)
// main normalizes the URL of every row in the `snapshots` table.
//
// Rows are read in pages of 10000, ordered by id, and each page is processed
// inside its own transaction. For every snapshot the stored URL is escaped
// with gemini.EscapeURL and re-parsed with gemini.ParseURL, then:
//   - if the result equals the stored URL, the row is left untouched;
//   - if another snapshot already has the normalized URL, the current row is
//     deleted and the other one is kept;
//   - otherwise the row is deleted and saved again through
//     gemini.UpsertSnapshot with the normalized URL.
//
// Any error rolls back the current page and aborts via printErrorAndExit.
func main() {
	db := connectToDB()

	// Pages are selected by "id greater than the last id seen" rather than by
	// OFFSET. This loop deletes and re-inserts rows of the table it is paging
	// over, so an OFFSET derived from the number of rows already visited
	// drifts past rows that have not been processed yet.
	const pageQuery = `
SELECT * FROM snapshots
WHERE id > $1
ORDER BY id
LIMIT 10000
`
	// Start the cursor at the zero value of the ID field so its type always
	// matches the struct mapping.
	// NOTE(review): assumes every stored id is greater than that zero value
	// (e.g. a positive serial) — confirm against the schema.
	lastID := gemini.Snapshot{}.ID

	// count is the running number of snapshots visited; it only feeds the log output.
	count := 0

	for {
		tx := db.MustBegin()

		var snapshots []gemini.Snapshot
		if err := tx.Select(&snapshots, pageQuery, lastID); err != nil {
			printErrorAndExit(tx, err)
		}
		if len(snapshots) == 0 {
			// Nothing was written in this transaction; just release it.
			_ = tx.Rollback()
			fmt.Println("Done!")
			return
		}

		for i := range snapshots {
			s := &snapshots[i]
			count++
			// Advance the cursor before the snapshot is modified or re-saved below.
			lastID = s.ID

			originalURLString := s.URL.String()
			escaped := gemini.EscapeURL(originalURLString)
			normalizedGeminiURL, err := gemini.ParseURL(escaped, "")
			if err != nil {
				fmt.Println(originalURLString)
				fmt.Println(escaped)
				printErrorAndExit(tx, err)
			}
			normalizedURLString := normalizedGeminiURL.String()

			// If URL is already normalized, skip snapshot.
			if normalizedURLString == originalURLString {
				continue
			}

			// If a snapshot already exists with the normalized URL, delete
			// the current snapshot and leave the other. Only existence
			// matters, so avoid loading the matching rows.
			var exists bool
			err = tx.Get(&exists, "SELECT EXISTS(SELECT 1 FROM snapshots WHERE URL=$1)", normalizedURLString)
			if err != nil {
				printErrorAndExit(tx, err)
			}
			if exists {
				if _, err := tx.Exec("DELETE FROM snapshots WHERE id=$1", s.ID); err != nil {
					printErrorAndExit(tx, err)
				}
				fmt.Printf("%d Deleting %d %s\n", count, s.ID, originalURLString)
				continue
			}

			// At this point we just replace the snapshot: the old row is
			// removed and the snapshot is saved again with the normalized URL.
			fmt.Printf("%d Updating %d %s => %s\n", count, s.ID, originalURLString, normalizedURLString)
			if _, err := tx.Exec("DELETE FROM snapshots WHERE id=$1", s.ID); err != nil {
				printErrorAndExit(tx, err)
			}
			s.URL = *normalizedGeminiURL
			if err := gemini.UpsertSnapshot(0, tx, s); err != nil {
				printErrorAndExit(tx, err)
			}
		}

		if err := tx.Commit(); err != nil {
			printErrorAndExit(tx, err)
		}
	}
}
// printErrorAndExit abandons the transaction and aborts the program.
//
// It attempts to roll back tx and then panics with err. The outcome of the
// rollback is intentionally discarded: the process is terminating because of
// err, which is the error worth reporting.
func printErrorAndExit(tx *sqlx.Tx, err error) {
	rollbackErr := tx.Rollback()
	_ = rollbackErr // best effort only; see the function comment
	panic(err)
}
// connectToDB opens a PostgreSQL connection pool through the pgx stdlib
// driver and verifies it with a ping.
//
// The connection URL is assembled from the environment variables PG_USER,
// PG_PASSWORD, PG_HOST, PG_PORT and PG_DATABASE. The values are interpolated
// verbatim, so any character that is reserved in a URL (for example '@' or
// '/' in the password) must already be percent-encoded in the environment.
//
// The pool is capped at 20 open connections. The function panics if the pool
// cannot be opened or the ping fails; otherwise it returns the ready pool.
func connectToDB() *sqlx.DB {
	host := os.Getenv("PG_HOST")
	port := os.Getenv("PG_PORT")
	database := os.Getenv("PG_DATABASE")

	connStr := fmt.Sprintf("postgres://%s:%s@%s:%s/%s",
		os.Getenv("PG_USER"),
		os.Getenv("PG_PASSWORD"),
		host,
		port,
		database,
	)

	// Create a connection pool
	db, err := sqlx.Open("pgx", connStr)
	if err != nil {
		// The message names only the target, never connStr itself: the URL
		// embeds the password and must not end up in logs or crash output.
		panic(fmt.Sprintf("Unable to connect to database %s on %s:%s: %v\n", database, host, port, err))
	}
	db.SetMaxOpenConns(20)

	err = db.Ping()
	if err != nil {
		panic(fmt.Sprintf("Unable to ping database: %v\n", err))
	}

	fmt.Println("Connected to database")
	return db
}