package gemini

import (
	"encoding/json"
	"fmt"
	"net/url"
	"os"

	"gemini-grc/config"
	"gemini-grc/logging"

	_ "github.com/jackc/pgx/v5/stdlib" // PGX driver for PostgreSQL
	"github.com/jmoiron/sqlx"
)

// insertSnapshotIfNewQuery inserts a snapshot row and silently skips rows
// whose url already exists. It binds ten named parameters per row.
const insertSnapshotIfNewQuery = `
	INSERT INTO snapshots (url, host, timestamp, mimetype, data, gemtext, links, lang, response_code, error)
	VALUES (:url, :host, :timestamp, :mimetype, :data, :gemtext, :links, :lang, :response_code, :error)
	ON CONFLICT (url) DO NOTHING
`

// upsertSnapshotQuery inserts a snapshot row, or overwrites every column of
// the existing row that has the same url.
const upsertSnapshotQuery = `
	INSERT INTO snapshots (url, host, timestamp, mimetype, data, gemtext, links, lang, response_code, error)
	VALUES (:url, :host, :timestamp, :mimetype, :data, :gemtext, :links, :lang, :response_code, :error)
	ON CONFLICT (url) DO UPDATE SET
		url = EXCLUDED.url,
		host = EXCLUDED.host,
		timestamp = EXCLUDED.timestamp,
		mimetype = EXCLUDED.mimetype,
		data = EXCLUDED.data,
		gemtext = EXCLUDED.gemtext,
		links = EXCLUDED.links,
		lang = EXCLUDED.lang,
		response_code = EXCLUDED.response_code,
		error = EXCLUDED.error`

// ConnectToDB opens a PostgreSQL connection pool through the pgx stdlib
// driver, using the PG_USER, PG_PASSWORD, PG_HOST, PG_PORT and PG_DATABASE
// environment variables, caps the pool at 20 open connections and verifies
// the connection with a ping.
//
// It panics if the pool cannot be opened or the ping fails. The password is
// never included in the panic message.
func ConnectToDB() *sqlx.DB {
	// Build the DSN with net/url so that credentials or database names
	// containing URL-reserved characters (e.g. '@', '/', ':') are escaped
	// instead of corrupting the connection string.
	connURL := url.URL{
		Scheme: "postgres",
		User:   url.UserPassword(os.Getenv("PG_USER"), os.Getenv("PG_PASSWORD")),
		Host:   os.Getenv("PG_HOST") + ":" + os.Getenv("PG_PORT"),
		Path:   "/" + os.Getenv("PG_DATABASE"),
	}

	// Create a connection pool
	db, err := sqlx.Open("pgx", connURL.String())
	if err != nil {
		// Redacted() masks the password so it does not leak into logs.
		panic(fmt.Sprintf("Unable to connect to database with URL %s: %v\n", connURL.Redacted(), err))
	}
	db.SetMaxOpenConns(20)

	if err := db.Ping(); err != nil {
		panic(fmt.Sprintf("Unable to ping database: %v\n", err))
	}

	logging.LogDebug("Connected to database")
	return db
}

// SaveSnapshotIfNew inserts s into the snapshots table within tx, leaving any
// existing row with the same url untouched.
//
// When config.CONFIG.DryRun is set, nothing is written; the snapshot is only
// serialized to JSON and logged at debug level. It returns a wrapped error if
// that serialization or the insert fails.
func SaveSnapshotIfNew(tx *sqlx.Tx, s *Snapshot) error {
	if config.CONFIG.DryRun {
		// The JSON form is only needed for the dry-run log line, so it is
		// produced here rather than on every call.
		marshalled, err := json.MarshalIndent(s, "", " ")
		if err != nil {
			return fmt.Errorf("[%s] JSON serialization error: %w", s.URL, err)
		}
		logging.LogDebug("Would insert (if new) snapshot %s", marshalled)
		return nil
	}

	if _, err := tx.NamedExec(insertSnapshotIfNewQuery, s); err != nil {
		return fmt.Errorf("[%s] GeminiError inserting snapshot: %w", s.URL, err)
	}
	return nil
}

// UpsertSnapshot inserts s into the snapshots table within tx, or overwrites
// the existing row that has the same url.
//
// id is used only to tag the dry-run log line. When config.CONFIG.DryRun is
// set, nothing is written; the snapshot is only serialized to JSON and logged
// at debug level. It returns a wrapped error if that serialization or the
// upsert fails.
func UpsertSnapshot(id int, tx *sqlx.Tx, s *Snapshot) error {
	if config.CONFIG.DryRun {
		// The JSON form is only needed for the dry-run log line, so it is
		// produced here rather than on every call.
		marshalled, err := json.MarshalIndent(s, "", " ")
		if err != nil {
			return fmt.Errorf("[%s] JSON serialization error: %w", s.URL, err)
		}
		logging.LogDebug("[%d] Would upsert snapshot %s", id, marshalled)
		return nil
	}

	if _, err := tx.NamedExec(upsertSnapshotQuery, s); err != nil {
		return fmt.Errorf("[%s] GeminiError upserting snapshot: %w", s.URL, err)
	}
	return nil
}

// SaveLinksToDBinBatches inserts snapshots into the snapshots table within tx
// in chunks of at most 5000 rows, skipping rows whose url already exists.
//
// It is a no-op when config.CONFIG.DryRun is set or snapshots is empty. On
// the first failing batch it logs the error and returns it wrapped; earlier
// batches remain part of tx and are left for the caller to commit or roll
// back.
func SaveLinksToDBinBatches(tx *sqlx.Tx, snapshots []*Snapshot) error {
	if config.CONFIG.DryRun {
		return nil
	}

	// Each row binds 10 parameters and a PostgreSQL statement accepts at most
	// 65535, i.e. 6553 rows per statement; 5000 leaves a safety margin.
	const batchSize = 5000

	for start := 0; start < len(snapshots); start += batchSize {
		end := start + batchSize
		if end > len(snapshots) {
			end = len(snapshots)
		}

		if _, err := tx.NamedExec(insertSnapshotIfNewQuery, snapshots[start:end]); err != nil {
			// %v, not %w: the wrapping verb is only understood by fmt.Errorf.
			logging.LogError("GeminiError batch inserting snapshots: %v", err)
			return fmt.Errorf("DB error: %w", err)
		}
	}
	return nil
}

// SaveLinksToDB inserts all snapshots into the snapshots table within tx using
// a single statement, skipping rows whose url already exists.
//
// It is a no-op when config.CONFIG.DryRun is set or snapshots is empty. On
// failure it logs the error and returns it wrapped. All rows go into one
// statement here; SaveLinksToDBinBatches splits the input into chunks instead.
func SaveLinksToDB(tx *sqlx.Tx, snapshots []*Snapshot) error {
	if config.CONFIG.DryRun {
		return nil
	}
	// sqlx's named batch binding returns an error for a zero-length slice;
	// treat "nothing to insert" as success, matching SaveLinksToDBinBatches.
	if len(snapshots) == 0 {
		return nil
	}

	if _, err := tx.NamedExec(insertSnapshotIfNewQuery, snapshots); err != nil {
		// %v, not %w: the wrapping verb is only understood by fmt.Errorf.
		logging.LogError("GeminiError batch inserting snapshots: %v", err)
		return fmt.Errorf("DB error: %w", err)
	}
	return nil
}