Implement structured logging with slog

- Replace zerolog with Go's standard slog package
- Add ColorHandler for terminal color output
- Add context-aware logging system
- Format attributes on the same line as log messages
- Use green color for INFO level logs
- Set up context value extraction helpers

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-05-22 12:44:08 +03:00
parent ad224a328e
commit a55f820f62
7 changed files with 393 additions and 322 deletions


@@ -0,0 +1,112 @@
package contextlog
import (
"context"
"fmt"
"log/slog"
"gemini-grc/contextutil"
)
// SlogEventWithContext returns a child logger with context values (host, request ID, worker ID, component, URL) attached as structured fields.
func SlogEventWithContext(ctx context.Context, logger *slog.Logger) *slog.Logger {
// Start with the provided logger
if logger == nil {
// If logger isn't initialized, use the default logger
return slog.Default()
}
// Get context values - will be added directly to log records
host := contextutil.GetHostFromContext(ctx)
requestID := contextutil.GetRequestIDFromContext(ctx)
component := contextutil.GetComponentFromContext(ctx)
workerID := contextutil.GetWorkerIDFromContext(ctx)
url := contextutil.GetURLFromContext(ctx)
// Add all context fields to the logger
if host != "" {
logger = logger.With("host", host)
}
if requestID != "" {
logger = logger.With("request_id", requestID)
}
if workerID >= 0 {
logger = logger.With("worker_id", workerID)
}
if component != "" {
logger = logger.With("component", component)
}
if url != "" {
logger = logger.With("url", url)
}
return logger
}
// LogDebugWithContext logs a debug message with context information.
func LogDebugWithContext(ctx context.Context, logger *slog.Logger, format string, args ...interface{}) {
if logger == nil {
return
}
// Create logger with context fields
contextLogger := SlogEventWithContext(ctx, logger)
// Format the message
message := fmt.Sprintf(format, args...)
// Log with context data in the record attributes
contextLogger.Debug(message)
}
// LogInfoWithContext logs an info message with context information.
func LogInfoWithContext(ctx context.Context, logger *slog.Logger, format string, args ...interface{}) {
if logger == nil {
return
}
// Create logger with context fields
contextLogger := SlogEventWithContext(ctx, logger)
// Format the message
message := fmt.Sprintf(format, args...)
// Log with context data in the record attributes
contextLogger.Info(message)
}
// LogWarnWithContext logs a warning message with context information.
func LogWarnWithContext(ctx context.Context, logger *slog.Logger, format string, args ...interface{}) {
if logger == nil {
return
}
// Create logger with context fields
contextLogger := SlogEventWithContext(ctx, logger)
// Format the message
message := fmt.Sprintf(format, args...)
// Log with context data in the record attributes
contextLogger.Warn(message)
}
// LogErrorWithContext logs an error message with context information
func LogErrorWithContext(ctx context.Context, logger *slog.Logger, format string, args ...interface{}) {
if logger == nil {
return
}
// Create logger with context fields
contextLogger := SlogEventWithContext(ctx, logger)
// Format the message
msg := fmt.Sprintf(format, args...)
// Log with context data in the record attributes
contextLogger.Error(msg, slog.String("error", msg))
}
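
A minimal end-to-end sketch of the new helpers (the URL, host, and worker ID below are illustrative, not taken from this commit):

package main

import (
    "context"
    "log/slog"

    "gemini-grc/contextlog"
    "gemini-grc/contextutil"
    "gemini-grc/logging"
)

func main() {
    logging.InitSlogger(slog.LevelDebug)
    // Build a request-scoped context and tag it with a component name.
    ctx, cancel := contextutil.NewRequestContext(context.Background(), "gemini://example.org/", "example.org", 3)
    defer cancel()
    ctx = contextutil.ContextWithComponent(ctx, "worker")
    // host, request_id, worker_id, component and url are appended to the log
    // line as key=value attributes by the ColorHandler.
    contextlog.LogInfoWithContext(ctx, logging.GetSlogger(), "Fetching %s", "gemini://example.org/")
}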

contextutil/context.go

@@ -0,0 +1,89 @@
package contextutil
import (
"context"
"time"
"git.antanst.com/antanst/uid"
)
// ContextKey type for context values
type ContextKey string
// Context keys
const (
CtxKeyURL ContextKey = "url" // Full URL being processed
CtxKeyHost ContextKey = "host" // Host of the URL
CtxKeyRequestID ContextKey = "request_id" // Unique ID for this processing request
CtxKeyWorkerID ContextKey = "worker_id" // Worker ID processing this request
CtxKeyStartTime ContextKey = "start_time" // When processing started
CtxKeyComponent ContextKey = "component" // Component name for logging
)
// NewRequestContext creates a new, cancellable context with a 120-second
// timeout and request-scoped values: URL, host, request ID, worker ID and start time.
func NewRequestContext(parentCtx context.Context, url string, host string, workerID int) (context.Context, context.CancelFunc) {
ctx, cancel := context.WithTimeout(parentCtx, 120*time.Second)
requestID := uid.UID()
ctx = context.WithValue(ctx, CtxKeyURL, url)
ctx = context.WithValue(ctx, CtxKeyHost, host)
ctx = context.WithValue(ctx, CtxKeyRequestID, requestID)
ctx = context.WithValue(ctx, CtxKeyWorkerID, workerID)
ctx = context.WithValue(ctx, CtxKeyStartTime, time.Now())
return ctx, cancel
}
// Helper functions to get values from context
// GetURLFromContext retrieves the URL from the context
func GetURLFromContext(ctx context.Context) string {
if url, ok := ctx.Value(CtxKeyURL).(string); ok {
return url
}
return ""
}
// GetHostFromContext retrieves the host from the context
func GetHostFromContext(ctx context.Context) string {
if host, ok := ctx.Value(CtxKeyHost).(string); ok {
return host
}
return ""
}
// GetRequestIDFromContext retrieves the request ID from the context
func GetRequestIDFromContext(ctx context.Context) string {
if id, ok := ctx.Value(CtxKeyRequestID).(string); ok {
return id
}
return ""
}
// GetWorkerIDFromContext retrieves the worker ID from the context
func GetWorkerIDFromContext(ctx context.Context) int {
if id, ok := ctx.Value(CtxKeyWorkerID).(int); ok {
return id
}
return -1
}
// GetStartTimeFromContext retrieves the start time from the context
func GetStartTimeFromContext(ctx context.Context) time.Time {
if startTime, ok := ctx.Value(CtxKeyStartTime).(time.Time); ok {
return startTime
}
return time.Time{}
}
// GetComponentFromContext retrieves the component name from the context
func GetComponentFromContext(ctx context.Context) string {
if component, ok := ctx.Value(CtxKeyComponent).(string); ok {
return component
}
return ""
}
// ContextWithComponent adds or updates the component name in the context
func ContextWithComponent(ctx context.Context, component string) context.Context {
return context.WithValue(ctx, CtxKeyComponent, component)
}
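
The getters make the stored values easy to recover later, for example to report how long a request has been running; a short sketch (the printed wording is illustrative):

package main

import (
    "context"
    "fmt"
    "time"

    "gemini-grc/contextutil"
)

func main() {
    ctx, cancel := contextutil.NewRequestContext(context.Background(), "gemini://example.org/about.gmi", "example.org", 0)
    defer cancel()
    // ... the actual request work would happen here ...
    if start := contextutil.GetStartTimeFromContext(ctx); !start.IsZero() {
        fmt.Printf("worker %d: %s elapsed for %s\n",
            contextutil.GetWorkerIDFromContext(ctx),
            time.Since(start).Round(time.Millisecond),
            contextutil.GetURLFromContext(ctx))
    }
}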

db/db.go

@@ -1,140 +0,0 @@
package db
import (
"encoding/json"
"fmt"
"os"
"strconv"
"time"
"gemini-grc/common/snapshot"
commonUrl "gemini-grc/common/url"
"gemini-grc/config"
"gemini-grc/logging"
"github.com/antanst/go_errors"
_ "github.com/jackc/pgx/v5/stdlib" // PGX driver for PostgreSQL
"github.com/jmoiron/sqlx"
"github.com/lib/pq"
)
func ConnectToDB() (*sqlx.DB, error) {
connStr := fmt.Sprintf("postgres://%s:%s@%s:%s/%s", //nolint:nosprintfhostport
os.Getenv("PG_USER"),
os.Getenv("PG_PASSWORD"),
os.Getenv("PG_HOST"),
os.Getenv("PG_PORT"),
os.Getenv("PG_DATABASE"),
)
// Create a connection pool
db, err := sqlx.Open("pgx", connStr)
if err != nil {
return nil, go_errors.NewFatalError(fmt.Errorf("unable to connect to database with URL %s: %w", connStr, err))
}
// TODO move PG_MAX_OPEN_CONNECTIONS to config env variables
maxConnections, err := strconv.Atoi(os.Getenv("PG_MAX_OPEN_CONNECTIONS"))
if err != nil {
return nil, go_errors.NewFatalError(fmt.Errorf("unable to set DB max connections: %w", err))
}
db.SetMaxOpenConns(maxConnections)
err = db.Ping()
if err != nil {
return nil, go_errors.NewFatalError(fmt.Errorf("unable to ping database: %w", err))
}
logging.LogDebug("Connected to database")
return db, nil
}
// IsDeadlockError checks if the error is a PostgreSQL deadlock error.
func IsDeadlockError(err error) bool {
err = go_errors.Unwrap(err)
var pqErr *pq.Error
if go_errors.As(err, &pqErr) {
return pqErr.Code == "40P01" // PostgreSQL deadlock error code
}
return false
}
func GetRandomUrls(tx *sqlx.Tx) ([]string, error) {
var urls []string
err := tx.Select(&urls, SQL_SELECT_RANDOM_URLS, config.CONFIG.WorkerBatchSize)
if err != nil {
return nil, go_errors.NewFatalError(err)
}
return urls, nil
}
func GetRandomUrlsWithBasePath(tx *sqlx.Tx) ([]string, error) {
SqlQuery := `SELECT url FROM snapshots WHERE url ~ '^[^:]+://[^/]+/?$' ORDER BY RANDOM() LIMIT $1`
var urls []string
err := tx.Select(&urls, SqlQuery, config.CONFIG.WorkerBatchSize)
if err != nil {
return nil, go_errors.NewFatalError(err)
}
return urls, nil
}
func InsertURL(tx *sqlx.Tx, url string) error {
logging.LogDebug("Inserting URL %s", url)
query := SQL_INSERT_URL
normalizedURL, err := commonUrl.ParseURL(url, "", true)
if err != nil {
return err
}
a := struct {
Url string
Host string
Timestamp time.Time
}{
Url: normalizedURL.Full,
Host: normalizedURL.Hostname,
Timestamp: time.Now(),
}
_, err = tx.NamedExec(query, a)
if err != nil {
return go_errors.NewFatalError(fmt.Errorf("cannot insert URL: database error %w URL %s", err, url))
}
return nil
}
func DeleteURL(tx *sqlx.Tx, url string) error {
logging.LogDebug("Deleting URL %s", url)
query := SQL_DELETE_URL
_, err := tx.Exec(query, url)
if err != nil {
return go_errors.NewFatalError(fmt.Errorf("cannot delete URL: database error %w URL %s", err, url))
}
return nil
}
func OverwriteSnapshot(tx *sqlx.Tx, s *snapshot.Snapshot) (err error) {
if config.CONFIG.DryRun {
marshalled, err := json.MarshalIndent(s, "", " ")
if err != nil {
return go_errors.NewFatalError(fmt.Errorf("JSON serialization error for %v", s))
}
logging.LogDebug("Would upsert snapshot %s", marshalled)
return nil
}
query := SQL_UPSERT_SNAPSHOT
rows, err := tx.NamedQuery(query, s)
if err != nil {
return go_errors.NewFatalError(fmt.Errorf("cannot overwrite snapshot: %w", err))
}
defer func() {
_err := rows.Close()
if err == nil && _err != nil {
err = go_errors.NewFatalError(fmt.Errorf("cannot overwrite snapshot: error closing rows: %w", err))
}
}()
if rows.Next() {
var returnedID int
err = rows.Scan(&returnedID)
if err != nil {
return go_errors.NewFatalError(fmt.Errorf("cannot overwrite snapshot: error scanning rows: %w", err))
}
s.ID = returnedID
}
return nil
}
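
IsDeadlockError exists so callers can retry a transaction that PostgreSQL picked as a deadlock victim. A rough sketch of that pattern, assuming a hypothetical withRetries helper and retry limit (neither is part of this file):

package dbretry

import (
    "fmt"
    "time"

    "gemini-grc/db"
    "github.com/jmoiron/sqlx"
)

// withRetries re-runs fn in a fresh transaction when PostgreSQL reports a
// deadlock (SQLSTATE 40P01); any other error is returned immediately.
func withRetries(conn *sqlx.DB, fn func(*sqlx.Tx) error) error {
    const maxAttempts = 3 // hypothetical limit
    for attempt := 1; attempt <= maxAttempts; attempt++ {
        tx, err := conn.Beginx()
        if err != nil {
            return err
        }
        if err = fn(tx); err == nil {
            return tx.Commit()
        }
        _ = tx.Rollback()
        if !db.IsDeadlockError(err) {
            return err
        }
        // Deadlock victim: back off briefly, then retry the whole transaction.
        time.Sleep(time.Duration(attempt) * 100 * time.Millisecond)
    }
    return fmt.Errorf("transaction still deadlocking after %d attempts", maxAttempts)
}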


@@ -1,114 +0,0 @@
package gemini
import (
"fmt"
"net/url"
"os"
"path"
"path/filepath"
"strings"
"gemini-grc/common/snapshot"
"gemini-grc/logging"
)
// sanitizePath encodes invalid filesystem characters using URL encoding.
// Example:
// /example/path/to/page?query=param&another=value
// would become
// example/path/to/page%3Fquery%3Dparam%26another%3Dvalue
func sanitizePath(p string) string {
// Split the path into its components
components := strings.Split(p, "/")
// Encode each component separately
for i, component := range components {
// Decode any existing percent-encoded characters
decodedComponent, err := url.PathUnescape(component)
if err != nil {
decodedComponent = component // Fallback to original if unescape fails
}
// Encode the component to escape invalid filesystem characters
encodedComponent := url.QueryEscape(decodedComponent)
// Replace '+' (from QueryEscape) with '%20' to handle spaces correctly
encodedComponent = strings.ReplaceAll(encodedComponent, "+", "%20")
components[i] = encodedComponent
}
// Rejoin the components into a sanitized path
safe := filepath.Join(components...)
return safe
}
// getFilePath constructs a safe file path from the root path and URL path.
// It URL-encodes invalid filesystem characters to ensure the path is valid.
func calcFilePath(rootPath, urlPath string) (string, error) {
// Normalize the URL path
cleanPath := filepath.Clean(urlPath)
// Safe check to prevent directory traversal
if strings.Contains(cleanPath, "..") {
return "", fmt.Errorf("Invalid URL path: contains directory traversal")
}
// Sanitize the path by encoding invalid characters
safePath := sanitizePath(cleanPath)
// Join the root path and the sanitized URL path
finalPath := filepath.Join(rootPath, safePath)
return finalPath, nil
}
func SaveToFile(rootPath string, s *snapshot.Snapshot, done chan struct{}) {
parentPath := path.Join(rootPath, s.URL.Hostname)
urlPath := s.URL.Path
// If path is empty, add `index.gmi` as the file to save
if urlPath == "" || urlPath == "." {
urlPath = "index.gmi"
}
// If path ends with '/' then add index.gmi for the
// directory to be created.
if strings.HasSuffix(urlPath, "/") {
urlPath = strings.Join([]string{urlPath, "index.gmi"}, "")
}
finalPath, err := calcFilePath(parentPath, urlPath)
if err != nil {
logging.LogError("GeminiError saving %s: %w", s.URL, err)
return
}
// Ensure the directory exists
dir := filepath.Dir(finalPath)
if err := os.MkdirAll(dir, os.ModePerm); err != nil {
logging.LogError("Failed to create directory: %w", err)
return
}
if s.MimeType.Valid && s.MimeType.String == "text/gemini" {
err = os.WriteFile(finalPath, (*s).Data.V, 0o666)
} else {
err = os.WriteFile(finalPath, []byte((*s).GemText.String), 0o666)
}
if err != nil {
logging.LogError("GeminiError saving %s: %w", s.URL.Full, err)
}
close(done)
}
func ReadLines(path string) []string {
data, err := os.ReadFile(path)
if err != nil {
panic(fmt.Sprintf("Failed to read file: %s", err))
}
lines := strings.Split(string(data), "\n")
// remove last line if empty
// (happens when file ends with '\n')
if lines[len(lines)-1] == "" {
lines = lines[:len(lines)-1]
}
return lines
}
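
A brief caller-side sketch for SaveToFile: it performs the write and then closes the done channel, so callers wait on it. The root directory and the snapshot variable (snap) are placeholders for values the crawler already has:

done := make(chan struct{})
go gemini.SaveToFile("capsules", snap, done) // "capsules" is a hypothetical root dir
<-done // returns once the file is written or the failure has been logged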


@@ -1,49 +0,0 @@
package hostPool
import (
"sync"
"time"
"gemini-grc/logging"
)
var hostPool = HostPool{hostnames: make(map[string]struct{})} //nolint:gochecknoglobals
type HostPool struct {
hostnames map[string]struct{}
lock sync.RWMutex
}
//func (p *HostPool) add(key string) {
// p.lock.Lock()
// defer p.lock.Unlock()
// p.hostnames[key] = struct{}{}
//}
//
//func (p *HostPool) has(key string) bool {
// p.lock.RLock()
// defer p.lock.RUnlock()
// _, ok := p.hostnames[key]
// return ok
//}
func RemoveHostFromPool(key string) {
hostPool.lock.Lock()
defer hostPool.lock.Unlock()
delete(hostPool.hostnames, key)
}
func AddHostToHostPool(key string) {
for {
hostPool.lock.Lock()
_, exists := hostPool.hostnames[key]
if !exists {
hostPool.hostnames[key] = struct{}{}
hostPool.lock.Unlock()
return
}
hostPool.lock.Unlock()
time.Sleep(1 * time.Second)
logging.LogInfo("Waiting to add %s to pool...", key)
}
}
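
The pool is how workers serialize access to a host: AddHostToHostPool blocks, polling once per second, until no other worker holds the host, and RemoveHostFromPool releases it. A hypothetical worker-side sketch (u is a parsed URL with a Hostname field):

hostPool.AddHostToHostPool(u.Hostname) // blocks while another worker holds this host
defer hostPool.RemoveHostFromPool(u.Hostname)
// ... fetch the URL and store its snapshot while holding the host ...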


@@ -1,23 +1,188 @@
// Package logging provides a simple, structured logging interface using slog.
// It offers colored output for better readability in terminal environments.
package logging
import (
"context"
"fmt"
"io"
"log/slog"
"os"
"strings"
"sync"
)
// Global logger instance.
var slogLogger *slog.Logger
// Current log level - used to filter logs.
var currentLogLevel = slog.LevelInfo
// ANSI color codes for terminal output.
const (
colorReset = "\033[0m"
levelDebug = "\033[37m" // Gray
levelInfo = "\033[32m" // Green
levelWarn = "\033[33m" // Yellow
levelError = "\033[31m" // Red
)
// Standard helper functions for logging
func LogDebug(format string, args ...interface{}) {
if slogLogger != nil {
slogLogger.Debug(fmt.Sprintf(format, args...))
}
}
func LogInfo(format string, args ...interface{}) {
if slogLogger != nil {
slogLogger.Info(fmt.Sprintf(format, args...))
}
}
func LogWarn(format string, args ...interface{}) {
if slogLogger != nil {
slogLogger.Warn(fmt.Sprintf(format, args...))
}
}
func LogError(format string, args ...interface{}) {
if slogLogger != nil {
msg := fmt.Sprintf(format, args...)
slogLogger.Error(msg, slog.String("error", msg))
}
}
// InitSlogger initializes the slog logger with custom handler.
func InitSlogger(level slog.Level) {
// Set the global log level
currentLogLevel = level
// Create the handler with color support
baseHandler := NewColorHandler(os.Stderr)
// Create and set the new logger
slogLogger = slog.New(baseHandler)
// Set as default logger
slog.SetDefault(slogLogger)
// Print a startup message to verify logging is working
slogLogger.Info("Slog initialized", "level", level.String())
}
// GetSlogger returns the current global slog logger instance.
// Can be used by other packages
func GetSlogger() *slog.Logger {
if slogLogger == nil {
return slog.Default()
}
return slogLogger
}
// ColorHandler formats logs with colors for better terminal readability
type ColorHandler struct {
out io.Writer
mu *sync.Mutex
attrs []slog.Attr // Store attributes for this handler
}
// NewColorHandler creates a new handler that writes colored logs to the provided writer
func NewColorHandler(w io.Writer) *ColorHandler {
if w == nil {
w = os.Stderr
}
return &ColorHandler{
out: w,
mu: &sync.Mutex{},
attrs: make([]slog.Attr, 0),
}
}
// Enabled checks if the given log level is enabled
func (h *ColorHandler) Enabled(_ context.Context, level slog.Level) bool {
return level >= currentLogLevel
}
// Handle processes a log record, formatting it with colors
func (h *ColorHandler) Handle(ctx context.Context, r slog.Record) error {
h.mu.Lock()
defer h.mu.Unlock()
// Format time
timeStr := fmt.Sprintf("[%s]", r.Time.Format("2006-01-02 15:04:05"))
// Format level
var levelStr string
switch r.Level {
case slog.LevelDebug:
levelStr = fmt.Sprintf("%sDEBUG%s", levelDebug, colorReset)
case slog.LevelInfo:
levelStr = fmt.Sprintf("%sINFO%s", levelInfo, colorReset)
case slog.LevelWarn:
levelStr = fmt.Sprintf("%sWARN%s", levelWarn, colorReset)
case slog.LevelError:
levelStr = fmt.Sprintf("%sERROR%s", levelError, colorReset)
default:
levelStr = r.Level.String()
}
// Build prefix
prefix := fmt.Sprintf("%s %s ", timeStr, levelStr)
// Format message - we'll collect any special fields separately
attrMap := make(map[string]string)
// First collect attributes from the handler itself
for _, attr := range h.attrs {
attrMap[attr.Key] = attr.Value.String()
}
// Then extract from record attributes, which might override handler attributes
r.Attrs(func(a slog.Attr) bool {
attrMap[a.Key] = a.Value.String()
return true
})
// Format message with attributes on the same line
msg := fmt.Sprintf("%s%s", prefix, r.Message)
// Add attributes to the same line if present
if len(attrMap) > 0 {
// Add a space after the message
msg += " "
// Build attribute string
attrs := make([]string, 0, len(attrMap))
for k, v := range attrMap {
attrs = append(attrs, fmt.Sprintf("%s=%s", k, v))
}
// Join all attributes with spaces
msg += strings.Join(attrs, " ")
}
// Add newline at the end
msg += "\n"
// Write to output
_, err := io.WriteString(h.out, msg)
return err
}
// WithGroup returns a new Handler that inherits from this Handler
func (h *ColorHandler) WithGroup(name string) slog.Handler {
return h // For simplicity, we don't support groups
}
// WithAttrs returns a new Handler whose attributes include both
// the receiver's attributes and the arguments
func (h *ColorHandler) WithAttrs(attrs []slog.Attr) slog.Handler {
// Create a new handler with the same output but additional attributes
newHandler := &ColorHandler{
out: h.out,
mu: h.mu,
attrs: append(append([]slog.Attr{}, h.attrs...), attrs...),
}
return newHandler
}
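
For reference, a minimal initialization sketch and the single-line output format the ColorHandler produces (the timestamp and values are illustrative; attribute order may vary, since the handler collects attributes in a map):

package main

import (
    "log/slog"

    "gemini-grc/logging"
)

func main() {
    logging.InitSlogger(slog.LevelDebug)
    logging.GetSlogger().Info("Crawling host", "host", "example.org", "worker_id", 3)
    // Written to stderr, INFO rendered in green, attributes kept on the same line:
    // [2025-05-22 12:44:08] INFO Crawling host host=example.org worker_id=3
}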


@@ -1,8 +1,22 @@
DROP TABLE IF EXISTS snapshots;
DROP TABLE IF EXISTS urls;
CREATE TABLE urls (
id SERIAL PRIMARY KEY,
url TEXT NOT NULL,
host TEXT NOT NULL,
timestamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
being_processed BOOLEAN
);
CREATE UNIQUE INDEX urls_url_key ON urls (url);
CREATE INDEX idx_urls_url ON urls (url);
CREATE INDEX idx_urls_timestamp ON urls (timestamp);
CREATE INDEX idx_being_processed ON urls (being_processed);
CREATE TABLE snapshots (
id SERIAL PRIMARY KEY,
url TEXT NOT NULL,
host TEXT NOT NULL,
timestamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
mimetype TEXT,
@@ -11,9 +25,11 @@ CREATE TABLE snapshots (
links JSONB,
lang TEXT,
response_code INTEGER,
error TEXT,
header TEXT
);
CREATE UNIQUE INDEX idx_url_timestamp ON snapshots (url, timestamp);
CREATE INDEX idx_url ON snapshots (url);
CREATE INDEX idx_timestamp ON snapshots (timestamp);
CREATE INDEX idx_mimetype ON snapshots (mimetype);
@@ -21,15 +37,7 @@ CREATE INDEX idx_lang ON snapshots (lang);
CREATE INDEX idx_response_code ON snapshots (response_code);
CREATE INDEX idx_error ON snapshots (error);
CREATE INDEX idx_host ON snapshots (host);
CREATE INDEX idx_response_code_error ON snapshots (response_code, error);
CREATE INDEX idx_response_code_error_nulls ON snapshots (response_code, error) WHERE response_code IS NULL AND error IS NULL;
CREATE INDEX idx_snapshots_unprocessed ON snapshots (host) WHERE response_code IS NULL AND error IS NULL;
CREATE INDEX idx_url_latest ON snapshots (url, timestamp DESC);