Implement structured logging with slog

- Replace zerolog with Go's standard slog package
- Add ColorHandler for terminal color output
- Add context-aware logging system
- Format attributes on the same line as log messages
- Use green color for INFO level logs
- Set up context value extraction helpers
committed by antanst on 2025-05-22 12:44:08 +03:00
parent b8ea6fab4a
commit 4ef3f70f1f
7 changed files with 393 additions and 322 deletions
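
To illustrate how the pieces introduced here fit together, the following is a minimal usage sketch; the main function, the example URL, and the gemini-grc/contextlog import path are illustrative assumptions, not part of this commit:

package main

import (
	"context"
	"log/slog"

	"gemini-grc/contextlog" // assumed import path for the new contextlog package
	"gemini-grc/contextutil"
	"gemini-grc/logging"
)

func main() {
	// Initialize the global slog logger with the colored terminal handler.
	logging.InitSlogger(slog.LevelInfo)

	// Build a request-scoped context carrying URL, host, request ID and worker ID.
	ctx, cancel := contextutil.NewRequestContext(context.Background(),
		"gemini://example.org/page.gmi", "example.org", 3)
	defer cancel()

	// Tag the context with the component doing the work.
	ctx = contextutil.ContextWithComponent(ctx, "crawler")

	// Context values are emitted as key=value attributes on the same line as the message.
	contextlog.LogInfoWithContext(ctx, logging.GetSlogger(), "fetched %d bytes", 2048)
}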


@@ -0,0 +1,112 @@
package contextlog
import (
"context"
"fmt"
"log/slog"
"gemini-grc/contextutil"
)
// SlogEventWithContext returns a logger with context values attached as structured fields.
func SlogEventWithContext(ctx context.Context, logger *slog.Logger) *slog.Logger {
// Start with the provided logger
if logger == nil {
// If logger isn't initialized, use the default logger
return slog.Default()
}
// Get context values - will be added directly to log records
host := contextutil.GetHostFromContext(ctx)
requestID := contextutil.GetRequestIDFromContext(ctx)
component := contextutil.GetComponentFromContext(ctx)
workerID := contextutil.GetWorkerIDFromContext(ctx)
url := contextutil.GetURLFromContext(ctx)
// Add all context fields to the logger
if host != "" {
logger = logger.With("host", host)
}
if requestID != "" {
logger = logger.With("request_id", requestID)
}
if workerID >= 0 {
logger = logger.With("worker_id", workerID)
}
if component != "" {
logger = logger.With("component", component)
}
if url != "" {
logger = logger.With("url", url)
}
return logger
}
// LogDebugWithContext logs a debug message with context information.
func LogDebugWithContext(ctx context.Context, logger *slog.Logger, format string, args ...interface{}) {
if logger == nil {
return
}
// Create logger with context fields
contextLogger := SlogEventWithContext(ctx, logger)
// Format the message
message := fmt.Sprintf(format, args...)
// Log with context data in the record attributes
contextLogger.Debug(message)
}
// LogInfoWithContext logs an info message with context information.
func LogInfoWithContext(ctx context.Context, logger *slog.Logger, format string, args ...interface{}) {
if logger == nil {
return
}
// Create logger with context fields
contextLogger := SlogEventWithContext(ctx, logger)
// Format the message
message := fmt.Sprintf(format, args...)
// Log with context data in the record attributes
contextLogger.Info(message)
}
// LogWarnWithContext logs a warning message with context information.
func LogWarnWithContext(ctx context.Context, logger *slog.Logger, format string, args ...interface{}) {
if logger == nil {
return
}
// Create logger with context fields
contextLogger := SlogEventWithContext(ctx, logger)
// Format the message
message := fmt.Sprintf(format, args...)
// Log with context data in the record attributes
contextLogger.Warn(message)
}
// LogErrorWithContext logs an error message with context information
func LogErrorWithContext(ctx context.Context, logger *slog.Logger, format string, args ...interface{}) {
if logger == nil {
return
}
// Create logger with context fields
contextLogger := SlogEventWithContext(ctx, logger)
// Format the message
msg := fmt.Sprintf(format, args...)
// Log with context data in the record attributes
contextLogger.Error(msg, slog.String("error", msg))
}
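
As a sketch of how a caller might reuse the enriched logger returned by SlogEventWithContext directly, rather than going through the Log*WithContext wrappers; the crawl package, the fetchPage function, and the gemini-grc/contextlog import path are hypothetical:

package crawl // hypothetical caller package

import (
	"context"

	"gemini-grc/contextlog" // assumed import path for the new contextlog package
	"gemini-grc/logging"
)

// fetchPage sketches reusing one context-enriched logger for several records.
func fetchPage(ctx context.Context) {
	logger := contextlog.SlogEventWithContext(ctx, logging.GetSlogger())
	logger.Debug("resolving host")
	logger.Info("connection established", "tls", true)
	// Passing a nil logger makes SlogEventWithContext fall back to slog.Default(),
	// so callers do not need their own nil checks here.
}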

contextutil/context.go

@@ -0,0 +1,89 @@
package contextutil
import (
"context"
"time"
"git.antanst.com/antanst/uid"
)
// ContextKey type for context values
type ContextKey string
// Context keys
const (
CtxKeyURL ContextKey = "url" // Full URL being processed
CtxKeyHost ContextKey = "host" // Host of the URL
CtxKeyRequestID ContextKey = "request_id" // Unique ID for this processing request
CtxKeyWorkerID ContextKey = "worker_id" // Worker ID processing this request
CtxKeyStartTime ContextKey = "start_time" // When processing started
CtxKeyComponent ContextKey = "component" // Component name for logging
)
// NewRequestContext creates a new, cancellable context with a 120-second timeout
// and attaches the URL, host, request ID, worker ID and start time as values.
func NewRequestContext(parentCtx context.Context, url string, host string, workerID int) (context.Context, context.CancelFunc) {
ctx, cancel := context.WithTimeout(parentCtx, 120*time.Second)
requestID := uid.UID()
ctx = context.WithValue(ctx, CtxKeyURL, url)
ctx = context.WithValue(ctx, CtxKeyHost, host)
ctx = context.WithValue(ctx, CtxKeyRequestID, requestID)
ctx = context.WithValue(ctx, CtxKeyWorkerID, workerID)
ctx = context.WithValue(ctx, CtxKeyStartTime, time.Now())
return ctx, cancel
}
// Helper functions to get values from context
// GetURLFromContext retrieves the URL from the context
func GetURLFromContext(ctx context.Context) string {
if url, ok := ctx.Value(CtxKeyURL).(string); ok {
return url
}
return ""
}
// GetHostFromContext retrieves the host from the context
func GetHostFromContext(ctx context.Context) string {
if host, ok := ctx.Value(CtxKeyHost).(string); ok {
return host
}
return ""
}
// GetRequestIDFromContext retrieves the request ID from the context
func GetRequestIDFromContext(ctx context.Context) string {
if id, ok := ctx.Value(CtxKeyRequestID).(string); ok {
return id
}
return ""
}
// GetWorkerIDFromContext retrieves the worker ID from the context
func GetWorkerIDFromContext(ctx context.Context) int {
if id, ok := ctx.Value(CtxKeyWorkerID).(int); ok {
return id
}
return -1
}
// GetStartTimeFromContext retrieves the start time from the context
func GetStartTimeFromContext(ctx context.Context) time.Time {
if startTime, ok := ctx.Value(CtxKeyStartTime).(time.Time); ok {
return startTime
}
return time.Time{}
}
// GetComponentFromContext retrieves the component name from the context
func GetComponentFromContext(ctx context.Context) string {
if component, ok := ctx.Value(CtxKeyComponent).(string); ok {
return component
}
return ""
}
// ContextWithComponent adds or updates the component name in the context
func ContextWithComponent(ctx context.Context, component string) context.Context {
return context.WithValue(ctx, CtxKeyComponent, component)
}
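
A small sketch of how the getters can be combined, for example to report how long a request has been running; the logElapsed helper and its caller package are hypothetical:

package crawl // hypothetical caller package

import (
	"context"
	"log/slog"
	"time"

	"gemini-grc/contextutil"
)

// logElapsed reports how long the request carried by ctx has been running.
func logElapsed(ctx context.Context, logger *slog.Logger) {
	start := contextutil.GetStartTimeFromContext(ctx)
	if start.IsZero() {
		return // ctx was not created via NewRequestContext
	}
	logger.Info("request progress",
		"request_id", contextutil.GetRequestIDFromContext(ctx),
		"worker_id", contextutil.GetWorkerIDFromContext(ctx),
		"elapsed", time.Since(start).String(),
	)
}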

db/db.go

@@ -1,140 +0,0 @@
package db
import (
"encoding/json"
"fmt"
"os"
"strconv"
"time"
"gemini-grc/common/snapshot"
commonUrl "gemini-grc/common/url"
"gemini-grc/config"
"gemini-grc/logging"
"github.com/antanst/go_errors"
_ "github.com/jackc/pgx/v5/stdlib" // PGX driver for PostgreSQL
"github.com/jmoiron/sqlx"
"github.com/lib/pq"
)
func ConnectToDB() (*sqlx.DB, error) {
connStr := fmt.Sprintf("postgres://%s:%s@%s:%s/%s", //nolint:nosprintfhostport
os.Getenv("PG_USER"),
os.Getenv("PG_PASSWORD"),
os.Getenv("PG_HOST"),
os.Getenv("PG_PORT"),
os.Getenv("PG_DATABASE"),
)
// Create a connection pool
db, err := sqlx.Open("pgx", connStr)
if err != nil {
return nil, go_errors.NewFatalError(fmt.Errorf("unable to connect to database with URL %s: %w", connStr, err))
}
// TODO move PG_MAX_OPEN_CONNECTIONS to config env variables
maxConnections, err := strconv.Atoi(os.Getenv("PG_MAX_OPEN_CONNECTIONS"))
if err != nil {
return nil, go_errors.NewFatalError(fmt.Errorf("unable to set DB max connections: %w", err))
}
db.SetMaxOpenConns(maxConnections)
err = db.Ping()
if err != nil {
return nil, go_errors.NewFatalError(fmt.Errorf("unable to ping database: %w", err))
}
logging.LogDebug("Connected to database")
return db, nil
}
// IsDeadlockError checks if the error is a PostgreSQL deadlock error.
func IsDeadlockError(err error) bool {
err = go_errors.Unwrap(err)
var pqErr *pq.Error
if go_errors.As(err, &pqErr) {
return pqErr.Code == "40P01" // PostgreSQL deadlock error code
}
return false
}
func GetRandomUrls(tx *sqlx.Tx) ([]string, error) {
var urls []string
err := tx.Select(&urls, SQL_SELECT_RANDOM_URLS, config.CONFIG.WorkerBatchSize)
if err != nil {
return nil, go_errors.NewFatalError(err)
}
return urls, nil
}
func GetRandomUrlsWithBasePath(tx *sqlx.Tx) ([]string, error) {
SqlQuery := `SELECT url FROM snapshots WHERE url ~ '^[^:]+://[^/]+/?$' ORDER BY RANDOM() LIMIT $1`
var urls []string
err := tx.Select(&urls, SqlQuery, config.CONFIG.WorkerBatchSize)
if err != nil {
return nil, go_errors.NewFatalError(err)
}
return urls, nil
}
func InsertURL(tx *sqlx.Tx, url string) error {
logging.LogDebug("Inserting URL %s", url)
query := SQL_INSERT_URL
normalizedURL, err := commonUrl.ParseURL(url, "", true)
if err != nil {
return err
}
a := struct {
Url string
Host string
Timestamp time.Time
}{
Url: normalizedURL.Full,
Host: normalizedURL.Hostname,
Timestamp: time.Now(),
}
_, err = tx.NamedExec(query, a)
if err != nil {
return go_errors.NewFatalError(fmt.Errorf("cannot insert URL: database error %w URL %s", err, url))
}
return nil
}
func DeleteURL(tx *sqlx.Tx, url string) error {
logging.LogDebug("Deleting URL %s", url)
query := SQL_DELETE_URL
_, err := tx.Exec(query, url)
if err != nil {
return go_errors.NewFatalError(fmt.Errorf("cannot delete URL: database error %w URL %s", err, url))
}
return nil
}
func OverwriteSnapshot(tx *sqlx.Tx, s *snapshot.Snapshot) (err error) {
if config.CONFIG.DryRun {
marshalled, err := json.MarshalIndent(s, "", " ")
if err != nil {
return go_errors.NewFatalError(fmt.Errorf("JSON serialization error for %v", s))
}
logging.LogDebug("Would upsert snapshot %s", marshalled)
return nil
}
query := SQL_UPSERT_SNAPSHOT
rows, err := tx.NamedQuery(query, s)
if err != nil {
return go_errors.NewFatalError(fmt.Errorf("cannot overwrite snapshot: %w", err))
}
defer func() {
_err := rows.Close()
if err == nil && _err != nil {
err = go_errors.NewFatalError(fmt.Errorf("cannot overwrite snapshot: error closing rows: %w", err))
}
}()
if rows.Next() {
var returnedID int
err = rows.Scan(&returnedID)
if err != nil {
return go_errors.NewFatalError(fmt.Errorf("cannot overwrite snapshot: error scanning rows: %w", err))
}
s.ID = returnedID
}
return nil
}


@@ -1,114 +0,0 @@
package gemini
import (
"fmt"
"net/url"
"os"
"path"
"path/filepath"
"strings"
"gemini-grc/common/snapshot"
"gemini-grc/logging"
)
// sanitizePath encodes invalid filesystem characters using URL encoding.
// Example:
// /example/path/to/page?query=param&another=value
// would become
// example/path/to/page%3Fquery%3Dparam%26another%3Dvalue
func sanitizePath(p string) string {
// Split the path into its components
components := strings.Split(p, "/")
// Encode each component separately
for i, component := range components {
// Decode any existing percent-encoded characters
decodedComponent, err := url.PathUnescape(component)
if err != nil {
decodedComponent = component // Fallback to original if unescape fails
}
// Encode the component to escape invalid filesystem characters
encodedComponent := url.QueryEscape(decodedComponent)
// Replace '+' (from QueryEscape) with '%20' to handle spaces correctly
encodedComponent = strings.ReplaceAll(encodedComponent, "+", "%20")
components[i] = encodedComponent
}
// Rejoin the components into a sanitized path
safe := filepath.Join(components...)
return safe
}
// getFilePath constructs a safe file path from the root path and URL path.
// It URL-encodes invalid filesystem characters to ensure the path is valid.
func calcFilePath(rootPath, urlPath string) (string, error) {
// Normalize the URL path
cleanPath := filepath.Clean(urlPath)
// Safe check to prevent directory traversal
if strings.Contains(cleanPath, "..") {
return "", fmt.Errorf("Invalid URL path: contains directory traversal")
}
// Sanitize the path by encoding invalid characters
safePath := sanitizePath(cleanPath)
// Join the root path and the sanitized URL path
finalPath := filepath.Join(rootPath, safePath)
return finalPath, nil
}
func SaveToFile(rootPath string, s *snapshot.Snapshot, done chan struct{}) {
parentPath := path.Join(rootPath, s.URL.Hostname)
urlPath := s.URL.Path
// If path is empty, add `index.gmi` as the file to save
if urlPath == "" || urlPath == "." {
urlPath = "index.gmi"
}
// If path ends with '/' then add index.gmi for the
// directory to be created.
if strings.HasSuffix(urlPath, "/") {
urlPath = strings.Join([]string{urlPath, "index.gmi"}, "")
}
finalPath, err := calcFilePath(parentPath, urlPath)
if err != nil {
logging.LogError("GeminiError saving %s: %w", s.URL, err)
return
}
// Ensure the directory exists
dir := filepath.Dir(finalPath)
if err := os.MkdirAll(dir, os.ModePerm); err != nil {
logging.LogError("Failed to create directory: %w", err)
return
}
if s.MimeType.Valid && s.MimeType.String == "text/gemini" {
err = os.WriteFile(finalPath, (*s).Data.V, 0o666)
} else {
err = os.WriteFile(finalPath, []byte((*s).GemText.String), 0o666)
}
if err != nil {
logging.LogError("GeminiError saving %s: %w", s.URL.Full, err)
}
close(done)
}
func ReadLines(path string) []string {
data, err := os.ReadFile(path)
if err != nil {
panic(fmt.Sprintf("Failed to read file: %s", err))
}
lines := strings.Split(string(data), "\n")
// remove last line if empty
// (happens when file ends with '\n')
if lines[len(lines)-1] == "" {
lines = lines[:len(lines)-1]
}
return lines
}


@@ -1,49 +0,0 @@
package hostPool
import (
"sync"
"time"
"gemini-grc/logging"
)
var hostPool = HostPool{hostnames: make(map[string]struct{})} //nolint:gochecknoglobals
type HostPool struct {
hostnames map[string]struct{}
lock sync.RWMutex
}
//func (p *HostPool) add(key string) {
// p.lock.Lock()
// defer p.lock.Unlock()
// p.hostnames[key] = struct{}{}
//}
//
//func (p *HostPool) has(key string) bool {
// p.lock.RLock()
// defer p.lock.RUnlock()
// _, ok := p.hostnames[key]
// return ok
//}
func RemoveHostFromPool(key string) {
hostPool.lock.Lock()
defer hostPool.lock.Unlock()
delete(hostPool.hostnames, key)
}
func AddHostToHostPool(key string) {
for {
hostPool.lock.Lock()
_, exists := hostPool.hostnames[key]
if !exists {
hostPool.hostnames[key] = struct{}{}
hostPool.lock.Unlock()
return
}
hostPool.lock.Unlock()
time.Sleep(1 * time.Second)
logging.LogInfo("Waiting to add %s to pool...", key)
}
}


@@ -1,23 +1,188 @@
// Package logging provides a simple, structured logging interface using slog.
// It offers colored output for better readability in terminal environments.
package logging
import (
"context"
"fmt"
zlog "github.com/rs/zerolog/log"
"io"
"log/slog"
"os"
"strings"
"sync"
)
// Global logger instance.
var slogLogger *slog.Logger
// Current log level - used to filter logs.
var currentLogLevel = slog.LevelInfo
// ANSI color codes for terminal output.
const (
colorReset = "\033[0m"
levelDebug = "\033[37m" // Gray
levelInfo = "\033[32m" // Green
levelWarn = "\033[33m" // Yellow
levelError = "\033[31m" // Red
)
// Standard helper functions for logging
func LogDebug(format string, args ...interface{}) {
zlog.Debug().Msg(fmt.Sprintf(format, args...))
if slogLogger != nil {
slogLogger.Debug(fmt.Sprintf(format, args...))
}
}
func LogInfo(format string, args ...interface{}) {
zlog.Info().Msg(fmt.Sprintf(format, args...))
if slogLogger != nil {
slogLogger.Info(fmt.Sprintf(format, args...))
}
}
func LogWarn(format string, args ...interface{}) {
zlog.Warn().Msg(fmt.Sprintf(format, args...))
if slogLogger != nil {
slogLogger.Warn(fmt.Sprintf(format, args...))
}
}
func LogError(format string, args ...interface{}) {
zlog.Error().Err(fmt.Errorf(format, args...)).Msg("")
if slogLogger != nil {
msg := fmt.Sprintf(format, args...)
slogLogger.Error(msg, slog.String("error", msg))
}
}
// InitSlogger initializes the slog logger with custom handler.
func InitSlogger(level slog.Level) {
// Set the global log level
currentLogLevel = level
// Create the handler with color support
baseHandler := NewColorHandler(os.Stderr)
// Create and set the new logger
slogLogger = slog.New(baseHandler)
// Set as default logger
slog.SetDefault(slogLogger)
// Print a startup message to verify logging is working
slogLogger.Info("Slog initialized", "level", level.String())
}
// GetSlogger returns the current global slog logger instance.
// Can be used by other packages
func GetSlogger() *slog.Logger {
if slogLogger == nil {
return slog.Default()
}
return slogLogger
}
// ColorHandler formats logs with colors for better terminal readability
type ColorHandler struct {
out io.Writer
mu *sync.Mutex
attrs []slog.Attr // Store attributes for this handler
}
// NewColorHandler creates a new handler that writes colored logs to the provided writer
func NewColorHandler(w io.Writer) *ColorHandler {
if w == nil {
w = os.Stderr
}
return &ColorHandler{
out: w,
mu: &sync.Mutex{},
attrs: make([]slog.Attr, 0),
}
}
// Enabled reports whether records at the given level should be logged
func (h *ColorHandler) Enabled(_ context.Context, level slog.Level) bool {
return level >= currentLogLevel
}
// Handle processes a log record, formatting it with colors
func (h *ColorHandler) Handle(ctx context.Context, r slog.Record) error {
h.mu.Lock()
defer h.mu.Unlock()
// Format time
timeStr := fmt.Sprintf("[%s]", r.Time.Format("2006-01-02 15:04:05"))
// Format level
var levelStr string
switch r.Level {
case slog.LevelDebug:
levelStr = fmt.Sprintf("%sDEBUG%s", levelDebug, colorReset)
case slog.LevelInfo:
levelStr = fmt.Sprintf("%sINFO%s", levelInfo, colorReset)
case slog.LevelWarn:
levelStr = fmt.Sprintf("%sWARN%s", levelWarn, colorReset)
case slog.LevelError:
levelStr = fmt.Sprintf("%sERROR%s", levelError, colorReset)
default:
levelStr = r.Level.String()
}
// Build prefix
prefix := fmt.Sprintf("%s %s ", timeStr, levelStr)
// Format message - we'll collect any special fields separately
attrMap := make(map[string]string)
// First collect attributes from the handler itself
for _, attr := range h.attrs {
attrMap[attr.Key] = attr.Value.String()
}
// Then extract from record attributes, which might override handler attributes
r.Attrs(func(a slog.Attr) bool {
attrMap[a.Key] = a.Value.String()
return true
})
// Format message with attributes on the same line
msg := fmt.Sprintf("%s%s", prefix, r.Message)
// Add attributes to the same line if present
if len(attrMap) > 0 {
// Add a space after the message
msg += " "
// Build attribute string
attrs := make([]string, 0, len(attrMap))
for k, v := range attrMap {
attrs = append(attrs, fmt.Sprintf("%s=%s", k, v))
}
// Join all attributes with spaces
msg += strings.Join(attrs, " ")
}
// Add newline at the end
msg += "\n"
// Write to output
_, err := io.WriteString(h.out, msg)
return err
}
// WithGroup returns the receiver unchanged; for simplicity, this handler does not support groups
func (h *ColorHandler) WithGroup(name string) slog.Handler {
return h
}
// WithAttrs returns a new Handler whose attributes include both
// the receiver's attributes and the arguments
func (h *ColorHandler) WithAttrs(attrs []slog.Attr) slog.Handler {
// Create a new handler with the same output but additional attributes
newHandler := &ColorHandler{
out: h.out,
mu: h.mu,
attrs: append(append([]slog.Attr{}, h.attrs...), attrs...),
}
return newHandler
}
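
A sketch of wiring the handler to a custom writer and of the line format it produces; the sample output line is illustrative, and attribute order may vary because the handler collects attributes in a map:

package main

import (
	"log/slog"
	"os"

	"gemini-grc/logging"
)

func main() {
	// Attach the colored handler to stdout instead of the default stderr.
	handler := logging.NewColorHandler(os.Stdout)
	logger := slog.New(handler).With("component", "crawler")

	// Handler attributes (from With) and call-site attributes are rendered on the
	// same line as the message, e.g. (timestamp illustrative, INFO shown in green):
	//   [2025-05-22 12:44:08] INFO fetch finished component=crawler url=gemini://example.org/ status=20
	logger.Info("fetch finished", "url", "gemini://example.org/", "status", 20)
}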


@@ -1,8 +1,22 @@
DROP TABLE IF EXISTS snapshots;
DROP TABLE IF EXISTS urls;
CREATE TABLE urls (
id SERIAL PRIMARY KEY,
url TEXT NOT NULL,
host TEXT NOT NULL,
timestamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
being_processed BOOLEAN
);
CREATE UNIQUE INDEX urls_url_key ON urls (url);
CREATE INDEX idx_urls_url ON urls (url);
CREATE INDEX idx_urls_timestamp ON urls (timestamp);
CREATE INDEX idx_being_processed ON urls (being_processed);
CREATE TABLE snapshots (
id SERIAL PRIMARY KEY,
url TEXT NOT NULL UNIQUE,
url TEXT NOT NULL,
host TEXT NOT NULL,
timestamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
mimetype TEXT,
@@ -11,9 +25,11 @@ CREATE TABLE snapshots (
links JSONB,
lang TEXT,
response_code INTEGER,
error TEXT
error TEXT,
header TEXT
);
CREATE UNIQUE INDEX idx_url_timestamp ON snapshots (url, timestamp);
CREATE INDEX idx_url ON snapshots (url);
CREATE INDEX idx_timestamp ON snapshots (timestamp);
CREATE INDEX idx_mimetype ON snapshots (mimetype);
@@ -21,15 +37,7 @@ CREATE INDEX idx_lang ON snapshots (lang);
CREATE INDEX idx_response_code ON snapshots (response_code);
CREATE INDEX idx_error ON snapshots (error);
CREATE INDEX idx_host ON snapshots (host);
CREATE INDEX unique_uid_url ON snapshots (uid, url);
CREATE INDEX idx_response_code_error ON snapshots (response_code, error);
CREATE INDEX idx_response_code_error_nulls ON snapshots (response_code, error) WHERE response_code IS NULL AND error IS NULL;
CREATE TABLE urls (
id SERIAL PRIMARY KEY,
url TEXT NOT NULL UNIQUE,
host TEXT NOT NULL,
timestamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX idx_urls_url ON urls (url);
CREATE INDEX idx_urls_timestamp ON urls (timestamp);
CREATE INDEX idx_snapshots_unprocessed ON snapshots (host) WHERE response_code IS NULL AND error IS NULL;
CREATE INDEX idx_url_latest ON snapshots (url, timestamp DESC);