From 55bb0d96d0c3628ceca4907ad279396b59ad8b35 Mon Sep 17 00:00:00 2001 From: antanst <> Date: Wed, 18 Jun 2025 12:02:55 +0300 Subject: [PATCH] Update last_crawled timestamp when skipping duplicate content and improve error handling --- common/worker.go | 4 ++++ db/db.go | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/common/worker.go b/common/worker.go index 4f6a331..ddc407d 100644 --- a/common/worker.go +++ b/common/worker.go @@ -236,6 +236,10 @@ func WorkOnUrl(ctx context.Context, tx *sqlx.Tx, url string) (err error) { return saveSnapshotAndRemoveURL(ctx, tx, s) } else { contextlog.LogInfoWithContext(ctx, logging.GetSlogger(), "%2d %s (but old content exists, not updating)", s.ResponseCode.ValueOrZero(), s.Error.ValueOrZero()) + err = gemdb.Database.UpdateLastCrawled(ctx, tx, s.URL.String()) + if err != nil { + return err + } return removeURL(ctx, tx, s.URL.String()) } } else { diff --git a/db/db.go b/db/db.go index 9607184..21481dd 100644 --- a/db/db.go +++ b/db/db.go @@ -448,7 +448,7 @@ func (d *DbServiceImpl) GetLatestSnapshot(ctx context.Context, tx *sqlx.Tx, url if errors.Is(err, sql.ErrNoRows) { return nil, nil } - return nil, xerrors.NewError(fmt.Errorf("cannot get latest snapshot for URL %s: %w", url, err), 0, "", false) + return nil, xerrors.NewError(fmt.Errorf("cannot get latest snapshot for URL %s: %w", url, err), 0, "", true) } return s, nil }