From ffeef334e737307288bef5db7be95778946af7de Mon Sep 17 00:00:00 2001 From: antanst <> Date: Wed, 18 Jun 2025 12:02:55 +0300 Subject: [PATCH] Update last_crawled timestamp when skipping duplicate content and improve error handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- common/worker.go | 4 ++++ db/db.go | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/common/worker.go b/common/worker.go index 4f6a331..ddc407d 100644 --- a/common/worker.go +++ b/common/worker.go @@ -236,6 +236,10 @@ func WorkOnUrl(ctx context.Context, tx *sqlx.Tx, url string) (err error) { return saveSnapshotAndRemoveURL(ctx, tx, s) } else { contextlog.LogInfoWithContext(ctx, logging.GetSlogger(), "%2d %s (but old content exists, not updating)", s.ResponseCode.ValueOrZero(), s.Error.ValueOrZero()) + err = gemdb.Database.UpdateLastCrawled(ctx, tx, s.URL.String()) + if err != nil { + return err + } return removeURL(ctx, tx, s.URL.String()) } } else { diff --git a/db/db.go b/db/db.go index 9607184..21481dd 100644 --- a/db/db.go +++ b/db/db.go @@ -448,7 +448,7 @@ func (d *DbServiceImpl) GetLatestSnapshot(ctx context.Context, tx *sqlx.Tx, url if errors.Is(err, sql.ErrNoRows) { return nil, nil } - return nil, xerrors.NewError(fmt.Errorf("cannot get latest snapshot for URL %s: %w", url, err), 0, "", false) + return nil, xerrors.NewError(fmt.Errorf("cannot get latest snapshot for URL %s: %w", url, err), 0, "", true) } return s, nil }