Update last_crawled timestamp when skipping duplicate content and improve error handling

This commit is contained in:
antanst
2025-06-18 12:02:55 +03:00
parent 967f371777
commit 621868f3b3
2 changed files with 5 additions and 1 deletions

View File

@@ -236,6 +236,10 @@ func WorkOnUrl(ctx context.Context, tx *sqlx.Tx, url string) (err error) {
return saveSnapshotAndRemoveURL(ctx, tx, s) return saveSnapshotAndRemoveURL(ctx, tx, s)
} else { } else {
contextlog.LogInfoWithContext(ctx, logging.GetSlogger(), "%2d %s (but old content exists, not updating)", s.ResponseCode.ValueOrZero(), s.Error.ValueOrZero()) contextlog.LogInfoWithContext(ctx, logging.GetSlogger(), "%2d %s (but old content exists, not updating)", s.ResponseCode.ValueOrZero(), s.Error.ValueOrZero())
err = gemdb.Database.UpdateLastCrawled(ctx, tx, s.URL.String())
if err != nil {
return err
}
return removeURL(ctx, tx, s.URL.String()) return removeURL(ctx, tx, s.URL.String())
} }
} else { } else {

View File

@@ -448,7 +448,7 @@ func (d *DbServiceImpl) GetLatestSnapshot(ctx context.Context, tx *sqlx.Tx, url
if errors.Is(err, sql.ErrNoRows) { if errors.Is(err, sql.ErrNoRows) {
return nil, nil return nil, nil
} }
return nil, xerrors.NewError(fmt.Errorf("cannot get latest snapshot for URL %s: %w", url, err), 0, "", false) return nil, xerrors.NewError(fmt.Errorf("cannot get latest snapshot for URL %s: %w", url, err), 0, "", true)
} }
return s, nil return s, nil
} }