Update last_crawled timestamp when skipping duplicate content and improve error handling
This commit is contained in:
@@ -236,6 +236,10 @@ func WorkOnUrl(ctx context.Context, tx *sqlx.Tx, url string) (err error) {
|
|||||||
return saveSnapshotAndRemoveURL(ctx, tx, s)
|
return saveSnapshotAndRemoveURL(ctx, tx, s)
|
||||||
} else {
|
} else {
|
||||||
contextlog.LogInfoWithContext(ctx, logging.GetSlogger(), "%2d %s (but old content exists, not updating)", s.ResponseCode.ValueOrZero(), s.Error.ValueOrZero())
|
contextlog.LogInfoWithContext(ctx, logging.GetSlogger(), "%2d %s (but old content exists, not updating)", s.ResponseCode.ValueOrZero(), s.Error.ValueOrZero())
|
||||||
|
err = gemdb.Database.UpdateLastCrawled(ctx, tx, s.URL.String())
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
return removeURL(ctx, tx, s.URL.String())
|
return removeURL(ctx, tx, s.URL.String())
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
2
db/db.go
2
db/db.go
@@ -448,7 +448,7 @@ func (d *DbServiceImpl) GetLatestSnapshot(ctx context.Context, tx *sqlx.Tx, url
|
|||||||
if errors.Is(err, sql.ErrNoRows) {
|
if errors.Is(err, sql.ErrNoRows) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
return nil, xerrors.NewError(fmt.Errorf("cannot get latest snapshot for URL %s: %w", url, err), 0, "", false)
|
return nil, xerrors.NewError(fmt.Errorf("cannot get latest snapshot for URL %s: %w", url, err), 0, "", true)
|
||||||
}
|
}
|
||||||
return s, nil
|
return s, nil
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user