diff options
author | 2023-06-24 09:32:10 +0200 | |
---|---|---|
committer | 2023-06-24 08:32:10 +0100 | |
commit | 3e19f480e63085d48ea4242f46b57105edaa640f (patch) | |
tree | 0cb8ae6aa87dbde884f33a6b3531d10910cab87b /internal/federation/dereferencing/thread.go | |
parent | [bugfix/chore] oauth entropy fix + media cleanup tasks rewrite (#1853) (diff) | |
download | gotosocial-3e19f480e63085d48ea4242f46b57105edaa640f.tar.xz |
[bugfix] Ensure `InReplyToID` set properly, update dereference ancestors func (#1921)
Diffstat (limited to 'internal/federation/dereferencing/thread.go')
-rw-r--r-- | internal/federation/dereferencing/thread.go | 208 |
1 files changed, 153 insertions, 55 deletions
diff --git a/internal/federation/dereferencing/thread.go b/internal/federation/dereferencing/thread.go index a12e537bc..a81849e54 100644 --- a/internal/federation/dereferencing/thread.go +++ b/internal/federation/dereferencing/thread.go @@ -19,6 +19,8 @@ package dereferencing import ( "context" + "errors" + "net/http" "net/url" "codeberg.org/gruf/go-kv" @@ -26,96 +28,184 @@ import ( "github.com/superseriousbusiness/activity/streams/vocab" "github.com/superseriousbusiness/gotosocial/internal/ap" "github.com/superseriousbusiness/gotosocial/internal/config" + "github.com/superseriousbusiness/gotosocial/internal/db" + "github.com/superseriousbusiness/gotosocial/internal/gtscontext" "github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/log" - "github.com/superseriousbusiness/gotosocial/internal/uris" ) // maxIter defines how many iterations of descendants or // ancesters we are willing to follow before returning error. const maxIter = 1000 -// dereferenceThread will dereference statuses both above and below the given status in a thread, it returns no error and is intended to be called asychronously. func (d *deref) dereferenceThread(ctx context.Context, username string, statusIRI *url.URL, status *gtsmodel.Status, statusable ap.Statusable) { // Ensure that ancestors have been fully dereferenced - if err := d.dereferenceStatusAncestors(ctx, username, status); err != nil { - log.Error(ctx, err) // log entry and error will include caller prefixes + if err := d.DereferenceStatusAncestors(ctx, username, status); err != nil { + log.Error(ctx, err) } // Ensure that descendants have been fully dereferenced - if err := d.dereferenceStatusDescendants(ctx, username, statusIRI, statusable); err != nil { - log.Error(ctx, err) // log entry and error will include caller prefixes + if err := d.DereferenceStatusDescendants(ctx, username, statusIRI, statusable); err != nil { + log.Error(ctx, err) } } -// dereferenceAncestors has the goal of reaching the oldest ancestor of a given status, and stashing all statuses along the way. -func (d *deref) dereferenceStatusAncestors(ctx context.Context, username string, status *gtsmodel.Status) error { - // Take ref to original - ogIRI := status.URI - - // Start log entry with fields - l := log.WithContext(ctx). - WithFields(kv.Fields{ - {"username", username}, - {"statusIRI", ogIRI}, - }...) - - // Log function start - l.Trace("beginning") +func (d *deref) DereferenceStatusAncestors( + ctx context.Context, + username string, + status *gtsmodel.Status, +) error { + // Mark given status as the one + // we're currently working on. + var current = status for i := 0; i < maxIter; i++ { - if status.InReplyToURI == "" { - // status doesn't reply to anything + if current.InReplyToURI == "" { + // Status has no parent, we've + // reached the top of the chain. return nil } - // Parse this status's replied IRI - replyIRI, err := url.Parse(status.InReplyToURI) - if err != nil { - return gtserror.Newf("invalid status InReplyToURI %q: %w", status.InReplyToURI, err) - } - - if replyIRI.Host == config.GetHost() { - l.Tracef("following local status ancestors: %s", status.InReplyToURI) + l := log. + WithContext(ctx). + WithFields(kv.Fields{ + {"username", username}, + {"originalStatusIRI", status.URI}, + {"currentStatusURI", current.URI}, + {"currentInReplyToURI", current.InReplyToURI}, + }...) + + if current.InReplyToID != "" { + // We already have an InReplyToID set. This means + // the status's parent has, at some point, been + // inserted into the database, either because it + // is a status from our instance, or a status from + // remote that we've dereferenced before, or found + // out about in some other way. + // + // Working on this assumption, check if the parent + // status exists, either as a copy pinned on the + // current status, or in the database. + + if current.InReplyTo != nil { + // We have the parent already, and the child + // doesn't need to be updated; keep iterating + // from this parent upwards. + current = current.InReplyTo + continue + } - // This is our status, extract ID from path - _, id, err := uris.ParseStatusesPath(replyIRI) - if err != nil { - return gtserror.Newf("invalid local status IRI %q: %w", status.InReplyToURI, err) + // Parent isn't pinned to this status (yet), see + // if we can get it from the db (we should be + // able to, since it has an ID already). + parent, err := d.state.DB.GetStatusByID( + gtscontext.SetBarebones(ctx), + current.InReplyToID, + ) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + // Real db error, stop. + return gtserror.Newf("db error getting status %s: %w", current.InReplyToID, err) } - // Fetch this status from the database - localStatus, err := d.state.DB.GetStatusByID(ctx, id) - if err != nil { - return gtserror.Newf("error fetching local status %q: %w", id, err) + if parent != nil { + // We got the parent from the db, and the child + // doesn't need to be updated; keep iterating + // from this parent upwards. + current.InReplyTo = parent + current = parent + continue } - // Set the fetched status - status = localStatus + // If we arrive here, we know this child *did* have + // a parent at some point, but it no longer exists in + // the database, presumably because it's been deleted + // by another action. + // + // TODO: clean this up in a nightly task. + l.Warnf("current status has been orphaned (parent %s no longer exists in database)", current.InReplyToID) + return nil // Cannot iterate further. + } - } else { - l.Tracef("following remote status ancestors: %s", status.InReplyToURI) + // If we reach this point, we know the status has + // an InReplyToURI set, but it doesn't yet have an + // InReplyToID, which means that the parent status + // has not yet been dereferenced. + inReplyToURI, err := url.Parse(current.InReplyToURI) + if err != nil || inReplyToURI == nil { + // Parent URI is not something we can handle. + l.Debug("current status has been orphaned (invalid InReplyToURI)") + return nil //nolint:nilerr + } - // Fetch the remote status found at this IRI - remoteStatus, _, err := d.getStatusByURI( - ctx, - username, - replyIRI, - ) - if err != nil { - return gtserror.Newf("error fetching remote status %q: %w", status.InReplyToURI, err) + // Parent URI is valid, try to get it. + // getStatusByURI guards against the following conditions: + // + // - remote domain is blocked (will return unretrievable) + // - domain is local (will try to return something, or + // return unretrievable). + parent, _, err := d.getStatusByURI(ctx, username, inReplyToURI) + if err == nil { + // We successfully fetched the parent. + // Update current status with new info. + current.InReplyToID = parent.ID + current.InReplyToAccountID = parent.AccountID + if err := d.state.DB.UpdateStatus( + ctx, current, + "in_reply_to_id", + "in_reply_to_account_id", + ); err != nil { + return gtserror.Newf("db error updating status %s: %w", current.ID, err) + } + + // Mark parent as next status to + // work on, and keep iterating. + current = parent + continue + } + + // We could not fetch the parent, check if we can do anything + // useful with the error. For example, HTTP status code returned + // from remote may indicate that the parent has been deleted. + switch code := gtserror.StatusCode(err); { + case code == http.StatusGone || code == http.StatusNotFound: + // 410 means the status has definitely been deleted. + // 404 means the status has *probably* been deleted. + // Update this status to reflect that, then bail. + l.Debugf("current status has been orphaned (call to parent returned code %d)", code) + + current.InReplyToURI = "" + if err := d.state.DB.UpdateStatus( + ctx, current, + "in_reply_to_uri", + ); err != nil { + return gtserror.Newf("db error updating status %s: %w", current.ID, err) } + return nil + + case code != 0: + // We had a code, but not one indicating deletion, + // log the code but don't return error or update the + // status; we can try again later. + l.Warnf("cannot dereference parent (%q)", err) + return nil + + case gtserror.Unretrievable(err): + // Not retrievable for some other reason, so just + // bail; we can try again later if necessary. + l.Debugf("parent unretrievable (%q)", err) + return nil - // Set the fetched status - status = remoteStatus + default: + // Some other error that stops us in our tracks. + return gtserror.Newf("error dereferencing parent %s: %w", current.InReplyToURI, err) } } - return gtserror.Newf("reached %d ancestor iterations for %q", maxIter, ogIRI) + return gtserror.Newf("reached %d ancestor iterations for %q", maxIter, status.URI) } -func (d *deref) dereferenceStatusDescendants(ctx context.Context, username string, statusIRI *url.URL, parent ap.Statusable) error { +func (d *deref) DereferenceStatusDescendants(ctx context.Context, username string, statusIRI *url.URL, parent ap.Statusable) error { // Take ref to original ogIRI := statusIRI @@ -256,9 +346,17 @@ stackLoop: } // Dereference the remote status and store in the database. + // getStatusByURI guards against the following conditions: + // + // - remote domain is blocked (will return unretrievable) + // - domain is local (will try to return something, or + // return unretrievable). _, statusable, err := d.getStatusByURI(ctx, username, itemIRI) if err != nil { - l.Errorf("error dereferencing remote status %s: %v", itemIRI, err) + if !gtserror.Unretrievable(err) { + l.Errorf("error dereferencing remote status %s: %v", itemIRI, err) + } + continue itemLoop } |