diff options
Diffstat (limited to 'internal/federation')
-rw-r--r-- | internal/federation/dereferencing/dereferencer.go | 6 | ||||
-rw-r--r-- | internal/federation/dereferencing/status.go | 19 | ||||
-rw-r--r-- | internal/federation/dereferencing/thread.go | 208 |
3 files changed, 173 insertions, 60 deletions
diff --git a/internal/federation/dereferencing/dereferencer.go b/internal/federation/dereferencing/dereferencer.go index 2902ebcbc..170fb6119 100644 --- a/internal/federation/dereferencing/dereferencer.go +++ b/internal/federation/dereferencing/dereferencer.go @@ -66,6 +66,12 @@ type Dereferencer interface { // This is a more optimized form of manually enqueueing .UpdateStatus() to the federation worker, since it only enqueues update if necessary. RefreshStatusAsync(ctx context.Context, requestUser string, status *gtsmodel.Status, apubStatus ap.Statusable, force bool) + // DereferenceStatusAncestors iterates upwards from the given status, using InReplyToURI, to ensure that as many parent statuses as possible are dereferenced. + DereferenceStatusAncestors(ctx context.Context, requestUser string, status *gtsmodel.Status) error + + // DereferenceStatusDescendants iterates downwards from the given status, using its replies, to ensure that as many children statuses as possible are dereferenced. + DereferenceStatusDescendants(ctx context.Context, requestUser string, statusIRI *url.URL, parent ap.Statusable) error + GetRemoteInstance(ctx context.Context, username string, remoteInstanceURI *url.URL) (*gtsmodel.Instance, error) DereferenceAnnounce(ctx context.Context, announce *gtsmodel.Status, requestingUsername string) error diff --git a/internal/federation/dereferencing/status.go b/internal/federation/dereferencing/status.go index 75adfdd6f..4525f64a9 100644 --- a/internal/federation/dereferencing/status.go +++ b/internal/federation/dereferencing/status.go @@ -104,7 +104,7 @@ func (d *deref) getStatusByURI(ctx context.Context, requestUser string, uri *url } if status == nil { - // Ensure that this is isn't a search for a local status. + // Ensure that this isn't a search for a local status. 
if uri.Host == config.GetHost() || uri.Host == config.GetAccountDomain() { return nil, nil, gtserror.SetUnretrievable(err) // this will be db.ErrNoEntries } @@ -149,7 +149,7 @@ func (d *deref) getStatusByURI(ctx context.Context, requestUser string, uri *url // RefreshStatus: implements Dereferencer{}.RefreshStatus(). func (d *deref) RefreshStatus(ctx context.Context, requestUser string, status *gtsmodel.Status, apubStatus ap.Statusable, force bool) (*gtsmodel.Status, ap.Statusable, error) { // Check whether needs update. - if statusUpToDate(status) { + if !force && statusUpToDate(status) { return status, nil, nil } @@ -205,8 +205,16 @@ func (d *deref) RefreshStatusAsync(ctx context.Context, requestUser string, stat }) } -// enrichStatus will enrich the given status, whether a new barebones model, or existing model from the database. It handles necessary dereferencing etc. -func (d *deref) enrichStatus(ctx context.Context, requestUser string, uri *url.URL, status *gtsmodel.Status, apubStatus ap.Statusable) (*gtsmodel.Status, ap.Statusable, error) { +// enrichStatus will enrich the given status, whether a new +// barebones model, or existing model from the database. +// It handles necessary dereferencing, database updates, etc. +func (d *deref) enrichStatus( + ctx context.Context, + requestUser string, + uri *url.URL, + status *gtsmodel.Status, + apubStatus ap.Statusable, +) (*gtsmodel.Status, ap.Statusable, error) { // Pre-fetch a transport for requesting username, used by later dereferencing. 
tsport, err := d.transportController.NewTransportForUsername(ctx, requestUser) if err != nil { @@ -217,7 +225,8 @@ func (d *deref) enrichStatus(ctx context.Context, requestUser string, uri *url.U if blocked, err := d.state.DB.IsDomainBlocked(ctx, uri.Host); err != nil { return nil, nil, gtserror.Newf("error checking blocked domain: %w", err) } else if blocked { - return nil, nil, gtserror.Newf("%s is blocked", uri.Host) + err = gtserror.Newf("%s is blocked", uri.Host) + return nil, nil, gtserror.SetUnretrievable(err) } if apubStatus == nil { diff --git a/internal/federation/dereferencing/thread.go b/internal/federation/dereferencing/thread.go index a12e537bc..a81849e54 100644 --- a/internal/federation/dereferencing/thread.go +++ b/internal/federation/dereferencing/thread.go @@ -19,6 +19,8 @@ package dereferencing import ( "context" + "errors" + "net/http" "net/url" "codeberg.org/gruf/go-kv" @@ -26,96 +28,184 @@ import ( "github.com/superseriousbusiness/activity/streams/vocab" "github.com/superseriousbusiness/gotosocial/internal/ap" "github.com/superseriousbusiness/gotosocial/internal/config" + "github.com/superseriousbusiness/gotosocial/internal/db" + "github.com/superseriousbusiness/gotosocial/internal/gtscontext" "github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/log" - "github.com/superseriousbusiness/gotosocial/internal/uris" ) // maxIter defines how many iterations of descendants or // ancesters we are willing to follow before returning error. const maxIter = 1000 -// dereferenceThread will dereference statuses both above and below the given status in a thread, it returns no error and is intended to be called asychronously. 
func (d *deref) dereferenceThread(ctx context.Context, username string, statusIRI *url.URL, status *gtsmodel.Status, statusable ap.Statusable) { // Ensure that ancestors have been fully dereferenced - if err := d.dereferenceStatusAncestors(ctx, username, status); err != nil { - log.Error(ctx, err) // log entry and error will include caller prefixes + if err := d.DereferenceStatusAncestors(ctx, username, status); err != nil { + log.Error(ctx, err) } // Ensure that descendants have been fully dereferenced - if err := d.dereferenceStatusDescendants(ctx, username, statusIRI, statusable); err != nil { - log.Error(ctx, err) // log entry and error will include caller prefixes + if err := d.DereferenceStatusDescendants(ctx, username, statusIRI, statusable); err != nil { + log.Error(ctx, err) } } -// dereferenceAncestors has the goal of reaching the oldest ancestor of a given status, and stashing all statuses along the way. -func (d *deref) dereferenceStatusAncestors(ctx context.Context, username string, status *gtsmodel.Status) error { - // Take ref to original - ogIRI := status.URI - - // Start log entry with fields - l := log.WithContext(ctx). - WithFields(kv.Fields{ - {"username", username}, - {"statusIRI", ogIRI}, - }...) - - // Log function start - l.Trace("beginning") +func (d *deref) DereferenceStatusAncestors( + ctx context.Context, + username string, + status *gtsmodel.Status, +) error { + // Mark given status as the one + // we're currently working on. + var current = status for i := 0; i < maxIter; i++ { - if status.InReplyToURI == "" { - // status doesn't reply to anything + if current.InReplyToURI == "" { + // Status has no parent, we've + // reached the top of the chain. 
return nil } - // Parse this status's replied IRI - replyIRI, err := url.Parse(status.InReplyToURI) - if err != nil { - return gtserror.Newf("invalid status InReplyToURI %q: %w", status.InReplyToURI, err) - } - - if replyIRI.Host == config.GetHost() { - l.Tracef("following local status ancestors: %s", status.InReplyToURI) + l := log. + WithContext(ctx). + WithFields(kv.Fields{ + {"username", username}, + {"originalStatusIRI", status.URI}, + {"currentStatusURI", current.URI}, + {"currentInReplyToURI", current.InReplyToURI}, + }...) + + if current.InReplyToID != "" { + // We already have an InReplyToID set. This means + // the status's parent has, at some point, been + // inserted into the database, either because it + // is a status from our instance, or a status from + // remote that we've dereferenced before, or found + // out about in some other way. + // + // Working on this assumption, check if the parent + // status exists, either as a copy pinned on the + // current status, or in the database. + + if current.InReplyTo != nil { + // We have the parent already, and the child + // doesn't need to be updated; keep iterating + // from this parent upwards. + current = current.InReplyTo + continue + } - // This is our status, extract ID from path - _, id, err := uris.ParseStatusesPath(replyIRI) - if err != nil { - return gtserror.Newf("invalid local status IRI %q: %w", status.InReplyToURI, err) + // Parent isn't pinned to this status (yet), see + // if we can get it from the db (we should be + // able to, since it has an ID already). + parent, err := d.state.DB.GetStatusByID( + gtscontext.SetBarebones(ctx), + current.InReplyToID, + ) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + // Real db error, stop. 
+ return gtserror.Newf("db error getting status %s: %w", current.InReplyToID, err) } - // Fetch this status from the database - localStatus, err := d.state.DB.GetStatusByID(ctx, id) - if err != nil { - return gtserror.Newf("error fetching local status %q: %w", id, err) + if parent != nil { + // We got the parent from the db, and the child + // doesn't need to be updated; keep iterating + // from this parent upwards. + current.InReplyTo = parent + current = parent + continue } - // Set the fetched status - status = localStatus + // If we arrive here, we know this child *did* have + // a parent at some point, but it no longer exists in + // the database, presumably because it's been deleted + // by another action. + // + // TODO: clean this up in a nightly task. + l.Warnf("current status has been orphaned (parent %s no longer exists in database)", current.InReplyToID) + return nil // Cannot iterate further. + } - } else { - l.Tracef("following remote status ancestors: %s", status.InReplyToURI) + // If we reach this point, we know the status has + // an InReplyToURI set, but it doesn't yet have an + // InReplyToID, which means that the parent status + // has not yet been dereferenced. + inReplyToURI, err := url.Parse(current.InReplyToURI) + if err != nil || inReplyToURI == nil { + // Parent URI is not something we can handle. + l.Debug("current status has been orphaned (invalid InReplyToURI)") + return nil //nolint:nilerr + } - // Fetch the remote status found at this IRI - remoteStatus, _, err := d.getStatusByURI( - ctx, - username, - replyIRI, - ) - if err != nil { - return gtserror.Newf("error fetching remote status %q: %w", status.InReplyToURI, err) + // Parent URI is valid, try to get it. + // getStatusByURI guards against the following conditions: + // + // - remote domain is blocked (will return unretrievable) + // - domain is local (will try to return something, or + // return unretrievable). 
+ parent, _, err := d.getStatusByURI(ctx, username, inReplyToURI) + if err == nil { + // We successfully fetched the parent. + // Update current status with new info. + current.InReplyToID = parent.ID + current.InReplyToAccountID = parent.AccountID + if err := d.state.DB.UpdateStatus( + ctx, current, + "in_reply_to_id", + "in_reply_to_account_id", + ); err != nil { + return gtserror.Newf("db error updating status %s: %w", current.ID, err) + } + + // Mark parent as next status to + // work on, and keep iterating. + current = parent + continue + } + + // We could not fetch the parent, check if we can do anything + // useful with the error. For example, HTTP status code returned + // from remote may indicate that the parent has been deleted. + switch code := gtserror.StatusCode(err); { + case code == http.StatusGone || code == http.StatusNotFound: + // 410 means the status has definitely been deleted. + // 404 means the status has *probably* been deleted. + // Update this status to reflect that, then bail. + l.Debugf("current status has been orphaned (call to parent returned code %d)", code) + + current.InReplyToURI = "" + if err := d.state.DB.UpdateStatus( + ctx, current, + "in_reply_to_uri", + ); err != nil { + return gtserror.Newf("db error updating status %s: %w", current.ID, err) } + return nil + + case code != 0: + // We had a code, but not one indicating deletion, + // log the code but don't return error or update the + // status; we can try again later. + l.Warnf("cannot dereference parent (%q)", err) + return nil + + case gtserror.Unretrievable(err): + // Not retrievable for some other reason, so just + // bail; we can try again later if necessary. + l.Debugf("parent unretrievable (%q)", err) + return nil - // Set the fetched status - status = remoteStatus + default: + // Some other error that stops us in our tracks. 
+ return gtserror.Newf("error dereferencing parent %s: %w", current.InReplyToURI, err) } } - return gtserror.Newf("reached %d ancestor iterations for %q", maxIter, ogIRI) + return gtserror.Newf("reached %d ancestor iterations for %q", maxIter, status.URI) } -func (d *deref) dereferenceStatusDescendants(ctx context.Context, username string, statusIRI *url.URL, parent ap.Statusable) error { +func (d *deref) DereferenceStatusDescendants(ctx context.Context, username string, statusIRI *url.URL, parent ap.Statusable) error { // Take ref to original ogIRI := statusIRI @@ -256,9 +346,17 @@ stackLoop: } // Dereference the remote status and store in the database. + // getStatusByURI guards against the following conditions: + // + // - remote domain is blocked (will return unretrievable) + // - domain is local (will try to return something, or + // return unretrievable). _, statusable, err := d.getStatusByURI(ctx, username, itemIRI) if err != nil { - l.Errorf("error dereferencing remote status %s: %v", itemIRI, err) + if !gtserror.Unretrievable(err) { + l.Errorf("error dereferencing remote status %s: %v", itemIRI, err) + } + continue itemLoop } |