diff options
author | 2022-09-25 12:09:41 +0100 | |
---|---|---|
committer | 2022-09-25 13:09:41 +0200 | |
commit | 7a1aa04bbbc9f95442c8850ef61d1d58bb12df74 (patch) | |
tree | 7b05a52914deb1f838539a3196edca287942fd95 /internal/federation/dereferencing/thread.go | |
parent | [bugfix] Wrap media reader in length reader to determine length if no `conten... (diff) | |
download | gotosocial-7a1aa04bbbc9f95442c8850ef61d1d58bb12df74.tar.xz |
[bugfix] update thread iterators to not use recursion (#851)
* update thread iterators to not use recursion, rewrote both
Signed-off-by: kim <grufwub@gmail.com>
* fix endless descendant deref, don't error if fetching existing status
Signed-off-by: kim <grufwub@gmail.com>
* don't refetch remote ancestor statuses, improve descendant iter commenting
Signed-off-by: kim <grufwub@gmail.com>
* move collection page next logic so we capture first page of entities
Signed-off-by: kim <grufwub@gmail.com>
* improve log format argument quoting
Signed-off-by: kim <grufwub@gmail.com>
* improve code commenting of collection paging
Signed-off-by: kim <grufwub@gmail.com>
* only dereference announce's originating status if _not_ local. update DereferenceThread() signature. cleanup searchStatusByURI()
Signed-off-by: kim <grufwub@gmail.com>
Signed-off-by: kim <grufwub@gmail.com>
Diffstat (limited to 'internal/federation/dereferencing/thread.go')
-rw-r--r-- | internal/federation/dereferencing/thread.go | 363 |
1 files changed, 216 insertions, 147 deletions
diff --git a/internal/federation/dereferencing/thread.go b/internal/federation/dereferencing/thread.go index 7d7431110..d15f05964 100644 --- a/internal/federation/dereferencing/thread.go +++ b/internal/federation/dereferencing/thread.go @@ -24,218 +24,287 @@ import ( "net/url" "codeberg.org/gruf/go-kv" + "github.com/superseriousbusiness/activity/streams/vocab" "github.com/superseriousbusiness/gotosocial/internal/ap" "github.com/superseriousbusiness/gotosocial/internal/config" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/log" "github.com/superseriousbusiness/gotosocial/internal/uris" ) +// maxIter defines how many iterations of descendants or +// ancesters we are willing to follow before returning error. +const maxIter = 1000 + // DereferenceThread takes a statusable (something that has withReplies and withInReplyTo), // and dereferences statusables in the conversation. // // This process involves working up and down the chain of replies, and parsing through the collections of IDs // presented by remote instances as part of their replies collections, and will likely involve making several calls to // multiple different hosts. -func (d *deref) DereferenceThread(ctx context.Context, username string, statusIRI *url.URL) error { +// +// This does not return error, as for robustness we do not want to error-out on a status because another further up / down has issues. +func (d *deref) DereferenceThread(ctx context.Context, username string, statusIRI *url.URL, status *gtsmodel.Status, statusable ap.Statusable) { l := log.WithFields(kv.Fields{ - {"username", username}, - {"statusIRI", statusIRI}, + {"statusIRI", status.URI}, }...) - l.Trace("entering DereferenceThread") - // if it's our status we already have everything stashed so we can bail early - if statusIRI.Host == config.GetHost() { - l.Trace("iri belongs to us, bailing") - return nil - } + // Log function start + l.Trace("beginning") - // first make sure we have this status in our db - _, statusable, err := d.GetRemoteStatus(ctx, username, statusIRI, true, false) - if err != nil { - return fmt.Errorf("DereferenceThread: error getting initial status with id %s: %s", statusIRI.String(), err) + // Ensure that ancestors have been fully dereferenced + if err := d.dereferenceStatusAncestors(ctx, username, status); err != nil { + l.Errorf("error dereferencing status ancestors: %v", err) + // we don't return error, we have deref'd as much as we can } - // first iterate up through ancestors, dereferencing if necessary as we go - if err := d.iterateAncestors(ctx, username, *statusIRI); err != nil { - return fmt.Errorf("error iterating ancestors of status %s: %s", statusIRI.String(), err) + // Ensure that descendants have been fully dereferenced + if err := d.dereferenceStatusDescendants(ctx, username, statusIRI, statusable); err != nil { + l.Errorf("error dereferencing status descendants: %v", err) + // we don't return error, we have deref'd as much as we can } - - // now iterate down through descendants, again dereferencing as we go - if err := d.iterateDescendants(ctx, username, *statusIRI, statusable); err != nil { - return fmt.Errorf("error iterating descendants of status %s: %s", statusIRI.String(), err) - } - - return nil } -// iterateAncestors has the goal of reaching the oldest ancestor of a given status, and stashing all statuses along the way. -func (d *deref) iterateAncestors(ctx context.Context, username string, statusIRI url.URL) error { - l := log.WithFields(kv.Fields{ +// dereferenceAncestors has the goal of reaching the oldest ancestor of a given status, and stashing all statuses along the way. +func (d *deref) dereferenceStatusAncestors(ctx context.Context, username string, status *gtsmodel.Status) error { + // Take ref to original + ogIRI := status.URI + // Start log entry with fields + l := log.WithFields(kv.Fields{ {"username", username}, - {"statusIRI", statusIRI}, + {"statusIRI", ogIRI}, }...) - l.Trace("entering iterateAncestors") - // if it's our status we don't need to dereference anything so we can immediately move up the chain - if statusIRI.Host == config.GetHost() { - l.Trace("iri belongs to us, moving up to next ancestor") - - // since this is our status, we know we can extract the id from the status path - _, id, err := uris.ParseStatusesPath(&statusIRI) - if err != nil { - return err - } - - status, err := d.db.GetStatusByID(ctx, id) - if err != nil { - return err - } + // Log function start + l.Trace("beginning") + for i := 0; i < maxIter; i++ { if status.InReplyToURI == "" { // status doesn't reply to anything return nil } - nextIRI, err := url.Parse(status.URI) + // Parse this status's replied IRI + replyIRI, err := url.Parse(status.InReplyToURI) if err != nil { - return err + return fmt.Errorf("invalid status InReplyToURI %q: %w", status.InReplyToURI, err) } - return d.iterateAncestors(ctx, username, *nextIRI) - } + if replyIRI.Host == config.GetHost() { + l.Tracef("following local status ancestors: %s", status.InReplyToURI) - // If we reach here, we're looking at a remote status - _, statusable, err := d.GetRemoteStatus(ctx, username, &statusIRI, true, false) - if err != nil { - l.Debugf("couldn't get remote status %s: %s; can't iterate any more ancestors", statusIRI.String(), err) - return nil - } + // This is our status, extract ID from path + _, id, err := uris.ParseStatusesPath(replyIRI) + if err != nil { + return fmt.Errorf("invalid local status IRI %q: %w", status.InReplyToURI, err) + } + + // Fetch this status from the database + localStatus, err := d.db.GetStatusByID(ctx, id) + if err != nil { + return fmt.Errorf("error fetching local status %q: %w", id, err) + } + + // Set the fetched status + status = localStatus + + } else { + l.Tracef("following remote status ancestors: %s", status.InReplyToURI) + + // Fetch the remote status found at this IRI + remoteStatus, _, err := d.GetRemoteStatus(ctx, username, replyIRI, false, false) + if err != nil { + return fmt.Errorf("error fetching remote status %q: %w", status.InReplyToURI, err) + } - inReplyTo := ap.ExtractInReplyToURI(statusable) - if inReplyTo == nil || inReplyTo.String() == "" { - // status doesn't reply to anything - return nil + // Set the fetched status + status = remoteStatus + } } - // now move up to the next ancestor - return d.iterateAncestors(ctx, username, *inReplyTo) + return fmt.Errorf("reached %d ancestor iterations for %q", maxIter, ogIRI) } -func (d *deref) iterateDescendants(ctx context.Context, username string, statusIRI url.URL, statusable ap.Statusable) error { - l := log.WithFields(kv.Fields{ +func (d *deref) dereferenceStatusDescendants(ctx context.Context, username string, statusIRI *url.URL, parent ap.Statusable) error { + // Take ref to original + ogIRI := statusIRI + // Start log entry with fields + l := log.WithFields(kv.Fields{ {"username", username}, - {"statusIRI", statusIRI}, + {"statusIRI", ogIRI}, }...) - l.Trace("entering iterateDescendants") - - // if it's our status we already have descendants stashed so we can bail early - if statusIRI.Host == config.GetHost() { - l.Trace("iri belongs to us, bailing") - return nil - } - replies := statusable.GetActivityStreamsReplies() - if replies == nil || !replies.IsActivityStreamsCollection() { - l.Trace("no replies, bailing") - return nil + // Log function start + l.Trace("beginning") + + // frame represents a single stack frame when iteratively + // dereferencing status descendants. where statusIRI and + // statusable are of the status whose children we are to + // descend, page is the current activity streams collection + // page of entities we are on (as we often push a frame to + // stack mid-paging), and item___ are entity iterators for + // this activity streams collection page. + type frame struct { + statusIRI *url.URL + statusable ap.Statusable + page ap.CollectionPageable + itemIter vocab.ActivityStreamsItemsPropertyIterator + iterLen int + iterIdx int } - repliesCollection := replies.GetActivityStreamsCollection() - if repliesCollection == nil { - l.Trace("replies collection is nil, bailing") - return nil - } + var ( + // current is the current stack frame + current *frame + + // stack is a list of "shelved" descendand iterator + // frames. this is pushed to when a child status frame + // is found that we need to further iterate down, and + // popped from into 'current' when that child's tree + // of further descendants is exhausted. + stack = []*frame{ + { + // Starting input is first frame + statusIRI: statusIRI, + statusable: parent, + }, + } - first := repliesCollection.GetActivityStreamsFirst() - if first == nil { - l.Trace("replies collection has no first, bailing") - return nil - } + // popStack will remove and return the top frame + // from the stack, or nil if currently empty. + popStack = func() *frame { + if len(stack) == 0 { + return nil + } - firstPage := first.GetActivityStreamsCollectionPage() - if firstPage == nil { - l.Trace("first has no collection page, bailing") - return nil - } + // Get frame index + idx := len(stack) - 1 - firstPageNext := firstPage.GetActivityStreamsNext() - if firstPageNext == nil || !firstPageNext.IsIRI() { - l.Trace("next is not an iri, bailing") - return nil - } + // Pop last frame + frame := stack[idx] + stack = stack[:idx] - var foundReplies int - currentPageIRI := firstPageNext.GetIRI() + return frame + } + ) -pageLoop: - for { - l.Tracef("dereferencing page %s", currentPageIRI) - collectionPage, err := d.DereferenceCollectionPage(ctx, username, currentPageIRI) - if err != nil { - l.Debugf("couldn't get remote collection page %s: %s; breaking pageLoop", currentPageIRI, err) - break pageLoop +stackLoop: + for i := 0; i < maxIter; i++ { + // Pop next frame, nil means we are at end + if current = popStack(); current == nil { + return nil } - pageItems := collectionPage.GetActivityStreamsItems() - if pageItems.Len() == 0 { - // no items on this page, which means we're done - break pageLoop + if current.page == nil { + // This is a local status, no looping to do + if current.statusIRI.Host == config.GetHost() { + continue stackLoop + } + + l.Tracef("following remote status descendants: %s", current.statusIRI) + + // Look for an attached status replies (as collection) + replies := current.statusable.GetActivityStreamsReplies() + if replies == nil || !replies.IsActivityStreamsCollection() { + continue stackLoop + } + + // Get the status replies collection + collection := replies.GetActivityStreamsCollection() + + // Get the "first" property of the replies collection + first := collection.GetActivityStreamsFirst() + if first == nil || !first.IsActivityStreamsCollectionPage() { + continue stackLoop + } + + // Set the first activity stream collection page + current.page = first.GetActivityStreamsCollectionPage() } - // have a look through items and see what we can find - for iter := pageItems.Begin(); iter != pageItems.End(); iter = iter.Next() { - // We're looking for a url to feed to GetRemoteStatus. - // Each item can be either an IRI, or a Note. - // If a note, we grab the ID from it and call it, rather than parsing the note. - var itemURI *url.URL - switch { - case iter.IsIRI(): - // iri, easy - itemURI = iter.GetIRI() - case iter.IsActivityStreamsNote(): - // note, get the id from it to use as iri - note := iter.GetActivityStreamsNote() - noteID := note.GetJSONLDId() - if noteID != nil && noteID.IsIRI() { - itemURI = noteID.GetIRI() + for /* page loop */ { + if current.itemIter == nil { + // Check this page contains any items... + items := current.page.GetActivityStreamsItems() + if current.iterLen = items.Len(); current.iterLen == 0 { + continue stackLoop } - default: - // if it's not an iri or a note, we don't know how to process it - continue - } - if itemURI.Host == config.GetHost() { - // skip if the reply is from us -- we already have it then - continue + // Start off the item iterator + current.itemIter = items.Begin() + current.iterIdx = -1 } - // we can confidently say now that we found something - foundReplies++ + itemLoop: + for current.iterIdx++; current.iterIdx < current.iterLen; current.iterIdx++ { + var itemIRI *url.URL + + // Get next item iterator object + current.itemIter = current.itemIter.Next() - // get the remote statusable and put it in the db - _, statusable, err := d.GetRemoteStatus(ctx, username, itemURI, true, false) - if err == nil { - // now iterate descendants of *that* status - if err := d.iterateDescendants(ctx, username, *itemURI, statusable); err != nil { - continue + switch { + // Item is already an IRI + case current.itemIter.IsIRI(): + itemIRI = current.itemIter.GetIRI() + + // Item is a note, get the note ID IRI + case current.itemIter.IsActivityStreamsNote(): + note := current.itemIter.GetActivityStreamsNote() + if id := note.GetJSONLDId(); id != nil && id.IsIRI() { + itemIRI = id.GetIRI() + } + } + + if itemIRI == nil { + // Unusable iter object + continue itemLoop + } + + if itemIRI.Host == config.GetHost() { + // This child is one of ours, + continue itemLoop + } + + // Dereference the remote status and store in the database + _, statusable, err := d.GetRemoteStatus(ctx, username, itemIRI, true, false) + if err != nil { + l.Errorf("error dereferencing remote status %q: %s", itemIRI.String(), err) + continue itemLoop } + + // Put current and next frame at top of stack + stack = append(stack, current, &frame{ + statusIRI: itemIRI, + statusable: statusable, + }) } - } - nextPage := collectionPage.GetActivityStreamsNext() - if nextPage != nil && nextPage.IsIRI() { - nextPageIRI := nextPage.GetIRI() - l.Tracef("moving on to next page %s", nextPageIRI) - currentPageIRI = nextPageIRI - } else { - l.Trace("no next page, bailing") - break pageLoop + // Item iterator is done + current.itemIter = nil + + // Get the current page's "next" property + pageNext := current.page.GetActivityStreamsNext() + if pageNext == nil || !pageNext.IsIRI() { + continue stackLoop + } + + // Get the "next" page property IRI + pageNextIRI := pageNext.GetIRI() + + // Dereference this next collection page by its IRI + collectionPage, err := d.DereferenceCollectionPage(ctx, username, pageNextIRI) + if err != nil { + l.Errorf("error dereferencing remote collection page %q: %s", pageNextIRI.String(), err) + continue stackLoop + } + + // Set the updated collection page + current.page = collectionPage } } - l.Debugf("foundReplies %d", foundReplies) - return nil + return fmt.Errorf("reached %d descendant iterations for %q", maxIter, ogIRI.String()) } |