summaryrefslogtreecommitdiff
path: root/internal/federation/dereferencing/thread.go
diff options
context:
space:
mode:
authorLibravatar kim <89579420+NyaaaWhatsUpDoc@users.noreply.github.com>2022-09-25 12:09:41 +0100
committerLibravatar GitHub <noreply@github.com>2022-09-25 13:09:41 +0200
commit7a1aa04bbbc9f95442c8850ef61d1d58bb12df74 (patch)
tree7b05a52914deb1f838539a3196edca287942fd95 /internal/federation/dereferencing/thread.go
parent[bugfix] Wrap media reader in length reader to determine length if no `conten... (diff)
downloadgotosocial-7a1aa04bbbc9f95442c8850ef61d1d58bb12df74.tar.xz
[bugfix] update thread iterators to not use recursion (#851)
* update thread iterators to not use recursion, rewrote both Signed-off-by: kim <grufwub@gmail.com> * fix endless descendant deref, don't error if fetching existing status Signed-off-by: kim <grufwub@gmail.com> * don't refetch remote ancestor statuses, improve descendant iter commenting Signed-off-by: kim <grufwub@gmail.com> * move collection page next logic so we capture first page of entities Signed-off-by: kim <grufwub@gmail.com> * improve log format argument quoting Signed-off-by: kim <grufwub@gmail.com> * improve code commenting of collection paging Signed-off-by: kim <grufwub@gmail.com> * only dereference announce's originating status if _not_ local. update DereferenceThread() signature. cleanup searchStatusByURI() Signed-off-by: kim <grufwub@gmail.com> Signed-off-by: kim <grufwub@gmail.com>
Diffstat (limited to 'internal/federation/dereferencing/thread.go')
-rw-r--r--internal/federation/dereferencing/thread.go363
1 files changed, 216 insertions, 147 deletions
diff --git a/internal/federation/dereferencing/thread.go b/internal/federation/dereferencing/thread.go
index 7d7431110..d15f05964 100644
--- a/internal/federation/dereferencing/thread.go
+++ b/internal/federation/dereferencing/thread.go
@@ -24,218 +24,287 @@ import (
"net/url"
"codeberg.org/gruf/go-kv"
+ "github.com/superseriousbusiness/activity/streams/vocab"
"github.com/superseriousbusiness/gotosocial/internal/ap"
"github.com/superseriousbusiness/gotosocial/internal/config"
+ "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/uris"
)
+// maxIter defines how many iterations of descendants or
+// ancesters we are willing to follow before returning error.
+const maxIter = 1000
+
// DereferenceThread takes a statusable (something that has withReplies and withInReplyTo),
// and dereferences statusables in the conversation.
//
// This process involves working up and down the chain of replies, and parsing through the collections of IDs
// presented by remote instances as part of their replies collections, and will likely involve making several calls to
// multiple different hosts.
-func (d *deref) DereferenceThread(ctx context.Context, username string, statusIRI *url.URL) error {
+//
+// This does not return error, as for robustness we do not want to error-out on a status because another further up / down has issues.
+func (d *deref) DereferenceThread(ctx context.Context, username string, statusIRI *url.URL, status *gtsmodel.Status, statusable ap.Statusable) {
l := log.WithFields(kv.Fields{
-
{"username", username},
- {"statusIRI", statusIRI},
+ {"statusIRI", status.URI},
}...)
- l.Trace("entering DereferenceThread")
- // if it's our status we already have everything stashed so we can bail early
- if statusIRI.Host == config.GetHost() {
- l.Trace("iri belongs to us, bailing")
- return nil
- }
+ // Log function start
+ l.Trace("beginning")
- // first make sure we have this status in our db
- _, statusable, err := d.GetRemoteStatus(ctx, username, statusIRI, true, false)
- if err != nil {
- return fmt.Errorf("DereferenceThread: error getting initial status with id %s: %s", statusIRI.String(), err)
+ // Ensure that ancestors have been fully dereferenced
+ if err := d.dereferenceStatusAncestors(ctx, username, status); err != nil {
+ l.Errorf("error dereferencing status ancestors: %v", err)
+ // we don't return error, we have deref'd as much as we can
}
- // first iterate up through ancestors, dereferencing if necessary as we go
- if err := d.iterateAncestors(ctx, username, *statusIRI); err != nil {
- return fmt.Errorf("error iterating ancestors of status %s: %s", statusIRI.String(), err)
+ // Ensure that descendants have been fully dereferenced
+ if err := d.dereferenceStatusDescendants(ctx, username, statusIRI, statusable); err != nil {
+ l.Errorf("error dereferencing status descendants: %v", err)
+ // we don't return error, we have deref'd as much as we can
}
-
- // now iterate down through descendants, again dereferencing as we go
- if err := d.iterateDescendants(ctx, username, *statusIRI, statusable); err != nil {
- return fmt.Errorf("error iterating descendants of status %s: %s", statusIRI.String(), err)
- }
-
- return nil
}
-// iterateAncestors has the goal of reaching the oldest ancestor of a given status, and stashing all statuses along the way.
-func (d *deref) iterateAncestors(ctx context.Context, username string, statusIRI url.URL) error {
- l := log.WithFields(kv.Fields{
+// dereferenceAncestors has the goal of reaching the oldest ancestor of a given status, and stashing all statuses along the way.
+func (d *deref) dereferenceStatusAncestors(ctx context.Context, username string, status *gtsmodel.Status) error {
+ // Take ref to original
+ ogIRI := status.URI
+ // Start log entry with fields
+ l := log.WithFields(kv.Fields{
{"username", username},
- {"statusIRI", statusIRI},
+ {"statusIRI", ogIRI},
}...)
- l.Trace("entering iterateAncestors")
- // if it's our status we don't need to dereference anything so we can immediately move up the chain
- if statusIRI.Host == config.GetHost() {
- l.Trace("iri belongs to us, moving up to next ancestor")
-
- // since this is our status, we know we can extract the id from the status path
- _, id, err := uris.ParseStatusesPath(&statusIRI)
- if err != nil {
- return err
- }
-
- status, err := d.db.GetStatusByID(ctx, id)
- if err != nil {
- return err
- }
+ // Log function start
+ l.Trace("beginning")
+ for i := 0; i < maxIter; i++ {
if status.InReplyToURI == "" {
// status doesn't reply to anything
return nil
}
- nextIRI, err := url.Parse(status.URI)
+ // Parse this status's replied IRI
+ replyIRI, err := url.Parse(status.InReplyToURI)
if err != nil {
- return err
+ return fmt.Errorf("invalid status InReplyToURI %q: %w", status.InReplyToURI, err)
}
- return d.iterateAncestors(ctx, username, *nextIRI)
- }
+ if replyIRI.Host == config.GetHost() {
+ l.Tracef("following local status ancestors: %s", status.InReplyToURI)
- // If we reach here, we're looking at a remote status
- _, statusable, err := d.GetRemoteStatus(ctx, username, &statusIRI, true, false)
- if err != nil {
- l.Debugf("couldn't get remote status %s: %s; can't iterate any more ancestors", statusIRI.String(), err)
- return nil
- }
+ // This is our status, extract ID from path
+ _, id, err := uris.ParseStatusesPath(replyIRI)
+ if err != nil {
+ return fmt.Errorf("invalid local status IRI %q: %w", status.InReplyToURI, err)
+ }
+
+ // Fetch this status from the database
+ localStatus, err := d.db.GetStatusByID(ctx, id)
+ if err != nil {
+ return fmt.Errorf("error fetching local status %q: %w", id, err)
+ }
+
+ // Set the fetched status
+ status = localStatus
+
+ } else {
+ l.Tracef("following remote status ancestors: %s", status.InReplyToURI)
+
+ // Fetch the remote status found at this IRI
+ remoteStatus, _, err := d.GetRemoteStatus(ctx, username, replyIRI, false, false)
+ if err != nil {
+ return fmt.Errorf("error fetching remote status %q: %w", status.InReplyToURI, err)
+ }
- inReplyTo := ap.ExtractInReplyToURI(statusable)
- if inReplyTo == nil || inReplyTo.String() == "" {
- // status doesn't reply to anything
- return nil
+ // Set the fetched status
+ status = remoteStatus
+ }
}
- // now move up to the next ancestor
- return d.iterateAncestors(ctx, username, *inReplyTo)
+ return fmt.Errorf("reached %d ancestor iterations for %q", maxIter, ogIRI)
}
-func (d *deref) iterateDescendants(ctx context.Context, username string, statusIRI url.URL, statusable ap.Statusable) error {
- l := log.WithFields(kv.Fields{
+func (d *deref) dereferenceStatusDescendants(ctx context.Context, username string, statusIRI *url.URL, parent ap.Statusable) error {
+ // Take ref to original
+ ogIRI := statusIRI
+ // Start log entry with fields
+ l := log.WithFields(kv.Fields{
{"username", username},
- {"statusIRI", statusIRI},
+ {"statusIRI", ogIRI},
}...)
- l.Trace("entering iterateDescendants")
-
- // if it's our status we already have descendants stashed so we can bail early
- if statusIRI.Host == config.GetHost() {
- l.Trace("iri belongs to us, bailing")
- return nil
- }
- replies := statusable.GetActivityStreamsReplies()
- if replies == nil || !replies.IsActivityStreamsCollection() {
- l.Trace("no replies, bailing")
- return nil
+ // Log function start
+ l.Trace("beginning")
+
+ // frame represents a single stack frame when iteratively
+ // dereferencing status descendants. where statusIRI and
+ // statusable are of the status whose children we are to
+ // descend, page is the current activity streams collection
+ // page of entities we are on (as we often push a frame to
+ // stack mid-paging), and item___ are entity iterators for
+ // this activity streams collection page.
+ type frame struct {
+ statusIRI *url.URL
+ statusable ap.Statusable
+ page ap.CollectionPageable
+ itemIter vocab.ActivityStreamsItemsPropertyIterator
+ iterLen int
+ iterIdx int
}
- repliesCollection := replies.GetActivityStreamsCollection()
- if repliesCollection == nil {
- l.Trace("replies collection is nil, bailing")
- return nil
- }
+ var (
+ // current is the current stack frame
+ current *frame
+
+ // stack is a list of "shelved" descendand iterator
+ // frames. this is pushed to when a child status frame
+ // is found that we need to further iterate down, and
+ // popped from into 'current' when that child's tree
+ // of further descendants is exhausted.
+ stack = []*frame{
+ {
+ // Starting input is first frame
+ statusIRI: statusIRI,
+ statusable: parent,
+ },
+ }
- first := repliesCollection.GetActivityStreamsFirst()
- if first == nil {
- l.Trace("replies collection has no first, bailing")
- return nil
- }
+ // popStack will remove and return the top frame
+ // from the stack, or nil if currently empty.
+ popStack = func() *frame {
+ if len(stack) == 0 {
+ return nil
+ }
- firstPage := first.GetActivityStreamsCollectionPage()
- if firstPage == nil {
- l.Trace("first has no collection page, bailing")
- return nil
- }
+ // Get frame index
+ idx := len(stack) - 1
- firstPageNext := firstPage.GetActivityStreamsNext()
- if firstPageNext == nil || !firstPageNext.IsIRI() {
- l.Trace("next is not an iri, bailing")
- return nil
- }
+ // Pop last frame
+ frame := stack[idx]
+ stack = stack[:idx]
- var foundReplies int
- currentPageIRI := firstPageNext.GetIRI()
+ return frame
+ }
+ )
-pageLoop:
- for {
- l.Tracef("dereferencing page %s", currentPageIRI)
- collectionPage, err := d.DereferenceCollectionPage(ctx, username, currentPageIRI)
- if err != nil {
- l.Debugf("couldn't get remote collection page %s: %s; breaking pageLoop", currentPageIRI, err)
- break pageLoop
+stackLoop:
+ for i := 0; i < maxIter; i++ {
+ // Pop next frame, nil means we are at end
+ if current = popStack(); current == nil {
+ return nil
}
- pageItems := collectionPage.GetActivityStreamsItems()
- if pageItems.Len() == 0 {
- // no items on this page, which means we're done
- break pageLoop
+ if current.page == nil {
+ // This is a local status, no looping to do
+ if current.statusIRI.Host == config.GetHost() {
+ continue stackLoop
+ }
+
+ l.Tracef("following remote status descendants: %s", current.statusIRI)
+
+ // Look for an attached status replies (as collection)
+ replies := current.statusable.GetActivityStreamsReplies()
+ if replies == nil || !replies.IsActivityStreamsCollection() {
+ continue stackLoop
+ }
+
+ // Get the status replies collection
+ collection := replies.GetActivityStreamsCollection()
+
+ // Get the "first" property of the replies collection
+ first := collection.GetActivityStreamsFirst()
+ if first == nil || !first.IsActivityStreamsCollectionPage() {
+ continue stackLoop
+ }
+
+ // Set the first activity stream collection page
+ current.page = first.GetActivityStreamsCollectionPage()
}
- // have a look through items and see what we can find
- for iter := pageItems.Begin(); iter != pageItems.End(); iter = iter.Next() {
- // We're looking for a url to feed to GetRemoteStatus.
- // Each item can be either an IRI, or a Note.
- // If a note, we grab the ID from it and call it, rather than parsing the note.
- var itemURI *url.URL
- switch {
- case iter.IsIRI():
- // iri, easy
- itemURI = iter.GetIRI()
- case iter.IsActivityStreamsNote():
- // note, get the id from it to use as iri
- note := iter.GetActivityStreamsNote()
- noteID := note.GetJSONLDId()
- if noteID != nil && noteID.IsIRI() {
- itemURI = noteID.GetIRI()
+ for /* page loop */ {
+ if current.itemIter == nil {
+ // Check this page contains any items...
+ items := current.page.GetActivityStreamsItems()
+ if current.iterLen = items.Len(); current.iterLen == 0 {
+ continue stackLoop
}
- default:
- // if it's not an iri or a note, we don't know how to process it
- continue
- }
- if itemURI.Host == config.GetHost() {
- // skip if the reply is from us -- we already have it then
- continue
+ // Start off the item iterator
+ current.itemIter = items.Begin()
+ current.iterIdx = -1
}
- // we can confidently say now that we found something
- foundReplies++
+ itemLoop:
+ for current.iterIdx++; current.iterIdx < current.iterLen; current.iterIdx++ {
+ var itemIRI *url.URL
+
+ // Get next item iterator object
+ current.itemIter = current.itemIter.Next()
- // get the remote statusable and put it in the db
- _, statusable, err := d.GetRemoteStatus(ctx, username, itemURI, true, false)
- if err == nil {
- // now iterate descendants of *that* status
- if err := d.iterateDescendants(ctx, username, *itemURI, statusable); err != nil {
- continue
+ switch {
+ // Item is already an IRI
+ case current.itemIter.IsIRI():
+ itemIRI = current.itemIter.GetIRI()
+
+ // Item is a note, get the note ID IRI
+ case current.itemIter.IsActivityStreamsNote():
+ note := current.itemIter.GetActivityStreamsNote()
+ if id := note.GetJSONLDId(); id != nil && id.IsIRI() {
+ itemIRI = id.GetIRI()
+ }
+ }
+
+ if itemIRI == nil {
+ // Unusable iter object
+ continue itemLoop
+ }
+
+ if itemIRI.Host == config.GetHost() {
+ // This child is one of ours,
+ continue itemLoop
+ }
+
+ // Dereference the remote status and store in the database
+ _, statusable, err := d.GetRemoteStatus(ctx, username, itemIRI, true, false)
+ if err != nil {
+ l.Errorf("error dereferencing remote status %q: %s", itemIRI.String(), err)
+ continue itemLoop
}
+
+ // Put current and next frame at top of stack
+ stack = append(stack, current, &frame{
+ statusIRI: itemIRI,
+ statusable: statusable,
+ })
}
- }
- nextPage := collectionPage.GetActivityStreamsNext()
- if nextPage != nil && nextPage.IsIRI() {
- nextPageIRI := nextPage.GetIRI()
- l.Tracef("moving on to next page %s", nextPageIRI)
- currentPageIRI = nextPageIRI
- } else {
- l.Trace("no next page, bailing")
- break pageLoop
+ // Item iterator is done
+ current.itemIter = nil
+
+ // Get the current page's "next" property
+ pageNext := current.page.GetActivityStreamsNext()
+ if pageNext == nil || !pageNext.IsIRI() {
+ continue stackLoop
+ }
+
+ // Get the "next" page property IRI
+ pageNextIRI := pageNext.GetIRI()
+
+ // Dereference this next collection page by its IRI
+ collectionPage, err := d.DereferenceCollectionPage(ctx, username, pageNextIRI)
+ if err != nil {
+ l.Errorf("error dereferencing remote collection page %q: %s", pageNextIRI.String(), err)
+ continue stackLoop
+ }
+
+ // Set the updated collection page
+ current.page = collectionPage
}
}
- l.Debugf("foundReplies %d", foundReplies)
- return nil
+ return fmt.Errorf("reached %d descendant iterations for %q", maxIter, ogIRI.String())
}