diff options
| author | 2022-09-25 12:09:41 +0100 | |
|---|---|---|
| committer | 2022-09-25 13:09:41 +0200 | |
| commit | 7a1aa04bbbc9f95442c8850ef61d1d58bb12df74 (patch) | |
| tree | 7b05a52914deb1f838539a3196edca287942fd95 /internal/federation/dereferencing | |
| parent | [bugfix] Wrap media reader in length reader to determine length if no `conten... (diff) | |
| download | gotosocial-7a1aa04bbbc9f95442c8850ef61d1d58bb12df74.tar.xz | |
[bugfix] update thread iterators to not use recursion (#851)
* update thread iterators to not use recursion, rewrote both
Signed-off-by: kim <grufwub@gmail.com>
* fix endless descendant deref, don't error if fetching existing status
Signed-off-by: kim <grufwub@gmail.com>
* don't refetch remote ancestor statuses, improve descendant iter commenting
Signed-off-by: kim <grufwub@gmail.com>
* move collection page next logic so we capture first page of entities
Signed-off-by: kim <grufwub@gmail.com>
* improve log format argument quoting
Signed-off-by: kim <grufwub@gmail.com>
* improve code commenting of collection paging
Signed-off-by: kim <grufwub@gmail.com>
* only dereference announce's originating status if _not_ local. update DereferenceThread() signature. cleanup searchStatusByURI()
Signed-off-by: kim <grufwub@gmail.com>
Signed-off-by: kim <grufwub@gmail.com>
Diffstat (limited to 'internal/federation/dereferencing')
| -rw-r--r-- | internal/federation/dereferencing/announce.go | 42 | ||||
| -rw-r--r-- | internal/federation/dereferencing/dereferencer.go | 2 | ||||
| -rw-r--r-- | internal/federation/dereferencing/status.go | 3 | ||||
| -rw-r--r-- | internal/federation/dereferencing/thread.go | 363 | 
4 files changed, 249 insertions, 161 deletions
| diff --git a/internal/federation/dereferencing/announce.go b/internal/federation/dereferencing/announce.go index c740bb20a..144ddcb13 100644 --- a/internal/federation/dereferencing/announce.go +++ b/internal/federation/dereferencing/announce.go @@ -24,31 +24,50 @@ import (  	"fmt"  	"net/url" +	"github.com/superseriousbusiness/gotosocial/internal/config"  	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"  )  func (d *deref) DereferenceAnnounce(ctx context.Context, announce *gtsmodel.Status, requestingUsername string) error { -	if announce.BoostOf == nil || announce.BoostOf.URI == "" { +	if announce.BoostOf == nil {  		// we can't do anything unfortunately  		return errors.New("DereferenceAnnounce: no URI to dereference")  	} -	boostedStatusURI, err := url.Parse(announce.BoostOf.URI) +	// Parse the boosted status' URI +	boostedURI, err := url.Parse(announce.BoostOf.URI)  	if err != nil {  		return fmt.Errorf("DereferenceAnnounce: couldn't parse boosted status URI %s: %s", announce.BoostOf.URI, err)  	} -	if blocked, err := d.db.IsDomainBlocked(ctx, boostedStatusURI.Host); blocked || err != nil { -		return fmt.Errorf("DereferenceAnnounce: domain %s is blocked", boostedStatusURI.Host) -	} -	// dereference statuses in the thread of the boosted status -	if err := d.DereferenceThread(ctx, requestingUsername, boostedStatusURI); err != nil { -		return fmt.Errorf("DereferenceAnnounce: error dereferencing thread of boosted status: %s", err) +	// Check whether the originating status is from a blocked host +	if blocked, err := d.db.IsDomainBlocked(ctx, boostedURI.Host); blocked || err != nil { +		return fmt.Errorf("DereferenceAnnounce: domain %s is blocked", boostedURI.Host)  	} -	boostedStatus, _, err := d.GetRemoteStatus(ctx, requestingUsername, boostedStatusURI, false, true) -	if err != nil { -		return fmt.Errorf("DereferenceAnnounce: error dereferencing remote status with id %s: %s", announce.BoostOf.URI, err) +	var boostedStatus *gtsmodel.Status + +	if boostedURI.Host == config.GetHost() { +		// This is a local status, fetch from the database +		status, err := d.db.GetStatusByURI(ctx, boostedURI.String()) +		if err != nil { +			return fmt.Errorf("DereferenceAnnounce: error fetching local status %q: %v", announce.BoostOf.URI, err) +		} + +		// Set boosted status +		boostedStatus = status +	} else { +		// This is a boost of a remote status, we need to dereference it. +		status, statusable, err := d.GetRemoteStatus(ctx, requestingUsername, boostedURI, true, true) +		if err != nil { +			return fmt.Errorf("DereferenceAnnounce: error dereferencing remote status with id %s: %s", announce.BoostOf.URI, err) +		} + +		// Dereference all statuses in the thread of the boosted status +		d.DereferenceThread(ctx, requestingUsername, boostedURI, status, statusable) + +		// Set boosted status +		boostedStatus = status  	}  	announce.Content = boostedStatus.Content @@ -65,5 +84,6 @@ func (d *deref) DereferenceAnnounce(ctx context.Context, announce *gtsmodel.Stat  	announce.Replyable = boostedStatus.Replyable  	announce.Likeable = boostedStatus.Likeable  	announce.BoostOf = boostedStatus +  	return nil  } diff --git a/internal/federation/dereferencing/dereferencer.go b/internal/federation/dereferencing/dereferencer.go index 0fad2405e..331df3215 100644 --- a/internal/federation/dereferencing/dereferencer.go +++ b/internal/federation/dereferencing/dereferencer.go @@ -44,7 +44,7 @@ type Dereferencer interface {  	GetRemoteEmoji(ctx context.Context, requestingUsername string, remoteURL string, shortcode string, id string, emojiURI string, ai *media.AdditionalEmojiInfo) (*media.ProcessingEmoji, error)  	DereferenceAnnounce(ctx context.Context, announce *gtsmodel.Status, requestingUsername string) error -	DereferenceThread(ctx context.Context, username string, statusIRI *url.URL) error +	DereferenceThread(ctx context.Context, username string, statusIRI *url.URL, status *gtsmodel.Status, statusable ap.Statusable)  	Handshaking(ctx context.Context, username string, remoteAccountID *url.URL) bool  } diff --git a/internal/federation/dereferencing/status.go b/internal/federation/dereferencing/status.go index f3b7ee96e..645910d19 100644 --- a/internal/federation/dereferencing/status.go +++ b/internal/federation/dereferencing/status.go @@ -105,7 +105,7 @@ func (d *deref) GetRemoteStatus(ctx context.Context, username string, remoteStat  		return nil, nil, fmt.Errorf("GetRemoteStatus: error populating status fields: %s", err)  	} -	if err := d.db.PutStatus(ctx, gtsStatus); err != nil { +	if err := d.db.PutStatus(ctx, gtsStatus); err != nil && !errors.Is(err, db.ErrAlreadyExists) {  		return nil, nil, fmt.Errorf("GetRemoteStatus: error putting new status: %s", err)  	} @@ -441,7 +441,6 @@ func (d *deref) populateStatusEmojis(ctx context.Context, status *gtsmodel.Statu  				Disabled:             e.Disabled,  				VisibleInPicker:      e.VisibleInPicker,  			}) -  			if err != nil {  				log.Errorf("populateStatusEmojis: couldn't get remote emoji %s: %s", e.URI, err)  				continue diff --git a/internal/federation/dereferencing/thread.go b/internal/federation/dereferencing/thread.go index 7d7431110..d15f05964 100644 --- a/internal/federation/dereferencing/thread.go +++ b/internal/federation/dereferencing/thread.go @@ -24,218 +24,287 @@ import (  	"net/url"  	"codeberg.org/gruf/go-kv" +	"github.com/superseriousbusiness/activity/streams/vocab"  	"github.com/superseriousbusiness/gotosocial/internal/ap"  	"github.com/superseriousbusiness/gotosocial/internal/config" +	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"  	"github.com/superseriousbusiness/gotosocial/internal/log"  	"github.com/superseriousbusiness/gotosocial/internal/uris"  ) +// maxIter defines how many iterations of descendants or +// ancesters we are willing to follow before returning error. +const maxIter = 1000 +  // DereferenceThread takes a statusable (something that has withReplies and withInReplyTo),  // and dereferences statusables in the conversation.  //  // This process involves working up and down the chain of replies, and parsing through the collections of IDs  // presented by remote instances as part of their replies collections, and will likely involve making several calls to  // multiple different hosts. -func (d *deref) DereferenceThread(ctx context.Context, username string, statusIRI *url.URL) error { +// +// This does not return error, as for robustness we do not want to error-out on a status because another further up / down has issues. +func (d *deref) DereferenceThread(ctx context.Context, username string, statusIRI *url.URL, status *gtsmodel.Status, statusable ap.Statusable) {  	l := log.WithFields(kv.Fields{ -  		{"username", username}, -		{"statusIRI", statusIRI}, +		{"statusIRI", status.URI},  	}...) -	l.Trace("entering DereferenceThread") -	// if it's our status we already have everything stashed so we can bail early -	if statusIRI.Host == config.GetHost() { -		l.Trace("iri belongs to us, bailing") -		return nil -	} +	// Log function start +	l.Trace("beginning") -	// first make sure we have this status in our db -	_, statusable, err := d.GetRemoteStatus(ctx, username, statusIRI, true, false) -	if err != nil { -		return fmt.Errorf("DereferenceThread: error getting initial status with id %s: %s", statusIRI.String(), err) +	// Ensure that ancestors have been fully dereferenced +	if err := d.dereferenceStatusAncestors(ctx, username, status); err != nil { +		l.Errorf("error dereferencing status ancestors: %v", err) +		// we don't return error, we have deref'd as much as we can  	} -	// first iterate up through ancestors, dereferencing if necessary as we go -	if err := d.iterateAncestors(ctx, username, *statusIRI); err != nil { -		return fmt.Errorf("error iterating ancestors of status %s: %s", statusIRI.String(), err) +	// Ensure that descendants have been fully dereferenced +	if err := d.dereferenceStatusDescendants(ctx, username, statusIRI, statusable); err != nil { +		l.Errorf("error dereferencing status descendants: %v", err) +		// we don't return error, we have deref'd as much as we can  	} - -	// now iterate down through descendants, again dereferencing as we go -	if err := d.iterateDescendants(ctx, username, *statusIRI, statusable); err != nil { -		return fmt.Errorf("error iterating descendants of status %s: %s", statusIRI.String(), err) -	} - -	return nil  } -// iterateAncestors has the goal of reaching the oldest ancestor of a given status, and stashing all statuses along the way. -func (d *deref) iterateAncestors(ctx context.Context, username string, statusIRI url.URL) error { -	l := log.WithFields(kv.Fields{ +// dereferenceAncestors has the goal of reaching the oldest ancestor of a given status, and stashing all statuses along the way. +func (d *deref) dereferenceStatusAncestors(ctx context.Context, username string, status *gtsmodel.Status) error { +	// Take ref to original +	ogIRI := status.URI +	// Start log entry with fields +	l := log.WithFields(kv.Fields{  		{"username", username}, -		{"statusIRI", statusIRI}, +		{"statusIRI", ogIRI},  	}...) -	l.Trace("entering iterateAncestors") -	// if it's our status we don't need to dereference anything so we can immediately move up the chain -	if statusIRI.Host == config.GetHost() { -		l.Trace("iri belongs to us, moving up to next ancestor") - -		// since this is our status, we know we can extract the id from the status path -		_, id, err := uris.ParseStatusesPath(&statusIRI) -		if err != nil { -			return err -		} - -		status, err := d.db.GetStatusByID(ctx, id) -		if err != nil { -			return err -		} +	// Log function start +	l.Trace("beginning") +	for i := 0; i < maxIter; i++ {  		if status.InReplyToURI == "" {  			// status doesn't reply to anything  			return nil  		} -		nextIRI, err := url.Parse(status.URI) +		// Parse this status's replied IRI +		replyIRI, err := url.Parse(status.InReplyToURI)  		if err != nil { -			return err +			return fmt.Errorf("invalid status InReplyToURI %q: %w", status.InReplyToURI, err)  		} -		return d.iterateAncestors(ctx, username, *nextIRI) -	} +		if replyIRI.Host == config.GetHost() { +			l.Tracef("following local status ancestors: %s", status.InReplyToURI) -	// If we reach here, we're looking at a remote status -	_, statusable, err := d.GetRemoteStatus(ctx, username, &statusIRI, true, false) -	if err != nil { -		l.Debugf("couldn't get remote status %s: %s; can't iterate any more ancestors", statusIRI.String(), err) -		return nil -	} +			// This is our status, extract ID from path +			_, id, err := uris.ParseStatusesPath(replyIRI) +			if err != nil { +				return fmt.Errorf("invalid local status IRI %q: %w", status.InReplyToURI, err) +			} + +			// Fetch this status from the database +			localStatus, err := d.db.GetStatusByID(ctx, id) +			if err != nil { +				return fmt.Errorf("error fetching local status %q: %w", id, err) +			} + +			// Set the fetched status +			status = localStatus + +		} else { +			l.Tracef("following remote status ancestors: %s", status.InReplyToURI) + +			// Fetch the remote status found at this IRI +			remoteStatus, _, err := d.GetRemoteStatus(ctx, username, replyIRI, false, false) +			if err != nil { +				return fmt.Errorf("error fetching remote status %q: %w", status.InReplyToURI, err) +			} -	inReplyTo := ap.ExtractInReplyToURI(statusable) -	if inReplyTo == nil || inReplyTo.String() == "" { -		// status doesn't reply to anything -		return nil +			// Set the fetched status +			status = remoteStatus +		}  	} -	// now move up to the next ancestor -	return d.iterateAncestors(ctx, username, *inReplyTo) +	return fmt.Errorf("reached %d ancestor iterations for %q", maxIter, ogIRI)  } -func (d *deref) iterateDescendants(ctx context.Context, username string, statusIRI url.URL, statusable ap.Statusable) error { -	l := log.WithFields(kv.Fields{ +func (d *deref) dereferenceStatusDescendants(ctx context.Context, username string, statusIRI *url.URL, parent ap.Statusable) error { +	// Take ref to original +	ogIRI := statusIRI +	// Start log entry with fields +	l := log.WithFields(kv.Fields{  		{"username", username}, -		{"statusIRI", statusIRI}, +		{"statusIRI", ogIRI},  	}...) -	l.Trace("entering iterateDescendants") - -	// if it's our status we already have descendants stashed so we can bail early -	if statusIRI.Host == config.GetHost() { -		l.Trace("iri belongs to us, bailing") -		return nil -	} -	replies := statusable.GetActivityStreamsReplies() -	if replies == nil || !replies.IsActivityStreamsCollection() { -		l.Trace("no replies, bailing") -		return nil +	// Log function start +	l.Trace("beginning") + +	// frame represents a single stack frame when iteratively +	// dereferencing status descendants. where statusIRI and +	// statusable are of the status whose children we are to +	// descend, page is the current activity streams collection +	// page of entities we are on (as we often push a frame to +	// stack mid-paging), and item___ are entity iterators for +	// this activity streams collection page. +	type frame struct { +		statusIRI  *url.URL +		statusable ap.Statusable +		page       ap.CollectionPageable +		itemIter   vocab.ActivityStreamsItemsPropertyIterator +		iterLen    int +		iterIdx    int  	} -	repliesCollection := replies.GetActivityStreamsCollection() -	if repliesCollection == nil { -		l.Trace("replies collection is nil, bailing") -		return nil -	} +	var ( +		// current is the current stack frame +		current *frame + +		// stack is a list of "shelved" descendand iterator +		// frames. this is pushed to when a child status frame +		// is found that we need to further iterate down, and +		// popped from into 'current' when that child's tree +		// of further descendants is exhausted. +		stack = []*frame{ +			{ +				// Starting input is first frame +				statusIRI:  statusIRI, +				statusable: parent, +			}, +		} -	first := repliesCollection.GetActivityStreamsFirst() -	if first == nil { -		l.Trace("replies collection has no first, bailing") -		return nil -	} +		// popStack will remove and return the top frame +		// from the stack, or nil if currently empty. +		popStack = func() *frame { +			if len(stack) == 0 { +				return nil +			} -	firstPage := first.GetActivityStreamsCollectionPage() -	if firstPage == nil { -		l.Trace("first has no collection page, bailing") -		return nil -	} +			// Get frame index +			idx := len(stack) - 1 -	firstPageNext := firstPage.GetActivityStreamsNext() -	if firstPageNext == nil || !firstPageNext.IsIRI() { -		l.Trace("next is not an iri, bailing") -		return nil -	} +			// Pop last frame +			frame := stack[idx] +			stack = stack[:idx] -	var foundReplies int -	currentPageIRI := firstPageNext.GetIRI() +			return frame +		} +	) -pageLoop: -	for { -		l.Tracef("dereferencing page %s", currentPageIRI) -		collectionPage, err := d.DereferenceCollectionPage(ctx, username, currentPageIRI) -		if err != nil { -			l.Debugf("couldn't get remote collection page %s: %s; breaking pageLoop", currentPageIRI, err) -			break pageLoop +stackLoop: +	for i := 0; i < maxIter; i++ { +		// Pop next frame, nil means we are at end +		if current = popStack(); current == nil { +			return nil  		} -		pageItems := collectionPage.GetActivityStreamsItems() -		if pageItems.Len() == 0 { -			// no items on this page, which means we're done -			break pageLoop +		if current.page == nil { +			// This is a local status, no looping to do +			if current.statusIRI.Host == config.GetHost() { +				continue stackLoop +			} + +			l.Tracef("following remote status descendants: %s", current.statusIRI) + +			// Look for an attached status replies (as collection) +			replies := current.statusable.GetActivityStreamsReplies() +			if replies == nil || !replies.IsActivityStreamsCollection() { +				continue stackLoop +			} + +			// Get the status replies collection +			collection := replies.GetActivityStreamsCollection() + +			// Get the "first" property of the replies collection +			first := collection.GetActivityStreamsFirst() +			if first == nil || !first.IsActivityStreamsCollectionPage() { +				continue stackLoop +			} + +			// Set the first activity stream collection page +			current.page = first.GetActivityStreamsCollectionPage()  		} -		// have a look through items and see what we can find -		for iter := pageItems.Begin(); iter != pageItems.End(); iter = iter.Next() { -			// We're looking for a url to feed to GetRemoteStatus. -			// Each item can be either an IRI, or a Note. -			// If a note, we grab the ID from it and call it, rather than parsing the note. -			var itemURI *url.URL -			switch { -			case iter.IsIRI(): -				// iri, easy -				itemURI = iter.GetIRI() -			case iter.IsActivityStreamsNote(): -				// note, get the id from it to use as iri -				note := iter.GetActivityStreamsNote() -				noteID := note.GetJSONLDId() -				if noteID != nil && noteID.IsIRI() { -					itemURI = noteID.GetIRI() +		for /* page loop */ { +			if current.itemIter == nil { +				// Check this page contains any items... +				items := current.page.GetActivityStreamsItems() +				if current.iterLen = items.Len(); current.iterLen == 0 { +					continue stackLoop  				} -			default: -				// if it's not an iri or a note, we don't know how to process it -				continue -			} -			if itemURI.Host == config.GetHost() { -				// skip if the reply is from us -- we already have it then -				continue +				// Start off the item iterator +				current.itemIter = items.Begin() +				current.iterIdx = -1  			} -			// we can confidently say now that we found something -			foundReplies++ +		itemLoop: +			for current.iterIdx++; current.iterIdx < current.iterLen; current.iterIdx++ { +				var itemIRI *url.URL + +				// Get next item iterator object +				current.itemIter = current.itemIter.Next() -			// get the remote statusable and put it in the db -			_, statusable, err := d.GetRemoteStatus(ctx, username, itemURI, true, false) -			if err == nil { -				// now iterate descendants of *that* status -				if err := d.iterateDescendants(ctx, username, *itemURI, statusable); err != nil { -					continue +				switch { +				// Item is already an IRI +				case current.itemIter.IsIRI(): +					itemIRI = current.itemIter.GetIRI() + +				// Item is a note, get the note ID IRI +				case current.itemIter.IsActivityStreamsNote(): +					note := current.itemIter.GetActivityStreamsNote() +					if id := note.GetJSONLDId(); id != nil && id.IsIRI() { +						itemIRI = id.GetIRI() +					} +				} + +				if itemIRI == nil { +					// Unusable iter object +					continue itemLoop +				} + +				if itemIRI.Host == config.GetHost() { +					// This child is one of ours, +					continue itemLoop +				} + +				// Dereference the remote status and store in the database +				_, statusable, err := d.GetRemoteStatus(ctx, username, itemIRI, true, false) +				if err != nil { +					l.Errorf("error dereferencing remote status %q: %s", itemIRI.String(), err) +					continue itemLoop  				} + +				// Put current and next frame at top of stack +				stack = append(stack, current, &frame{ +					statusIRI:  itemIRI, +					statusable: statusable, +				})  			} -		} -		nextPage := collectionPage.GetActivityStreamsNext() -		if nextPage != nil && nextPage.IsIRI() { -			nextPageIRI := nextPage.GetIRI() -			l.Tracef("moving on to next page %s", nextPageIRI) -			currentPageIRI = nextPageIRI -		} else { -			l.Trace("no next page, bailing") -			break pageLoop +			// Item iterator is done +			current.itemIter = nil + +			// Get the current page's "next" property +			pageNext := current.page.GetActivityStreamsNext() +			if pageNext == nil || !pageNext.IsIRI() { +				continue stackLoop +			} + +			// Get the "next" page property IRI +			pageNextIRI := pageNext.GetIRI() + +			// Dereference this next collection page by its IRI +			collectionPage, err := d.DereferenceCollectionPage(ctx, username, pageNextIRI) +			if err != nil { +				l.Errorf("error dereferencing remote collection page %q: %s", pageNextIRI.String(), err) +				continue stackLoop +			} + +			// Set the updated collection page +			current.page = collectionPage  		}  	} -	l.Debugf("foundReplies %d", foundReplies) -	return nil +	return fmt.Errorf("reached %d descendant iterations for %q", maxIter, ogIRI.String())  } | 
