diff options
Diffstat (limited to 'internal/federation/dereferencing')
| -rw-r--r-- | internal/federation/dereferencing/dereferencer.go | 6 | ||||
| -rw-r--r-- | internal/federation/dereferencing/status.go | 19 | ||||
| -rw-r--r-- | internal/federation/dereferencing/thread.go | 208 | 
3 files changed, 173 insertions, 60 deletions
diff --git a/internal/federation/dereferencing/dereferencer.go b/internal/federation/dereferencing/dereferencer.go index 2902ebcbc..170fb6119 100644 --- a/internal/federation/dereferencing/dereferencer.go +++ b/internal/federation/dereferencing/dereferencer.go @@ -66,6 +66,12 @@ type Dereferencer interface {  	// This is a more optimized form of manually enqueueing .UpdateStatus() to the federation worker, since it only enqueues update if necessary.  	RefreshStatusAsync(ctx context.Context, requestUser string, status *gtsmodel.Status, apubStatus ap.Statusable, force bool) +	// DereferenceStatusAncestors iterates upwards from the given status, using InReplyToURI, to ensure that as many parent statuses as possible are dereferenced. +	DereferenceStatusAncestors(ctx context.Context, requestUser string, status *gtsmodel.Status) error + +	// DereferenceStatusDescendents iterates downwards from the given status, using its replies, to ensure that as many children statuses as possible are dereferenced. +	DereferenceStatusDescendants(ctx context.Context, requestUser string, statusIRI *url.URL, parent ap.Statusable) error +  	GetRemoteInstance(ctx context.Context, username string, remoteInstanceURI *url.URL) (*gtsmodel.Instance, error)  	DereferenceAnnounce(ctx context.Context, announce *gtsmodel.Status, requestingUsername string) error diff --git a/internal/federation/dereferencing/status.go b/internal/federation/dereferencing/status.go index 75adfdd6f..4525f64a9 100644 --- a/internal/federation/dereferencing/status.go +++ b/internal/federation/dereferencing/status.go @@ -104,7 +104,7 @@ func (d *deref) getStatusByURI(ctx context.Context, requestUser string, uri *url  	}  	if status == nil { -		// Ensure that this is isn't a search for a local status. +		// Ensure that this isn't a search for a local status.  		if uri.Host == config.GetHost() || uri.Host == config.GetAccountDomain() {  			return nil, nil, gtserror.SetUnretrievable(err) // this will be db.ErrNoEntries  		} @@ -149,7 +149,7 @@ func (d *deref) getStatusByURI(ctx context.Context, requestUser string, uri *url  // RefreshStatus: implements Dereferencer{}.RefreshStatus().  func (d *deref) RefreshStatus(ctx context.Context, requestUser string, status *gtsmodel.Status, apubStatus ap.Statusable, force bool) (*gtsmodel.Status, ap.Statusable, error) {  	// Check whether needs update. -	if statusUpToDate(status) { +	if !force && statusUpToDate(status) {  		return status, nil, nil  	} @@ -205,8 +205,16 @@ func (d *deref) RefreshStatusAsync(ctx context.Context, requestUser string, stat  	})  } -// enrichStatus will enrich the given status, whether a new barebones model, or existing model from the database. It handles necessary dereferencing etc. -func (d *deref) enrichStatus(ctx context.Context, requestUser string, uri *url.URL, status *gtsmodel.Status, apubStatus ap.Statusable) (*gtsmodel.Status, ap.Statusable, error) { +// enrichStatus will enrich the given status, whether a new +// barebones model, or existing model from the database. +// It handles necessary dereferencing, database updates, etc. +func (d *deref) enrichStatus( +	ctx context.Context, +	requestUser string, +	uri *url.URL, +	status *gtsmodel.Status, +	apubStatus ap.Statusable, +) (*gtsmodel.Status, ap.Statusable, error) {  	// Pre-fetch a transport for requesting username, used by later dereferencing.  	tsport, err := d.transportController.NewTransportForUsername(ctx, requestUser)  	if err != nil { @@ -217,7 +225,8 @@ func (d *deref) enrichStatus(ctx context.Context, requestUser string, uri *url.U  	if blocked, err := d.state.DB.IsDomainBlocked(ctx, uri.Host); err != nil {  		return nil, nil, gtserror.Newf("error checking blocked domain: %w", err)  	} else if blocked { -		return nil, nil, gtserror.Newf("%s is blocked", uri.Host) +		err = gtserror.Newf("%s is blocked", uri.Host) +		return nil, nil, gtserror.SetUnretrievable(err)  	}  	if apubStatus == nil { diff --git a/internal/federation/dereferencing/thread.go b/internal/federation/dereferencing/thread.go index a12e537bc..a81849e54 100644 --- a/internal/federation/dereferencing/thread.go +++ b/internal/federation/dereferencing/thread.go @@ -19,6 +19,8 @@ package dereferencing  import (  	"context" +	"errors" +	"net/http"  	"net/url"  	"codeberg.org/gruf/go-kv" @@ -26,96 +28,184 @@ import (  	"github.com/superseriousbusiness/activity/streams/vocab"  	"github.com/superseriousbusiness/gotosocial/internal/ap"  	"github.com/superseriousbusiness/gotosocial/internal/config" +	"github.com/superseriousbusiness/gotosocial/internal/db" +	"github.com/superseriousbusiness/gotosocial/internal/gtscontext"  	"github.com/superseriousbusiness/gotosocial/internal/gtserror"  	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"  	"github.com/superseriousbusiness/gotosocial/internal/log" -	"github.com/superseriousbusiness/gotosocial/internal/uris"  )  // maxIter defines how many iterations of descendants or  // ancesters we are willing to follow before returning error.  const maxIter = 1000 -// dereferenceThread will dereference statuses both above and below the given status in a thread, it returns no error and is intended to be called asychronously.  func (d *deref) dereferenceThread(ctx context.Context, username string, statusIRI *url.URL, status *gtsmodel.Status, statusable ap.Statusable) {  	// Ensure that ancestors have been fully dereferenced -	if err := d.dereferenceStatusAncestors(ctx, username, status); err != nil { -		log.Error(ctx, err) // log entry and error will include caller prefixes +	if err := d.DereferenceStatusAncestors(ctx, username, status); err != nil { +		log.Error(ctx, err)  	}  	// Ensure that descendants have been fully dereferenced -	if err := d.dereferenceStatusDescendants(ctx, username, statusIRI, statusable); err != nil { -		log.Error(ctx, err) // log entry and error will include caller prefixes +	if err := d.DereferenceStatusDescendants(ctx, username, statusIRI, statusable); err != nil { +		log.Error(ctx, err)  	}  } -// dereferenceAncestors has the goal of reaching the oldest ancestor of a given status, and stashing all statuses along the way. -func (d *deref) dereferenceStatusAncestors(ctx context.Context, username string, status *gtsmodel.Status) error { -	// Take ref to original -	ogIRI := status.URI - -	// Start log entry with fields -	l := log.WithContext(ctx). -		WithFields(kv.Fields{ -			{"username", username}, -			{"statusIRI", ogIRI}, -		}...) - -	// Log function start -	l.Trace("beginning") +func (d *deref) DereferenceStatusAncestors( +	ctx context.Context, +	username string, +	status *gtsmodel.Status, +) error { +	// Mark given status as the one +	// we're currently working on. +	var current = status  	for i := 0; i < maxIter; i++ { -		if status.InReplyToURI == "" { -			// status doesn't reply to anything +		if current.InReplyToURI == "" { +			// Status has no parent, we've +			// reached the top of the chain.  			return nil  		} -		// Parse this status's replied IRI -		replyIRI, err := url.Parse(status.InReplyToURI) -		if err != nil { -			return gtserror.Newf("invalid status InReplyToURI %q: %w", status.InReplyToURI, err) -		} - -		if replyIRI.Host == config.GetHost() { -			l.Tracef("following local status ancestors: %s", status.InReplyToURI) +		l := log. +			WithContext(ctx). +			WithFields(kv.Fields{ +				{"username", username}, +				{"originalStatusIRI", status.URI}, +				{"currentStatusURI", current.URI}, +				{"currentInReplyToURI", current.InReplyToURI}, +			}...) + +		if current.InReplyToID != "" { +			// We already have an InReplyToID set. This means +			// the status's parent has, at some point, been +			// inserted into the database, either because it +			// is a status from our instance, or a status from +			// remote that we've dereferenced before, or found +			// out about in some other way. +			// +			// Working on this assumption, check if the parent +			// status exists, either as a copy pinned on the +			// current status, or in the database. + +			if current.InReplyTo != nil { +				// We have the parent already, and the child +				// doesn't need to be updated; keep iterating +				// from this parent upwards. +				current = current.InReplyTo +				continue +			} -			// This is our status, extract ID from path -			_, id, err := uris.ParseStatusesPath(replyIRI) -			if err != nil { -				return gtserror.Newf("invalid local status IRI %q: %w", status.InReplyToURI, err) +			// Parent isn't pinned to this status (yet), see +			// if we can get it from the db (we should be +			// able to, since it has an ID already). +			parent, err := d.state.DB.GetStatusByID( +				gtscontext.SetBarebones(ctx), +				current.InReplyToID, +			) +			if err != nil && !errors.Is(err, db.ErrNoEntries) { +				// Real db error, stop. +				return gtserror.Newf("db error getting status %s: %w", current.InReplyToID, err)  			} -			// Fetch this status from the database -			localStatus, err := d.state.DB.GetStatusByID(ctx, id) -			if err != nil { -				return gtserror.Newf("error fetching local status %q: %w", id, err) +			if parent != nil { +				// We got the parent from the db, and the child +				// doesn't need to be updated; keep iterating +				// from this parent upwards. +				current.InReplyTo = parent +				current = parent +				continue  			} -			// Set the fetched status -			status = localStatus +			// If we arrive here, we know this child *did* have +			// a parent at some point, but it no longer exists in +			// the database, presumably because it's been deleted +			// by another action. +			// +			// TODO: clean this up in a nightly task. +			l.Warnf("current status has been orphaned (parent %s no longer exists in database)", current.InReplyToID) +			return nil // Cannot iterate further. +		} -		} else { -			l.Tracef("following remote status ancestors: %s", status.InReplyToURI) +		// If we reach this point, we know the status has +		// an InReplyToURI set, but it doesn't yet have an +		// InReplyToID, which means that the parent status +		// has not yet been dereferenced. +		inReplyToURI, err := url.Parse(current.InReplyToURI) +		if err != nil || inReplyToURI == nil { +			// Parent URI is not something we can handle. +			l.Debug("current status has been orphaned (invalid InReplyToURI)") +			return nil //nolint:nilerr +		} -			// Fetch the remote status found at this IRI -			remoteStatus, _, err := d.getStatusByURI( -				ctx, -				username, -				replyIRI, -			) -			if err != nil { -				return gtserror.Newf("error fetching remote status %q: %w", status.InReplyToURI, err) +		// Parent URI is valid, try to get it. +		// getStatusByURI guards against the following conditions: +		// +		//   - remote domain is blocked (will return unretrievable) +		//   - domain is local (will try to return something, or +		//     return unretrievable). +		parent, _, err := d.getStatusByURI(ctx, username, inReplyToURI) +		if err == nil { +			// We successfully fetched the parent. +			// Update current status with new info. +			current.InReplyToID = parent.ID +			current.InReplyToAccountID = parent.AccountID +			if err := d.state.DB.UpdateStatus( +				ctx, current, +				"in_reply_to_id", +				"in_reply_to_account_id", +			); err != nil { +				return gtserror.Newf("db error updating status %s: %w", current.ID, err) +			} + +			// Mark parent as next status to +			// work on, and keep iterating. +			current = parent +			continue +		} + +		// We could not fetch the parent, check if we can do anything +		// useful with the error. For example, HTTP status code returned +		// from remote may indicate that the parent has been deleted. +		switch code := gtserror.StatusCode(err); { +		case code == http.StatusGone || code == http.StatusNotFound: +			// 410 means the status has definitely been deleted. +			// 404 means the status has *probably* been deleted. +			// Update this status to reflect that, then bail. +			l.Debugf("current status has been orphaned (call to parent returned code %d)", code) + +			current.InReplyToURI = "" +			if err := d.state.DB.UpdateStatus( +				ctx, current, +				"in_reply_to_uri", +			); err != nil { +				return gtserror.Newf("db error updating status %s: %w", current.ID, err)  			} +			return nil + +		case code != 0: +			// We had a code, but not one indicating deletion, +			// log the code but don't return error or update the +			// status; we can try again later. +			l.Warnf("cannot dereference parent (%q)", err) +			return nil + +		case gtserror.Unretrievable(err): +			// Not retrievable for some other reason, so just +			// bail; we can try again later if necessary. +			l.Debugf("parent unretrievable (%q)", err) +			return nil -			// Set the fetched status -			status = remoteStatus +		default: +			// Some other error that stops us in our tracks. +			return gtserror.Newf("error dereferencing parent %s: %w", current.InReplyToURI, err)  		}  	} -	return gtserror.Newf("reached %d ancestor iterations for %q", maxIter, ogIRI) +	return gtserror.Newf("reached %d ancestor iterations for %q", maxIter, status.URI)  } -func (d *deref) dereferenceStatusDescendants(ctx context.Context, username string, statusIRI *url.URL, parent ap.Statusable) error { +func (d *deref) DereferenceStatusDescendants(ctx context.Context, username string, statusIRI *url.URL, parent ap.Statusable) error {  	// Take ref to original  	ogIRI := statusIRI @@ -256,9 +346,17 @@ stackLoop:  				}  				// Dereference the remote status and store in the database. +				// getStatusByURI guards against the following conditions: +				// +				//   - remote domain is blocked (will return unretrievable) +				//   - domain is local (will try to return something, or +				//     return unretrievable).  				_, statusable, err := d.getStatusByURI(ctx, username, itemIRI)  				if err != nil { -					l.Errorf("error dereferencing remote status %s: %v", itemIRI, err) +					if !gtserror.Unretrievable(err) { +						l.Errorf("error dereferencing remote status %s: %v", itemIRI, err) +					} +  					continue itemLoop  				}  | 
