summaryrefslogtreecommitdiff
path: root/internal/federation/dereferencing/thread.go
diff options
context:
space:
mode:
authorLibravatar tobi <31960611+tsmethurst@users.noreply.github.com>2023-06-24 09:32:10 +0200
committerLibravatar GitHub <noreply@github.com>2023-06-24 08:32:10 +0100
commit3e19f480e63085d48ea4242f46b57105edaa640f (patch)
tree0cb8ae6aa87dbde884f33a6b3531d10910cab87b /internal/federation/dereferencing/thread.go
parent[bugfix/chore] oauth entropy fix + media cleanup tasks rewrite (#1853) (diff)
downloadgotosocial-3e19f480e63085d48ea4242f46b57105edaa640f.tar.xz
[bugfix] Ensure `InReplyToID` set properly, update dereference ancestors func (#1921)
Diffstat (limited to 'internal/federation/dereferencing/thread.go')
-rw-r--r--internal/federation/dereferencing/thread.go208
1 files changed, 153 insertions, 55 deletions
diff --git a/internal/federation/dereferencing/thread.go b/internal/federation/dereferencing/thread.go
index a12e537bc..a81849e54 100644
--- a/internal/federation/dereferencing/thread.go
+++ b/internal/federation/dereferencing/thread.go
@@ -19,6 +19,8 @@ package dereferencing
import (
"context"
+ "errors"
+ "net/http"
"net/url"
"codeberg.org/gruf/go-kv"
@@ -26,96 +28,184 @@ import (
"github.com/superseriousbusiness/activity/streams/vocab"
"github.com/superseriousbusiness/gotosocial/internal/ap"
"github.com/superseriousbusiness/gotosocial/internal/config"
+ "github.com/superseriousbusiness/gotosocial/internal/db"
+ "github.com/superseriousbusiness/gotosocial/internal/gtscontext"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
- "github.com/superseriousbusiness/gotosocial/internal/uris"
)
// maxIter defines how many iterations of descendants or
// ancesters we are willing to follow before returning error.
const maxIter = 1000
-// dereferenceThread will dereference statuses both above and below the given status in a thread, it returns no error and is intended to be called asychronously.
func (d *deref) dereferenceThread(ctx context.Context, username string, statusIRI *url.URL, status *gtsmodel.Status, statusable ap.Statusable) {
// Ensure that ancestors have been fully dereferenced
- if err := d.dereferenceStatusAncestors(ctx, username, status); err != nil {
- log.Error(ctx, err) // log entry and error will include caller prefixes
+ if err := d.DereferenceStatusAncestors(ctx, username, status); err != nil {
+ log.Error(ctx, err)
}
// Ensure that descendants have been fully dereferenced
- if err := d.dereferenceStatusDescendants(ctx, username, statusIRI, statusable); err != nil {
- log.Error(ctx, err) // log entry and error will include caller prefixes
+ if err := d.DereferenceStatusDescendants(ctx, username, statusIRI, statusable); err != nil {
+ log.Error(ctx, err)
}
}
-// dereferenceAncestors has the goal of reaching the oldest ancestor of a given status, and stashing all statuses along the way.
-func (d *deref) dereferenceStatusAncestors(ctx context.Context, username string, status *gtsmodel.Status) error {
- // Take ref to original
- ogIRI := status.URI
-
- // Start log entry with fields
- l := log.WithContext(ctx).
- WithFields(kv.Fields{
- {"username", username},
- {"statusIRI", ogIRI},
- }...)
-
- // Log function start
- l.Trace("beginning")
+func (d *deref) DereferenceStatusAncestors(
+ ctx context.Context,
+ username string,
+ status *gtsmodel.Status,
+) error {
+ // Mark given status as the one
+ // we're currently working on.
+ var current = status
for i := 0; i < maxIter; i++ {
- if status.InReplyToURI == "" {
- // status doesn't reply to anything
+ if current.InReplyToURI == "" {
+ // Status has no parent, we've
+ // reached the top of the chain.
return nil
}
- // Parse this status's replied IRI
- replyIRI, err := url.Parse(status.InReplyToURI)
- if err != nil {
- return gtserror.Newf("invalid status InReplyToURI %q: %w", status.InReplyToURI, err)
- }
-
- if replyIRI.Host == config.GetHost() {
- l.Tracef("following local status ancestors: %s", status.InReplyToURI)
+ l := log.
+ WithContext(ctx).
+ WithFields(kv.Fields{
+ {"username", username},
+ {"originalStatusIRI", status.URI},
+ {"currentStatusURI", current.URI},
+ {"currentInReplyToURI", current.InReplyToURI},
+ }...)
+
+ if current.InReplyToID != "" {
+ // We already have an InReplyToID set. This means
+ // the status's parent has, at some point, been
+ // inserted into the database, either because it
+ // is a status from our instance, or a status from
+ // remote that we've dereferenced before, or found
+ // out about in some other way.
+ //
+ // Working on this assumption, check if the parent
+ // status exists, either as a copy pinned on the
+ // current status, or in the database.
+
+ if current.InReplyTo != nil {
+ // We have the parent already, and the child
+ // doesn't need to be updated; keep iterating
+ // from this parent upwards.
+ current = current.InReplyTo
+ continue
+ }
- // This is our status, extract ID from path
- _, id, err := uris.ParseStatusesPath(replyIRI)
- if err != nil {
- return gtserror.Newf("invalid local status IRI %q: %w", status.InReplyToURI, err)
+ // Parent isn't pinned to this status (yet), see
+ // if we can get it from the db (we should be
+ // able to, since it has an ID already).
+ parent, err := d.state.DB.GetStatusByID(
+ gtscontext.SetBarebones(ctx),
+ current.InReplyToID,
+ )
+ if err != nil && !errors.Is(err, db.ErrNoEntries) {
+ // Real db error, stop.
+ return gtserror.Newf("db error getting status %s: %w", current.InReplyToID, err)
}
- // Fetch this status from the database
- localStatus, err := d.state.DB.GetStatusByID(ctx, id)
- if err != nil {
- return gtserror.Newf("error fetching local status %q: %w", id, err)
+ if parent != nil {
+ // We got the parent from the db, and the child
+ // doesn't need to be updated; keep iterating
+ // from this parent upwards.
+ current.InReplyTo = parent
+ current = parent
+ continue
}
- // Set the fetched status
- status = localStatus
+ // If we arrive here, we know this child *did* have
+ // a parent at some point, but it no longer exists in
+ // the database, presumably because it's been deleted
+ // by another action.
+ //
+ // TODO: clean this up in a nightly task.
+ l.Warnf("current status has been orphaned (parent %s no longer exists in database)", current.InReplyToID)
+ return nil // Cannot iterate further.
+ }
- } else {
- l.Tracef("following remote status ancestors: %s", status.InReplyToURI)
+ // If we reach this point, we know the status has
+ // an InReplyToURI set, but it doesn't yet have an
+ // InReplyToID, which means that the parent status
+ // has not yet been dereferenced.
+ inReplyToURI, err := url.Parse(current.InReplyToURI)
+ if err != nil || inReplyToURI == nil {
+ // Parent URI is not something we can handle.
+ l.Debug("current status has been orphaned (invalid InReplyToURI)")
+ return nil //nolint:nilerr
+ }
- // Fetch the remote status found at this IRI
- remoteStatus, _, err := d.getStatusByURI(
- ctx,
- username,
- replyIRI,
- )
- if err != nil {
- return gtserror.Newf("error fetching remote status %q: %w", status.InReplyToURI, err)
+ // Parent URI is valid, try to get it.
+ // getStatusByURI guards against the following conditions:
+ //
+ // - remote domain is blocked (will return unretrievable)
+ // - domain is local (will try to return something, or
+ // return unretrievable).
+ parent, _, err := d.getStatusByURI(ctx, username, inReplyToURI)
+ if err == nil {
+ // We successfully fetched the parent.
+ // Update current status with new info.
+ current.InReplyToID = parent.ID
+ current.InReplyToAccountID = parent.AccountID
+ if err := d.state.DB.UpdateStatus(
+ ctx, current,
+ "in_reply_to_id",
+ "in_reply_to_account_id",
+ ); err != nil {
+ return gtserror.Newf("db error updating status %s: %w", current.ID, err)
+ }
+
+ // Mark parent as next status to
+ // work on, and keep iterating.
+ current = parent
+ continue
+ }
+
+ // We could not fetch the parent, check if we can do anything
+ // useful with the error. For example, HTTP status code returned
+ // from remote may indicate that the parent has been deleted.
+ switch code := gtserror.StatusCode(err); {
+ case code == http.StatusGone || code == http.StatusNotFound:
+ // 410 means the status has definitely been deleted.
+ // 404 means the status has *probably* been deleted.
+ // Update this status to reflect that, then bail.
+ l.Debugf("current status has been orphaned (call to parent returned code %d)", code)
+
+ current.InReplyToURI = ""
+ if err := d.state.DB.UpdateStatus(
+ ctx, current,
+ "in_reply_to_uri",
+ ); err != nil {
+ return gtserror.Newf("db error updating status %s: %w", current.ID, err)
}
+ return nil
+
+ case code != 0:
+ // We had a code, but not one indicating deletion,
+ // log the code but don't return error or update the
+ // status; we can try again later.
+ l.Warnf("cannot dereference parent (%q)", err)
+ return nil
+
+ case gtserror.Unretrievable(err):
+ // Not retrievable for some other reason, so just
+ // bail; we can try again later if necessary.
+ l.Debugf("parent unretrievable (%q)", err)
+ return nil
- // Set the fetched status
- status = remoteStatus
+ default:
+ // Some other error that stops us in our tracks.
+ return gtserror.Newf("error dereferencing parent %s: %w", current.InReplyToURI, err)
}
}
- return gtserror.Newf("reached %d ancestor iterations for %q", maxIter, ogIRI)
+ return gtserror.Newf("reached %d ancestor iterations for %q", maxIter, status.URI)
}
-func (d *deref) dereferenceStatusDescendants(ctx context.Context, username string, statusIRI *url.URL, parent ap.Statusable) error {
+func (d *deref) DereferenceStatusDescendants(ctx context.Context, username string, statusIRI *url.URL, parent ap.Statusable) error {
// Take ref to original
ogIRI := statusIRI
@@ -256,9 +346,17 @@ stackLoop:
}
// Dereference the remote status and store in the database.
+ // getStatusByURI guards against the following conditions:
+ //
+ // - remote domain is blocked (will return unretrievable)
+ // - domain is local (will try to return something, or
+ // return unretrievable).
_, statusable, err := d.getStatusByURI(ctx, username, itemIRI)
if err != nil {
- l.Errorf("error dereferencing remote status %s: %v", itemIRI, err)
+ if !gtserror.Unretrievable(err) {
+ l.Errorf("error dereferencing remote status %s: %v", itemIRI, err)
+ }
+
continue itemLoop
}