summary refs log tree commit diff
diff options
context:
space:
mode:
authorkim <89579420+NyaaaWhatsUpDoc@users.noreply.github.com>2024-01-09 09:42:39 +0000
committertobi <tobi.smethurst@protonmail.com>2024-01-10 13:54:21 +0100
commitd5c305dc6e3275589d2931bcb0ae7d912c9ab04a (patch)
tree148eb12c25dd3a52eff55caa37eaea4d596d757f
parent1c56192a44175406b12cf66975e73e9621b14b7e (diff)
[bugfix] misc dereferencer fixes (#2475)
* only perform status-up-to-date checks if no statusable has been provided

* copy over the same style of freshness checking from status deref -> accounts

* change some var names

* check for empty account domain
-rw-r--r--internal/federation/dereferencing/account.go121
-rw-r--r--internal/federation/dereferencing/status.go17
2 files changed, 92 insertions, 46 deletions
diff --git a/internal/federation/dereferencing/account.go b/internal/federation/dereferencing/account.go
index 19d53895..dcb6116d 100644
--- a/internal/federation/dereferencing/account.go
+++ b/internal/federation/dereferencing/account.go
@@ -41,7 +41,7 @@ import (
 
 // accountUpToDate returns whether the given account model is both updateable (i.e.
 // non-instance remote account) and whether it needs an update based on `fetched_at`.
-func accountUpToDate(account *gtsmodel.Account) bool {
+func accountUpToDate(account *gtsmodel.Account, force bool) bool {
 	if !account.SuspendedAt.IsZero() {
 		// Can't update suspended accounts.
 		return true
@@ -57,8 +57,19 @@ func accountUpToDate(account *gtsmodel.Account) bool {
 		return true
 	}
 
-	// If this account was updated recently (last interval), we return as-is.
-	if next := account.FetchedAt.Add(6 * time.Hour); time.Now().Before(next) {
+	// Default limit we allow
+	// statuses to be refreshed.
+	limit := 6 * time.Hour
+
+	if force {
+		// We specifically allow the force flag
+		// to force an early refresh (on a much
+		// smaller cooldown period).
+		limit = 5 * time.Minute
+	}
+
+	// If account was updated recently (within limit), we return as-is.
+	if next := account.FetchedAt.Add(limit); time.Now().Before(next) {
 		return true
 	}
 
@@ -70,7 +81,7 @@ func accountUpToDate(account *gtsmodel.Account) bool {
 // may be enqueued for asynchronous fetching, e.g. featured account statuses (pins). An ActivityPub object indicates the account was dereferenced.
 func (d *Dereferencer) GetAccountByURI(ctx context.Context, requestUser string, uri *url.URL) (*gtsmodel.Account, ap.Accountable, error) {
 	// Fetch and dereference account if necessary.
-	account, apubAcc, err := d.getAccountByURI(ctx,
+	account, accountable, err := d.getAccountByURI(ctx,
 		requestUser,
 		uri,
 	)
@@ -78,7 +89,7 @@ func (d *Dereferencer) GetAccountByURI(ctx context.Context, requestUser string,
 		return nil, nil, err
 	}
 
-	if apubAcc != nil {
+	if accountable != nil {
 		// This account was updated, enqueue re-dereference featured posts.
 		d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) {
 			if err := d.dereferenceAccountFeatured(ctx, requestUser, account); err != nil {
@@ -87,7 +98,7 @@ func (d *Dereferencer) GetAccountByURI(ctx context.Context, requestUser string,
 		})
 	}
 
-	return account, apubAcc, nil
+	return account, accountable, nil
 }
 
 // getAccountByURI is a package internal form of .GetAccountByURI() that doesn't bother dereferencing featured posts on update.
@@ -134,17 +145,18 @@ func (d *Dereferencer) getAccountByURI(ctx context.Context, requestUser string,
 		}, nil)
 	}
 
-	// Check whether needs update.
-	if accountUpToDate(account) {
-		// This is existing up-to-date account, ensure it is populated.
+	if accountUpToDate(account, false) {
+		// This is an existing account that is up-to-date,
+		// before returning ensure it is fully populated.
 		if err := d.state.DB.PopulateAccount(ctx, account); err != nil {
 			log.Errorf(ctx, "error populating existing account: %v", err)
 		}
+
 		return account, nil, nil
 	}
 
 	// Try to update existing account model.
-	latest, apubAcc, err := d.enrichAccountSafely(ctx,
+	latest, accountable, err := d.enrichAccountSafely(ctx,
 		requestUser,
 		uri,
 		account,
@@ -157,14 +169,14 @@ func (d *Dereferencer) getAccountByURI(ctx context.Context, requestUser string,
 		return account, nil, nil
 	}
 
-	return latest, apubAcc, nil
+	return latest, accountable, nil
 }
 
 // GetAccountByUsernameDomain will attempt to fetch an accounts by its username@domain, first checking the database. In the case of a newly-met remote model,
 // or a remote model whose last_fetched date is beyond a certain interval, the account will be dereferenced. In the case of dereferencing, some low-priority
 // account information may be enqueued for asynchronous fetching, e.g. featured account statuses (pins). An ActivityPub object indicates the account was dereferenced.
 func (d *Dereferencer) GetAccountByUsernameDomain(ctx context.Context, requestUser string, username string, domain string) (*gtsmodel.Account, ap.Accountable, error) {
-	account, apubAcc, err := d.getAccountByUsernameDomain(
+	account, accountable, err := d.getAccountByUsernameDomain(
 		ctx,
 		requestUser,
 		username,
@@ -174,7 +186,7 @@ func (d *Dereferencer) GetAccountByUsernameDomain(ctx context.Context, requestUs
 		return nil, nil, err
 	}
 
-	if apubAcc != nil {
+	if accountable != nil {
 		// This account was updated, enqueue re-dereference featured posts.
 		d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) {
 			if err := d.dereferenceAccountFeatured(ctx, requestUser, account); err != nil {
@@ -183,12 +195,11 @@ func (d *Dereferencer) GetAccountByUsernameDomain(ctx context.Context, requestUs
 		})
 	}
 
-	return account, apubAcc, nil
+	return account, accountable, nil
 }
 
-// getAccountByUsernameDomain is a package internal form
-// of .GetAccountByUsernameDomain() that doesn't bother
-// dereferencing featured posts.
+// getAccountByUsernameDomain is a package internal form of
+// GetAccountByUsernameDomain() that doesn't bother deref of featured posts.
 func (d *Dereferencer) getAccountByUsernameDomain(
 	ctx context.Context,
 	requestUser string,
@@ -219,7 +230,7 @@ func (d *Dereferencer) getAccountByUsernameDomain(
 		}
 
 		// Create and pass-through a new bare-bones model for dereferencing.
-		account, apubAcc, err := d.enrichAccountSafely(ctx, requestUser, nil, &gtsmodel.Account{
+		account, accountable, err := d.enrichAccountSafely(ctx, requestUser, nil, &gtsmodel.Account{
 			ID:       id.NewULID(),
 			Username: username,
 			Domain:   domain,
@@ -228,11 +239,11 @@ func (d *Dereferencer) getAccountByUsernameDomain(
 			return nil, nil, err
 		}
 
-		return account, apubAcc, nil
+		return account, accountable, nil
 	}
 
 	// Try to update existing account model.
-	latest, apubAcc, err := d.RefreshAccount(ctx,
+	latest, accountable, err := d.RefreshAccount(ctx,
 		requestUser,
 		account,
 		nil,
@@ -243,22 +254,32 @@ func (d *Dereferencer) getAccountByUsernameDomain(
 		return account, nil, nil //nolint
 	}
 
-	if apubAcc == nil {
+	if accountable == nil {
 		// This is existing up-to-date account, ensure it is populated.
 		if err := d.state.DB.PopulateAccount(ctx, latest); err != nil {
 			log.Errorf(ctx, "error populating existing account: %v", err)
 		}
 	}
 
-	return latest, apubAcc, nil
+	return latest, accountable, nil
 }
 
-// RefreshAccount updates the given account if remote and last_fetched is beyond fetch interval, or if force is set. An updated account model is returned,
-// but in the case of dereferencing, some low-priority account information may be enqueued for asynchronous fetching, e.g. featured account statuses (pins).
+// RefreshAccount updates the given account if remote and last_fetched is
+// beyond fetch interval, or if force is set. An updated account model is
+// returned, but in the case of dereferencing, some low-priority account info
+// may be enqueued for asynchronous fetching, e.g. featured account statuses (pins).
 // An ActivityPub object indicates the account was dereferenced (i.e. updated).
-func (d *Dereferencer) RefreshAccount(ctx context.Context, requestUser string, account *gtsmodel.Account, apubAcc ap.Accountable, force bool) (*gtsmodel.Account, ap.Accountable, error) {
-	// Check whether needs update (and not forced).
-	if accountUpToDate(account) && !force {
+func (d *Dereferencer) RefreshAccount(
+	ctx context.Context,
+	requestUser string,
+	account *gtsmodel.Account,
+	accountable ap.Accountable,
+	force bool,
+) (*gtsmodel.Account, ap.Accountable, error) {
+	// If no incoming data is provided,
+	// check whether account needs update.
+	if accountable == nil &&
+		accountUpToDate(account, force) {
 		return account, nil, nil
 	}
 
@@ -269,18 +290,18 @@ func (d *Dereferencer) RefreshAccount(ctx context.Context, requestUser string, a
 	}
 
 	// Try to update + deref passed account model.
-	latest, apubAcc, err := d.enrichAccountSafely(ctx,
+	latest, accountable, err := d.enrichAccountSafely(ctx,
 		requestUser,
 		uri,
 		account,
-		apubAcc,
+		accountable,
 	)
 	if err != nil {
 		log.Errorf(ctx, "error enriching remote account: %v", err)
 		return nil, nil, gtserror.Newf("error enriching remote account: %w", err)
 	}
 
-	if apubAcc != nil {
+	if accountable != nil {
 		// This account was updated, enqueue re-dereference featured posts.
 		d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) {
 			if err := d.dereferenceAccountFeatured(ctx, requestUser, latest); err != nil {
@@ -289,14 +310,24 @@ func (d *Dereferencer) RefreshAccount(ctx context.Context, requestUser string, a
 		})
 	}
 
-	return latest, apubAcc, nil
+	return latest, accountable, nil
 }
 
-// RefreshAccountAsync enqueues the given account for an asychronous update fetching, if last_fetched is beyond fetch interval, or if forcc is set.
-// This is a more optimized form of manually enqueueing .UpdateAccount() to the federation worker, since it only enqueues update if necessary.
-func (d *Dereferencer) RefreshAccountAsync(ctx context.Context, requestUser string, account *gtsmodel.Account, apubAcc ap.Accountable, force bool) {
-	// Check whether needs update (and not forced).
-	if accountUpToDate(account) && !force {
+// RefreshAccountAsync enqueues the given account for an asychronous
+// update fetching, if last_fetched is beyond fetch interval, or if force
+// is set. This is a more optimized form of manually enqueueing .UpdateAccount()
+// to the federation worker, since it only enqueues update if necessary.
+func (d *Dereferencer) RefreshAccountAsync(
+	ctx context.Context,
+	requestUser string,
+	account *gtsmodel.Account,
+	accountable ap.Accountable,
+	force bool,
+) {
+	// If no incoming data is provided,
+	// check whether account needs update.
+	if accountable == nil &&
+		accountUpToDate(account, force) {
 		return
 	}
 
@@ -309,13 +340,13 @@ func (d *Dereferencer) RefreshAccountAsync(ctx context.Context, requestUser stri
 
 	// Enqueue a worker function to enrich this account async.
 	d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) {
-		latest, apubAcc, err := d.enrichAccountSafely(ctx, requestUser, uri, account, apubAcc)
+		latest, accountable, err := d.enrichAccountSafely(ctx, requestUser, uri, account, accountable)
 		if err != nil {
 			log.Errorf(ctx, "error enriching remote account: %v", err)
 			return
 		}
 
-		if apubAcc != nil {
+		if accountable != nil {
 			// This account was updated, enqueue re-dereference featured posts.
 			if err := d.dereferenceAccountFeatured(ctx, requestUser, latest); err != nil {
 				log.Errorf(ctx, "error fetching account featured collection: %v", err)
@@ -332,7 +363,7 @@ func (d *Dereferencer) enrichAccountSafely(
 	requestUser string,
 	uri *url.URL,
 	account *gtsmodel.Account,
-	apubAcc ap.Accountable,
+	accountable ap.Accountable,
 ) (*gtsmodel.Account, ap.Accountable, error) {
 	// Noop if account has been suspended.
 	if !account.SuspendedAt.IsZero() {
@@ -360,7 +391,7 @@ func (d *Dereferencer) enrichAccountSafely(
 		requestUser,
 		uri,
 		account,
-		apubAcc,
+		accountable,
 	)
 
 	if gtserror.StatusCode(err) >= 400 {
@@ -521,6 +552,16 @@ func (d *Dereferencer) enrichAccount(
 		}
 	}
 
+	if latestAcc.Domain == "" {
+		// ensure we have a domain set by this point,
+		// otherwise it gets stored as a local user!
+		//
+		// TODO: there is probably a more granular way
+		// way of checking this in each of the above parts,
+		// and honestly it could do with a smol refactor.
+		return nil, nil, gtserror.Newf("empty domain for %s", uri)
+	}
+
 	// Ensure ID is set and update fetch time.
 	latestAcc.ID = account.ID
 	latestAcc.FetchedAt = time.Now()
diff --git a/internal/federation/dereferencing/status.go b/internal/federation/dereferencing/status.go
index 2a2b99d2..1eb7d6fd 100644
--- a/internal/federation/dereferencing/status.go
+++ b/internal/federation/dereferencing/status.go
@@ -135,12 +135,13 @@ func (d *Dereferencer) getStatusByURI(ctx context.Context, requestUser string, u
 		}, nil)
 	}
 
-	// Check whether needs update.
 	if statusUpToDate(status, false) {
-		// This is existing up-to-date status, ensure it is populated.
+		// This is an existing status that is up-to-date,
+		// before returning ensure it is fully populated.
 		if err := d.state.DB.PopulateStatus(ctx, status); err != nil {
 			log.Errorf(ctx, "error populating existing status: %v", err)
 		}
+
 		return status, nil, false, nil
 	}
 
@@ -170,8 +171,10 @@ func (d *Dereferencer) RefreshStatus(
 	statusable ap.Statusable,
 	force bool,
 ) (*gtsmodel.Status, ap.Statusable, error) {
-	// Check whether status needs update.
-	if statusUpToDate(status, force) {
+	// If no incoming data is provided,
+	// check whether status needs update.
+	if statusable == nil &&
+		statusUpToDate(status, force) {
 		return status, nil, nil
 	}
 
@@ -215,8 +218,10 @@ func (d *Dereferencer) RefreshStatusAsync(
 	statusable ap.Statusable,
 	force bool,
 ) {
-	// Check whether status needs update.
-	if statusUpToDate(status, force) {
+	// If no incoming data is provided,
+	// check whether status needs update.
+	if statusable == nil &&
+		statusUpToDate(status, force) {
 		return
 	}