summaryrefslogtreecommitdiff
path: root/internal/federation
diff options
context:
space:
mode:
authorLibravatar tobi <31960611+tsmethurst@users.noreply.github.com>2024-01-26 14:17:10 +0100
committerLibravatar GitHub <noreply@github.com>2024-01-26 14:17:10 +0100
commite3052e8c825da699162ea25367e860ac3c66f461 (patch)
tree3d13e83413d4a85ab694034d6c9772f9ec64268a /internal/federation
parent[performance] cache library performance enhancements (updates go-structr => v... (diff)
downloadgotosocial-e3052e8c825da699162ea25367e860ac3c66f461.tar.xz
[bugfix] Don't return Account or Status if new and dereferencing failed, other small fixes (#2563)
* tidy up account, status, webfingering logic a wee bit * go fmt * invert published check * alter resp initialization * get Published from account in typeutils * don't instantiate error for no darn good reason * shadow err * don't repeat error codes in wrapped errors * don't wrap error unnecessarily
Diffstat (limited to 'internal/federation')
-rw-r--r--internal/federation/dereferencing/account.go188
-rw-r--r--internal/federation/dereferencing/finger.go105
-rw-r--r--internal/federation/dereferencing/status.go48
3 files changed, 252 insertions, 89 deletions
diff --git a/internal/federation/dereferencing/account.go b/internal/federation/dereferencing/account.go
index dcb6116d2..067203780 100644
--- a/internal/federation/dereferencing/account.go
+++ b/internal/federation/dereferencing/account.go
@@ -52,7 +52,7 @@ func accountUpToDate(account *gtsmodel.Account, force bool) bool {
return true
}
- if !account.CreatedAt.IsZero() && account.IsInstance() {
+ if account.IsInstance() && !account.IsNew() {
// Existing instance account. No need for update.
return true
}
@@ -232,8 +232,8 @@ func (d *Dereferencer) getAccountByUsernameDomain(
// Create and pass-through a new bare-bones model for dereferencing.
account, accountable, err := d.enrichAccountSafely(ctx, requestUser, nil, &gtsmodel.Account{
ID: id.NewULID(),
- Username: username,
Domain: domain,
+ Username: username,
}, nil)
if err != nil {
return nil, nil, err
@@ -372,17 +372,18 @@ func (d *Dereferencer) enrichAccountSafely(
// By default use account.URI
// as the per-URI deref lock.
- uriStr := account.URI
-
- if uriStr == "" {
+ var lockKey string
+ if account.URI != "" {
+ lockKey = account.URI
+ } else {
// No URI is set yet, instead generate a faux-one from user+domain.
- uriStr = "https://" + account.Domain + "/user/" + account.Username
+ lockKey = "https://" + account.Domain + "/users/" + account.Username
}
// Acquire per-URI deref lock, wraping unlock
// to safely defer in case of panic, while still
// performing more granular unlocks when needed.
- unlock := d.state.FedLocks.Lock(uriStr)
+ unlock := d.state.FedLocks.Lock(lockKey)
unlock = doOnce(unlock)
defer unlock()
@@ -394,10 +395,30 @@ func (d *Dereferencer) enrichAccountSafely(
accountable,
)
- if gtserror.StatusCode(err) >= 400 {
- // Update fetch-at to slow re-attempts.
+ if code := gtserror.StatusCode(err); code >= 400 {
+ // No matter what, log the error
+ // so instance admins have an idea
+ // why something isn't working.
+ log.Info(ctx, err)
+
+ if account.IsNew() {
+ // This was a new account enrich
+ // attempt which failed before we
+ // got to store it, so we can't
+ // return anything useful.
+ return nil, nil, err
+ }
+
+ // We had this account stored already
+ // before this enrichment attempt.
+ //
+ // Update fetched_at to slow re-attempts
+ // but don't return early. We can still
+ // return the model we had stored already.
account.FetchedAt = time.Now()
- _ = d.state.DB.UpdateAccount(ctx, account, "fetched_at")
+ if err := d.state.DB.UpdateAccount(ctx, account, "fetched_at"); err != nil {
+ log.Errorf(ctx, "error updating account fetched_at: %v", err)
+ }
}
// Unlock now
@@ -414,7 +435,7 @@ func (d *Dereferencer) enrichAccountSafely(
// in a call to db.Put(Account). Look again in DB by URI.
latest, err = d.state.DB.GetAccountByURI(ctx, account.URI)
if err != nil {
- err = gtserror.Newf("error getting account %s from database after race: %w", uriStr, err)
+ err = gtserror.Newf("error getting account %s from database after race: %w", lockKey, err)
}
}
@@ -440,32 +461,44 @@ func (d *Dereferencer) enrichAccount(
if account.Username != "" {
// A username was provided so we can attempt a webfinger, this ensures up-to-date accountdomain info.
accDomain, accURI, err := d.fingerRemoteAccount(ctx, tsport, account.Username, account.Domain)
- if err != nil {
- if account.URI == "" {
- // this is a new account (to us) with username@domain
- // but failed webfinger, nothing more we can do.
- err := gtserror.Newf("error webfingering account: %w", err)
- return nil, nil, gtserror.SetUnretrievable(err)
- }
+ switch {
- // Simply log this error and move on, we already have an account URI.
- log.Errorf(ctx, "error webfingering[1] remote account %s@%s: %v", account.Username, account.Domain, err)
- }
+ case err != nil && account.URI == "":
+ // This is a new account (to us) with username@domain
+ // but failed webfinger, nothing more we can do.
+ err := gtserror.Newf("error webfingering account: %w", err)
+ return nil, nil, gtserror.SetUnretrievable(err)
- if err == nil {
- if account.Domain != accDomain {
- // After webfinger, we now have correct account domain from which we can do a final DB check.
- alreadyAccount, err := d.state.DB.GetAccountByUsernameDomain(ctx, account.Username, accDomain)
- if err != nil && !errors.Is(err, db.ErrNoEntries) {
- return nil, nil, gtserror.Newf("db err looking for account again after webfinger: %w", err)
- }
+ case err != nil:
+ // Simply log this error and move on,
+ // we already have an account URI.
+ log.Errorf(ctx,
+ "error webfingering[1] remote account %s@%s: %v",
+ account.Username, account.Domain, err,
+ )
+
+ case err == nil && account.Domain != accDomain:
+ // After webfinger, we now have correct account domain from which we can do a final DB check.
+ alreadyAcct, err := d.state.DB.GetAccountByUsernameDomain(ctx, account.Username, accDomain)
+ if err != nil && !errors.Is(err, db.ErrNoEntries) {
+ return nil, nil, gtserror.Newf("db err looking for account again after webfinger: %w", err)
+ }
- if alreadyAccount != nil {
- // Enrich existing account.
- account = alreadyAccount
- }
+ if alreadyAcct != nil {
+ // We had this account stored under
+ // the discovered accountDomain.
+ // Proceed with this account.
+ account = alreadyAcct
}
+ // Whether we had the account or not, we
+ // now have webfinger info relevant to the
+ // account, so fallthrough to set webfinger
+ // info on either the account we just found,
+ // or the stub account we were passed.
+ fallthrough
+
+ case err == nil:
// Update account with latest info.
account.URI = accURI.String()
account.Domain = accDomain
@@ -474,13 +507,31 @@ func (d *Dereferencer) enrichAccount(
}
if uri == nil {
- // No URI provided / found, must parse from account.
+ // No URI provided / found,
+ // must parse from account.
uri, err = url.Parse(account.URI)
if err != nil {
- return nil, nil, gtserror.Newf("invalid uri %q: %w", account.URI, err)
+ return nil, nil, gtserror.Newf(
+ "invalid uri %q: %w",
+ account.URI, gtserror.SetUnretrievable(err),
+ )
+ }
+
+ if uri.Scheme != "http" && uri.Scheme != "https" {
+ err = errors.New("account URI scheme must be http or https")
+ return nil, nil, gtserror.Newf(
+ "invalid uri %q: %w",
+ account.URI, gtserror.SetUnretrievable(err),
+ )
}
}
+ /*
+ BY THIS POINT we must have an account URI set,
+ either provided, pinned to the account, or
+ obtained via webfinger call.
+ */
+
// Check whether this account URI is a blocked domain / subdomain.
if blocked, err := d.state.DB.IsDomainBlocked(ctx, uri.Host); err != nil {
return nil, nil, gtserror.Newf("error checking blocked domain: %w", err)
@@ -493,27 +544,45 @@ func (d *Dereferencer) enrichAccount(
defer d.stopHandshake(requestUser, uri)
if apubAcc == nil {
+ // We were not given any (partial) ActivityPub
+ // version of this account as a parameter.
// Dereference latest version of the account.
b, err := tsport.Dereference(ctx, uri)
if err != nil {
- err := gtserror.Newf("error deferencing %s: %w", uri, err)
+ err := gtserror.Newf("error dereferencing %s: %w", uri, err)
return nil, nil, gtserror.SetUnretrievable(err)
}
// Attempt to resolve ActivityPub acc from data.
apubAcc, err = ap.ResolveAccountable(ctx, b)
if err != nil {
- return nil, nil, gtserror.Newf("error resolving accountable from data for account %s: %w", uri, err)
+ // ResolveAccountable will set Unretrievable/WrongType
+ // on the returned error, so we don't need to do it here.
+ err = gtserror.Newf("error resolving accountable from data for account %s: %w", uri, err)
+ return nil, nil, err
}
}
+ /*
+ BY THIS POINT we must have the ActivityPub
+ representation of the account, either provided,
+ or obtained via a dereference call.
+ */
+
// Convert the dereferenced AP account object to our GTS model.
+ //
+ // We put this in the variable latestAcc because we might want
+ // to compare the provided account model with this fresh version,
+ // in order to check if anything changed since we last saw it.
latestAcc, err := d.converter.ASRepresentationToAccount(ctx,
apubAcc,
account.Domain,
)
if err != nil {
- return nil, nil, gtserror.Newf("error converting accountable to gts model for account %s: %w", uri, err)
+ // ASRepresentationToAccount will set Malformed on the
+ // returned error, so we don't need to do it here.
+ err = gtserror.Newf("error converting accountable to gts model for account %s: %w", uri, err)
+ return nil, nil, err
}
if account.Username == "" {
@@ -528,14 +597,15 @@ func (d *Dereferencer) enrichAccount(
// from example.org to somewhere.else: we want to take somewhere.else
// as the accountDomain then, not the example.org we were redirected from.
- // Assume the host from the returned ActivityPub representation.
- idProp := apubAcc.GetJSONLDId()
- if idProp == nil || !idProp.IsIRI() {
+ // Assume the host from the returned
+ // ActivityPub representation.
+ id := ap.GetJSONLDId(apubAcc)
+ if id == nil {
return nil, nil, gtserror.New("no id property found on person, or id was not an iri")
}
// Get IRI host value.
- accHost := idProp.GetIRI().Host
+ accHost := id.Host
latestAcc.Domain, _, err = d.fingerRemoteAccount(ctx,
tsport,
@@ -553,7 +623,7 @@ func (d *Dereferencer) enrichAccount(
}
if latestAcc.Domain == "" {
- // ensure we have a domain set by this point,
+ // Ensure we have a domain set by this point,
// otherwise it gets stored as a local user!
//
// TODO: there is probably a more granular way
@@ -562,7 +632,16 @@ func (d *Dereferencer) enrichAccount(
return nil, nil, gtserror.Newf("empty domain for %s", uri)
}
- // Ensure ID is set and update fetch time.
+ /*
+ BY THIS POINT we have more or less a fullly-formed
+ representation of the target account, derived from
+ a combination of webfinger lookups and dereferencing.
+ Further fetching beyond this point is for peripheral
+ things like account avatar, header, emojis.
+ */
+
+ // Ensure internal db ID is
+ // set and update fetch time.
latestAcc.ID = account.ID
latestAcc.FetchedAt = time.Now()
@@ -581,12 +660,14 @@ func (d *Dereferencer) enrichAccount(
log.Errorf(ctx, "error fetching remote emojis for account %s: %v", uri, err)
}
- if account.CreatedAt.IsZero() {
- // CreatedAt will be zero if no local copy was
- // found in one of the GetAccountBy___() functions.
- //
- // Set time of creation from the last-fetched date.
- latestAcc.CreatedAt = latestAcc.FetchedAt
+ if account.IsNew() {
+ // Prefer published/created time from
+ // apubAcc, fall back to FetchedAt value.
+ if latestAcc.CreatedAt.IsZero() {
+ latestAcc.CreatedAt = latestAcc.FetchedAt
+ }
+
+ // Set time of update from the last-fetched date.
latestAcc.UpdatedAt = latestAcc.FetchedAt
// This is new, put it in the database.
@@ -595,11 +676,16 @@ func (d *Dereferencer) enrichAccount(
return nil, nil, gtserror.Newf("error putting in database: %w", err)
}
} else {
+ // Prefer published time from apubAcc,
+ // fall back to previous stored value.
+ if latestAcc.CreatedAt.IsZero() {
+ latestAcc.CreatedAt = account.CreatedAt
+ }
+
// Set time of update from the last-fetched date.
latestAcc.UpdatedAt = latestAcc.FetchedAt
- // Use existing account values.
- latestAcc.CreatedAt = account.CreatedAt
+ // Carry over existing account language.
latestAcc.Language = account.Language
// This is an existing account, update the model in the database.
diff --git a/internal/federation/dereferencing/finger.go b/internal/federation/dereferencing/finger.go
index a81afa5ea..514a058ba 100644
--- a/internal/federation/dereferencing/finger.go
+++ b/internal/federation/dereferencing/finger.go
@@ -20,54 +20,109 @@ package dereferencing
import (
"context"
"encoding/json"
- "errors"
- "fmt"
"net/url"
"strings"
apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
+ "github.com/superseriousbusiness/gotosocial/internal/gtserror"
+ "github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/transport"
"github.com/superseriousbusiness/gotosocial/internal/util"
)
-func (d *Dereferencer) fingerRemoteAccount(ctx context.Context, transport transport.Transport, targetUsername string, targetHost string) (accountDomain string, accountURI *url.URL, err error) {
- b, err := transport.Finger(ctx, targetUsername, targetHost)
+// fingerRemoteAccount performs a webfinger call for the
+// given username and host, using the provided transport.
+//
+// The webfinger response will be parsed, and the subject
+// domain and AP URI will be extracted and returned.
+//
+// In case the response cannot be parsed, or the response
+// does not contain a valid subject string or AP URI, an
+// error will be returned instead.
+func (d *Dereferencer) fingerRemoteAccount(
+ ctx context.Context,
+ transport transport.Transport,
+ username string,
+ host string,
+) (
+ string, // discovered account domain
+ *url.URL, // discovered account URI
+ error,
+) {
+ // Assemble target namestring for logging.
+ var target = "@" + username + "@" + host
+
+ b, err := transport.Finger(ctx, username, host)
if err != nil {
- err = fmt.Errorf("fingerRemoteAccount: error fingering @%s@%s: %s", targetUsername, targetHost, err)
- return
+ err = gtserror.Newf("error webfingering %s: %w", target, err)
+ return "", nil, err
}
- resp := &apimodel.WellKnownResponse{}
- if err = json.Unmarshal(b, resp); err != nil {
- err = fmt.Errorf("fingerRemoteAccount: could not unmarshal server response as WebfingerAccountResponse while dereferencing @%s@%s: %s", targetUsername, targetHost, err)
- return
+ var resp apimodel.WellKnownResponse
+ if err := json.Unmarshal(b, &resp); err != nil {
+ err = gtserror.Newf("error parsing response as JSON for %s: %w", target, err)
+ return "", nil, err
}
if len(resp.Links) == 0 {
- err = fmt.Errorf("fingerRemoteAccount: no links found in webfinger response %s", string(b))
- return
+ err = gtserror.Newf("no links found in response for %s", target)
+ return "", nil, err
}
if resp.Subject == "" {
- err = fmt.Errorf("fingerRemoteAccount: no subject found in webfinger response %s", string(b))
- return
+ err = gtserror.Newf("no subject found in response for %s", target)
+ return "", nil, err
}
- _, accountDomain, err = util.ExtractWebfingerParts(resp.Subject)
+ _, accountDomain, err := util.ExtractWebfingerParts(resp.Subject)
if err != nil {
- err = fmt.Errorf("fingerRemoteAccount: error extracting webfinger subject parts: %s", err)
+ err = gtserror.Newf("error extracting subject parts for %s: %w", target, err)
+ return "", nil, err
}
- // look through the links for the first one that matches what we need
- for _, l := range resp.Links {
- if l.Rel == "self" && (strings.EqualFold(l.Type, "application/activity+json") || strings.EqualFold(l.Type, "application/ld+json; profile=\"https://www.w3.org/ns/activitystreams\"")) {
- if uri, thiserr := url.Parse(l.Href); thiserr == nil && (uri.Scheme == "http" || uri.Scheme == "https") {
- // found it!
- accountURI = uri
- return
- }
+ // Look through links for the first
+ // one that matches what we need:
+ //
+ // - Must be self link.
+ // - Must be AP type.
+ // - Valid https/http URI.
+ for _, link := range resp.Links {
+ if link.Rel != "self" {
+ // Not self link, ignore.
+ continue
+ }
+
+ if !strings.EqualFold(link.Type, "application/activity+json") &&
+ !strings.EqualFold(link.Type, "application/ld+json; profile=\"https://www.w3.org/ns/activitystreams\"") {
+ // Not an AP type, ignore.
+ continue
+ }
+
+ uri, err := url.Parse(link.Href)
+ if err != nil {
+ log.Warnf(ctx,
+ "invalid href for ActivityPub self link %s for %s: %v",
+ link.Href, target, err,
+ )
+
+ // Funky URL, ignore.
+ continue
+ }
+
+ if uri.Scheme != "http" && uri.Scheme != "https" {
+ log.Warnf(ctx,
+ "invalid href for ActivityPub self link %s for %s: schema must be http or https",
+ link.Href, target,
+ )
+
+ // Can't handle this
+ // schema, ignore.
+ continue
}
+
+ // All looks good, return happily!
+ return accountDomain, uri, nil
}
- return "", nil, errors.New("fingerRemoteAccount: no match found in webfinger response")
+ return "", nil, gtserror.Newf("no suitable self, AP-type link found in webfinger response for %s", target)
}
diff --git a/internal/federation/dereferencing/status.go b/internal/federation/dereferencing/status.go
index 1eb7d6fdb..7dc22a354 100644
--- a/internal/federation/dereferencing/status.go
+++ b/internal/federation/dereferencing/status.go
@@ -294,10 +294,30 @@ func (d *Dereferencer) enrichStatusSafely(
apubStatus,
)
- if gtserror.StatusCode(err) >= 400 {
- // Update fetch-at to slow re-attempts.
+ if code := gtserror.StatusCode(err); code >= 400 {
+ // No matter what, log the error
+ // so instance admins have an idea
+ // why something isn't working.
+ log.Info(ctx, err)
+
+ if isNew {
+ // This was a new status enrich
+ // attempt which failed before we
+ // got to store it, so we can't
+ // return anything useful.
+ return nil, nil, isNew, err
+ }
+
+ // We had this status stored already
+ // before this enrichment attempt.
+ //
+ // Update fetched_at to slow re-attempts
+ // but don't return early. We can still
+ // return the model we had stored already.
status.FetchedAt = time.Now()
- _ = d.state.DB.UpdateStatus(ctx, status, "fetched_at")
+ if err := d.state.DB.UpdateStatus(ctx, status, "fetched_at"); err != nil {
+ log.Errorf(ctx, "error updating status fetched_at: %v", err)
+ }
}
// Unlock now
@@ -358,7 +378,7 @@ func (d *Dereferencer) enrichStatus(
// Dereference latest version of the status.
b, err := tsport.Dereference(ctx, uri)
if err != nil {
- err := gtserror.Newf("error deferencing %s: %w", uri, err)
+ err := gtserror.Newf("error dereferencing %s: %w", uri, err)
return nil, nil, gtserror.SetUnretrievable(err)
}
@@ -388,16 +408,21 @@ func (d *Dereferencer) enrichStatus(
return nil, nil, gtserror.Newf("error converting statusable to gts model for status %s: %w", uri, err)
}
- // Use existing status ID.
- latestStatus.ID = status.ID
- if latestStatus.ID == "" {
-
- // Generate new status ID from the provided creation date.
+ // Check if we've previously
+ // stored this status in the DB.
+ // If we have, it'll be ID'd.
+ var isNew = (status.ID == "")
+ if isNew {
+ // No ID, we haven't stored this status before.
+ // Generate new status ID from the status publication time.
latestStatus.ID, err = id.NewULIDFromTime(latestStatus.CreatedAt)
if err != nil {
log.Errorf(ctx, "invalid created at date (falling back to 'now'): %v", err)
latestStatus.ID = id.NewULID() // just use "now"
}
+ } else {
+ // Reuse existing status ID.
+ latestStatus.ID = status.ID
}
// Carry-over values and set fetch time.
@@ -436,10 +461,7 @@ func (d *Dereferencer) enrichStatus(
return nil, nil, gtserror.Newf("error populating emojis for status %s: %w", uri, err)
}
- if status.CreatedAt.IsZero() {
- // CreatedAt will be zero if no local copy was
- // found in one of the GetStatusBy___() functions.
- //
+ if isNew {
// This is new, put the status in the database.
err := d.state.DB.PutStatus(ctx, latestStatus)
if err != nil {