diff options
author | 2024-01-26 14:17:10 +0100 | |
---|---|---|
committer | 2024-01-26 14:17:10 +0100 | |
commit | e3052e8c825da699162ea25367e860ac3c66f461 (patch) | |
tree | 3d13e83413d4a85ab694034d6c9772f9ec64268a /internal/federation | |
parent | [performance] cache library performance enhancements (updates go-structr => v... (diff) | |
download | gotosocial-e3052e8c825da699162ea25367e860ac3c66f461.tar.xz |
[bugfix] Don't return Account or Status if new and dereferencing failed, other small fixes (#2563)
* tidy up account, status, webfingering logic a wee bit
* go fmt
* invert published check
* alter resp initialization
* get Published from account in typeutils
* don't instantiate error for no darn good reason
* shadow err
* don't repeat error codes in wrapped errors
* don't wrap error unnecessarily
Diffstat (limited to 'internal/federation')
-rw-r--r-- | internal/federation/dereferencing/account.go | 188 | ||||
-rw-r--r-- | internal/federation/dereferencing/finger.go | 105 | ||||
-rw-r--r-- | internal/federation/dereferencing/status.go | 48 |
3 files changed, 252 insertions, 89 deletions
diff --git a/internal/federation/dereferencing/account.go b/internal/federation/dereferencing/account.go index dcb6116d2..067203780 100644 --- a/internal/federation/dereferencing/account.go +++ b/internal/federation/dereferencing/account.go @@ -52,7 +52,7 @@ func accountUpToDate(account *gtsmodel.Account, force bool) bool { return true } - if !account.CreatedAt.IsZero() && account.IsInstance() { + if account.IsInstance() && !account.IsNew() { // Existing instance account. No need for update. return true } @@ -232,8 +232,8 @@ func (d *Dereferencer) getAccountByUsernameDomain( // Create and pass-through a new bare-bones model for dereferencing. account, accountable, err := d.enrichAccountSafely(ctx, requestUser, nil, >smodel.Account{ ID: id.NewULID(), - Username: username, Domain: domain, + Username: username, }, nil) if err != nil { return nil, nil, err @@ -372,17 +372,18 @@ func (d *Dereferencer) enrichAccountSafely( // By default use account.URI // as the per-URI deref lock. - uriStr := account.URI - - if uriStr == "" { + var lockKey string + if account.URI != "" { + lockKey = account.URI + } else { // No URI is set yet, instead generate a faux-one from user+domain. - uriStr = "https://" + account.Domain + "/user/" + account.Username + lockKey = "https://" + account.Domain + "/users/" + account.Username } // Acquire per-URI deref lock, wraping unlock // to safely defer in case of panic, while still // performing more granular unlocks when needed. - unlock := d.state.FedLocks.Lock(uriStr) + unlock := d.state.FedLocks.Lock(lockKey) unlock = doOnce(unlock) defer unlock() @@ -394,10 +395,30 @@ func (d *Dereferencer) enrichAccountSafely( accountable, ) - if gtserror.StatusCode(err) >= 400 { - // Update fetch-at to slow re-attempts. + if code := gtserror.StatusCode(err); code >= 400 { + // No matter what, log the error + // so instance admins have an idea + // why something isn't working. + log.Info(ctx, err) + + if account.IsNew() { + // This was a new account enrich + // attempt which failed before we + // got to store it, so we can't + // return anything useful. + return nil, nil, err + } + + // We had this account stored already + // before this enrichment attempt. + // + // Update fetched_at to slow re-attempts + // but don't return early. We can still + // return the model we had stored already. account.FetchedAt = time.Now() - _ = d.state.DB.UpdateAccount(ctx, account, "fetched_at") + if err := d.state.DB.UpdateAccount(ctx, account, "fetched_at"); err != nil { + log.Errorf(ctx, "error updating account fetched_at: %v", err) + } } // Unlock now @@ -414,7 +435,7 @@ func (d *Dereferencer) enrichAccountSafely( // in a call to db.Put(Account). Look again in DB by URI. latest, err = d.state.DB.GetAccountByURI(ctx, account.URI) if err != nil { - err = gtserror.Newf("error getting account %s from database after race: %w", uriStr, err) + err = gtserror.Newf("error getting account %s from database after race: %w", lockKey, err) } } @@ -440,32 +461,44 @@ func (d *Dereferencer) enrichAccount( if account.Username != "" { // A username was provided so we can attempt a webfinger, this ensures up-to-date accountdomain info. accDomain, accURI, err := d.fingerRemoteAccount(ctx, tsport, account.Username, account.Domain) - if err != nil { - if account.URI == "" { - // this is a new account (to us) with username@domain - // but failed webfinger, nothing more we can do. - err := gtserror.Newf("error webfingering account: %w", err) - return nil, nil, gtserror.SetUnretrievable(err) - } + switch { - // Simply log this error and move on, we already have an account URI. - log.Errorf(ctx, "error webfingering[1] remote account %s@%s: %v", account.Username, account.Domain, err) - } + case err != nil && account.URI == "": + // This is a new account (to us) with username@domain + // but failed webfinger, nothing more we can do. + err := gtserror.Newf("error webfingering account: %w", err) + return nil, nil, gtserror.SetUnretrievable(err) - if err == nil { - if account.Domain != accDomain { - // After webfinger, we now have correct account domain from which we can do a final DB check. - alreadyAccount, err := d.state.DB.GetAccountByUsernameDomain(ctx, account.Username, accDomain) - if err != nil && !errors.Is(err, db.ErrNoEntries) { - return nil, nil, gtserror.Newf("db err looking for account again after webfinger: %w", err) - } + case err != nil: + // Simply log this error and move on, + // we already have an account URI. + log.Errorf(ctx, + "error webfingering[1] remote account %s@%s: %v", + account.Username, account.Domain, err, + ) + + case err == nil && account.Domain != accDomain: + // After webfinger, we now have correct account domain from which we can do a final DB check. + alreadyAcct, err := d.state.DB.GetAccountByUsernameDomain(ctx, account.Username, accDomain) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return nil, nil, gtserror.Newf("db err looking for account again after webfinger: %w", err) + } - if alreadyAccount != nil { - // Enrich existing account. - account = alreadyAccount - } + if alreadyAcct != nil { + // We had this account stored under + // the discovered accountDomain. + // Proceed with this account. + account = alreadyAcct } + // Whether we had the account or not, we + // now have webfinger info relevant to the + // account, so fallthrough to set webfinger + // info on either the account we just found, + // or the stub account we were passed. + fallthrough + + case err == nil: // Update account with latest info. account.URI = accURI.String() account.Domain = accDomain @@ -474,13 +507,31 @@ func (d *Dereferencer) enrichAccount( } if uri == nil { - // No URI provided / found, must parse from account. + // No URI provided / found, + // must parse from account. uri, err = url.Parse(account.URI) if err != nil { - return nil, nil, gtserror.Newf("invalid uri %q: %w", account.URI, err) + return nil, nil, gtserror.Newf( + "invalid uri %q: %w", + account.URI, gtserror.SetUnretrievable(err), + ) + } + + if uri.Scheme != "http" && uri.Scheme != "https" { + err = errors.New("account URI scheme must be http or https") + return nil, nil, gtserror.Newf( + "invalid uri %q: %w", + account.URI, gtserror.SetUnretrievable(err), + ) } } + /* + BY THIS POINT we must have an account URI set, + either provided, pinned to the account, or + obtained via webfinger call. + */ + // Check whether this account URI is a blocked domain / subdomain. if blocked, err := d.state.DB.IsDomainBlocked(ctx, uri.Host); err != nil { return nil, nil, gtserror.Newf("error checking blocked domain: %w", err) @@ -493,27 +544,45 @@ func (d *Dereferencer) enrichAccount( defer d.stopHandshake(requestUser, uri) if apubAcc == nil { + // We were not given any (partial) ActivityPub + // version of this account as a parameter. // Dereference latest version of the account. b, err := tsport.Dereference(ctx, uri) if err != nil { - err := gtserror.Newf("error deferencing %s: %w", uri, err) + err := gtserror.Newf("error dereferencing %s: %w", uri, err) return nil, nil, gtserror.SetUnretrievable(err) } // Attempt to resolve ActivityPub acc from data. apubAcc, err = ap.ResolveAccountable(ctx, b) if err != nil { - return nil, nil, gtserror.Newf("error resolving accountable from data for account %s: %w", uri, err) + // ResolveAccountable will set Unretrievable/WrongType + // on the returned error, so we don't need to do it here. + err = gtserror.Newf("error resolving accountable from data for account %s: %w", uri, err) + return nil, nil, err } } + /* + BY THIS POINT we must have the ActivityPub + representation of the account, either provided, + or obtained via a dereference call. + */ + // Convert the dereferenced AP account object to our GTS model. + // + // We put this in the variable latestAcc because we might want + // to compare the provided account model with this fresh version, + // in order to check if anything changed since we last saw it. latestAcc, err := d.converter.ASRepresentationToAccount(ctx, apubAcc, account.Domain, ) if err != nil { - return nil, nil, gtserror.Newf("error converting accountable to gts model for account %s: %w", uri, err) + // ASRepresentationToAccount will set Malformed on the + // returned error, so we don't need to do it here. + err = gtserror.Newf("error converting accountable to gts model for account %s: %w", uri, err) + return nil, nil, err } if account.Username == "" { @@ -528,14 +597,15 @@ func (d *Dereferencer) enrichAccount( // from example.org to somewhere.else: we want to take somewhere.else // as the accountDomain then, not the example.org we were redirected from. - // Assume the host from the returned ActivityPub representation. - idProp := apubAcc.GetJSONLDId() - if idProp == nil || !idProp.IsIRI() { + // Assume the host from the returned + // ActivityPub representation. + id := ap.GetJSONLDId(apubAcc) + if id == nil { return nil, nil, gtserror.New("no id property found on person, or id was not an iri") } // Get IRI host value. - accHost := idProp.GetIRI().Host + accHost := id.Host latestAcc.Domain, _, err = d.fingerRemoteAccount(ctx, tsport, @@ -553,7 +623,7 @@ func (d *Dereferencer) enrichAccount( } if latestAcc.Domain == "" { - // ensure we have a domain set by this point, + // Ensure we have a domain set by this point, // otherwise it gets stored as a local user! // // TODO: there is probably a more granular way @@ -562,7 +632,16 @@ func (d *Dereferencer) enrichAccount( return nil, nil, gtserror.Newf("empty domain for %s", uri) } - // Ensure ID is set and update fetch time. + /* + BY THIS POINT we have more or less a fullly-formed + representation of the target account, derived from + a combination of webfinger lookups and dereferencing. + Further fetching beyond this point is for peripheral + things like account avatar, header, emojis. + */ + + // Ensure internal db ID is + // set and update fetch time. latestAcc.ID = account.ID latestAcc.FetchedAt = time.Now() @@ -581,12 +660,14 @@ func (d *Dereferencer) enrichAccount( log.Errorf(ctx, "error fetching remote emojis for account %s: %v", uri, err) } - if account.CreatedAt.IsZero() { - // CreatedAt will be zero if no local copy was - // found in one of the GetAccountBy___() functions. - // - // Set time of creation from the last-fetched date. - latestAcc.CreatedAt = latestAcc.FetchedAt + if account.IsNew() { + // Prefer published/created time from + // apubAcc, fall back to FetchedAt value. + if latestAcc.CreatedAt.IsZero() { + latestAcc.CreatedAt = latestAcc.FetchedAt + } + + // Set time of update from the last-fetched date. latestAcc.UpdatedAt = latestAcc.FetchedAt // This is new, put it in the database. @@ -595,11 +676,16 @@ func (d *Dereferencer) enrichAccount( return nil, nil, gtserror.Newf("error putting in database: %w", err) } } else { + // Prefer published time from apubAcc, + // fall back to previous stored value. + if latestAcc.CreatedAt.IsZero() { + latestAcc.CreatedAt = account.CreatedAt + } + // Set time of update from the last-fetched date. latestAcc.UpdatedAt = latestAcc.FetchedAt - // Use existing account values. - latestAcc.CreatedAt = account.CreatedAt + // Carry over existing account language. latestAcc.Language = account.Language // This is an existing account, update the model in the database. diff --git a/internal/federation/dereferencing/finger.go b/internal/federation/dereferencing/finger.go index a81afa5ea..514a058ba 100644 --- a/internal/federation/dereferencing/finger.go +++ b/internal/federation/dereferencing/finger.go @@ -20,54 +20,109 @@ package dereferencing import ( "context" "encoding/json" - "errors" - "fmt" "net/url" "strings" apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model" + "github.com/superseriousbusiness/gotosocial/internal/gtserror" + "github.com/superseriousbusiness/gotosocial/internal/log" "github.com/superseriousbusiness/gotosocial/internal/transport" "github.com/superseriousbusiness/gotosocial/internal/util" ) -func (d *Dereferencer) fingerRemoteAccount(ctx context.Context, transport transport.Transport, targetUsername string, targetHost string) (accountDomain string, accountURI *url.URL, err error) { - b, err := transport.Finger(ctx, targetUsername, targetHost) +// fingerRemoteAccount performs a webfinger call for the +// given username and host, using the provided transport. +// +// The webfinger response will be parsed, and the subject +// domain and AP URI will be extracted and returned. +// +// In case the response cannot be parsed, or the response +// does not contain a valid subject string or AP URI, an +// error will be returned instead. +func (d *Dereferencer) fingerRemoteAccount( + ctx context.Context, + transport transport.Transport, + username string, + host string, +) ( + string, // discovered account domain + *url.URL, // discovered account URI + error, +) { + // Assemble target namestring for logging. + var target = "@" + username + "@" + host + + b, err := transport.Finger(ctx, username, host) if err != nil { - err = fmt.Errorf("fingerRemoteAccount: error fingering @%s@%s: %s", targetUsername, targetHost, err) - return + err = gtserror.Newf("error webfingering %s: %w", target, err) + return "", nil, err } - resp := &apimodel.WellKnownResponse{} - if err = json.Unmarshal(b, resp); err != nil { - err = fmt.Errorf("fingerRemoteAccount: could not unmarshal server response as WebfingerAccountResponse while dereferencing @%s@%s: %s", targetUsername, targetHost, err) - return + var resp apimodel.WellKnownResponse + if err := json.Unmarshal(b, &resp); err != nil { + err = gtserror.Newf("error parsing response as JSON for %s: %w", target, err) + return "", nil, err } if len(resp.Links) == 0 { - err = fmt.Errorf("fingerRemoteAccount: no links found in webfinger response %s", string(b)) - return + err = gtserror.Newf("no links found in response for %s", target) + return "", nil, err } if resp.Subject == "" { - err = fmt.Errorf("fingerRemoteAccount: no subject found in webfinger response %s", string(b)) - return + err = gtserror.Newf("no subject found in response for %s", target) + return "", nil, err } - _, accountDomain, err = util.ExtractWebfingerParts(resp.Subject) + _, accountDomain, err := util.ExtractWebfingerParts(resp.Subject) if err != nil { - err = fmt.Errorf("fingerRemoteAccount: error extracting webfinger subject parts: %s", err) + err = gtserror.Newf("error extracting subject parts for %s: %w", target, err) + return "", nil, err } - // look through the links for the first one that matches what we need - for _, l := range resp.Links { - if l.Rel == "self" && (strings.EqualFold(l.Type, "application/activity+json") || strings.EqualFold(l.Type, "application/ld+json; profile=\"https://www.w3.org/ns/activitystreams\"")) { - if uri, thiserr := url.Parse(l.Href); thiserr == nil && (uri.Scheme == "http" || uri.Scheme == "https") { - // found it! - accountURI = uri - return - } + // Look through links for the first + // one that matches what we need: + // + // - Must be self link. + // - Must be AP type. + // - Valid https/http URI. + for _, link := range resp.Links { + if link.Rel != "self" { + // Not self link, ignore. + continue + } + + if !strings.EqualFold(link.Type, "application/activity+json") && + !strings.EqualFold(link.Type, "application/ld+json; profile=\"https://www.w3.org/ns/activitystreams\"") { + // Not an AP type, ignore. + continue + } + + uri, err := url.Parse(link.Href) + if err != nil { + log.Warnf(ctx, + "invalid href for ActivityPub self link %s for %s: %v", + link.Href, target, err, + ) + + // Funky URL, ignore. + continue + } + + if uri.Scheme != "http" && uri.Scheme != "https" { + log.Warnf(ctx, + "invalid href for ActivityPub self link %s for %s: schema must be http or https", + link.Href, target, + ) + + // Can't handle this + // schema, ignore. + continue } + + // All looks good, return happily! + return accountDomain, uri, nil } - return "", nil, errors.New("fingerRemoteAccount: no match found in webfinger response") + return "", nil, gtserror.Newf("no suitable self, AP-type link found in webfinger response for %s", target) } diff --git a/internal/federation/dereferencing/status.go b/internal/federation/dereferencing/status.go index 1eb7d6fdb..7dc22a354 100644 --- a/internal/federation/dereferencing/status.go +++ b/internal/federation/dereferencing/status.go @@ -294,10 +294,30 @@ func (d *Dereferencer) enrichStatusSafely( apubStatus, ) - if gtserror.StatusCode(err) >= 400 { - // Update fetch-at to slow re-attempts. + if code := gtserror.StatusCode(err); code >= 400 { + // No matter what, log the error + // so instance admins have an idea + // why something isn't working. + log.Info(ctx, err) + + if isNew { + // This was a new status enrich + // attempt which failed before we + // got to store it, so we can't + // return anything useful. + return nil, nil, isNew, err + } + + // We had this status stored already + // before this enrichment attempt. + // + // Update fetched_at to slow re-attempts + // but don't return early. We can still + // return the model we had stored already. status.FetchedAt = time.Now() - _ = d.state.DB.UpdateStatus(ctx, status, "fetched_at") + if err := d.state.DB.UpdateStatus(ctx, status, "fetched_at"); err != nil { + log.Errorf(ctx, "error updating status fetched_at: %v", err) + } } // Unlock now @@ -358,7 +378,7 @@ func (d *Dereferencer) enrichStatus( // Dereference latest version of the status. b, err := tsport.Dereference(ctx, uri) if err != nil { - err := gtserror.Newf("error deferencing %s: %w", uri, err) + err := gtserror.Newf("error dereferencing %s: %w", uri, err) return nil, nil, gtserror.SetUnretrievable(err) } @@ -388,16 +408,21 @@ func (d *Dereferencer) enrichStatus( return nil, nil, gtserror.Newf("error converting statusable to gts model for status %s: %w", uri, err) } - // Use existing status ID. - latestStatus.ID = status.ID - if latestStatus.ID == "" { - - // Generate new status ID from the provided creation date. + // Check if we've previously + // stored this status in the DB. + // If we have, it'll be ID'd. + var isNew = (status.ID == "") + if isNew { + // No ID, we haven't stored this status before. + // Generate new status ID from the status publication time. latestStatus.ID, err = id.NewULIDFromTime(latestStatus.CreatedAt) if err != nil { log.Errorf(ctx, "invalid created at date (falling back to 'now'): %v", err) latestStatus.ID = id.NewULID() // just use "now" } + } else { + // Reuse existing status ID. + latestStatus.ID = status.ID } // Carry-over values and set fetch time. @@ -436,10 +461,7 @@ func (d *Dereferencer) enrichStatus( return nil, nil, gtserror.Newf("error populating emojis for status %s: %w", uri, err) } - if status.CreatedAt.IsZero() { - // CreatedAt will be zero if no local copy was - // found in one of the GetStatusBy___() functions. - // + if isNew { // This is new, put the status in the database. err := d.state.DB.PutStatus(ctx, latestStatus) if err != nil { |