diff options
| author | 2024-01-26 14:17:10 +0100 | |
|---|---|---|
| committer | 2024-01-26 14:17:10 +0100 | |
| commit | e3052e8c825da699162ea25367e860ac3c66f461 (patch) | |
| tree | 3d13e83413d4a85ab694034d6c9772f9ec64268a /internal/federation | |
| parent | [performance] cache library performance enhancements (updates go-structr => v... (diff) | |
| download | gotosocial-e3052e8c825da699162ea25367e860ac3c66f461.tar.xz | |
[bugfix] Don't return Account or Status if new and dereferencing failed, other small fixes (#2563)
* tidy up account, status, webfingering logic a wee bit
* go fmt
* invert published check
* alter resp initialization
* get Published from account in typeutils
* don't instantiate error for no darn good reason
* shadow err
* don't repeat error codes in wrapped errors
* don't wrap error unnecessarily
Diffstat (limited to 'internal/federation')
| -rw-r--r-- | internal/federation/dereferencing/account.go | 188 | ||||
| -rw-r--r-- | internal/federation/dereferencing/finger.go | 105 | ||||
| -rw-r--r-- | internal/federation/dereferencing/status.go | 48 | 
3 files changed, 252 insertions, 89 deletions
| diff --git a/internal/federation/dereferencing/account.go b/internal/federation/dereferencing/account.go index dcb6116d2..067203780 100644 --- a/internal/federation/dereferencing/account.go +++ b/internal/federation/dereferencing/account.go @@ -52,7 +52,7 @@ func accountUpToDate(account *gtsmodel.Account, force bool) bool {  		return true  	} -	if !account.CreatedAt.IsZero() && account.IsInstance() { +	if account.IsInstance() && !account.IsNew() {  		// Existing instance account. No need for update.  		return true  	} @@ -232,8 +232,8 @@ func (d *Dereferencer) getAccountByUsernameDomain(  		// Create and pass-through a new bare-bones model for dereferencing.  		account, accountable, err := d.enrichAccountSafely(ctx, requestUser, nil, >smodel.Account{  			ID:       id.NewULID(), -			Username: username,  			Domain:   domain, +			Username: username,  		}, nil)  		if err != nil {  			return nil, nil, err @@ -372,17 +372,18 @@ func (d *Dereferencer) enrichAccountSafely(  	// By default use account.URI  	// as the per-URI deref lock. -	uriStr := account.URI - -	if uriStr == "" { +	var lockKey string +	if account.URI != "" { +		lockKey = account.URI +	} else {  		// No URI is set yet, instead generate a faux-one from user+domain. -		uriStr = "https://" + account.Domain + "/user/" + account.Username +		lockKey = "https://" + account.Domain + "/users/" + account.Username  	}  	// Acquire per-URI deref lock, wraping unlock  	// to safely defer in case of panic, while still  	// performing more granular unlocks when needed. -	unlock := d.state.FedLocks.Lock(uriStr) +	unlock := d.state.FedLocks.Lock(lockKey)  	unlock = doOnce(unlock)  	defer unlock() @@ -394,10 +395,30 @@ func (d *Dereferencer) enrichAccountSafely(  		accountable,  	) -	if gtserror.StatusCode(err) >= 400 { -		// Update fetch-at to slow re-attempts. +	if code := gtserror.StatusCode(err); code >= 400 { +		// No matter what, log the error +		// so instance admins have an idea +		// why something isn't working. +		log.Info(ctx, err) + +		if account.IsNew() { +			// This was a new account enrich +			// attempt which failed before we +			// got to store it, so we can't +			// return anything useful. +			return nil, nil, err +		} + +		// We had this account stored already +		// before this enrichment attempt. +		// +		// Update fetched_at to slow re-attempts +		// but don't return early. We can still +		// return the model we had stored already.  		account.FetchedAt = time.Now() -		_ = d.state.DB.UpdateAccount(ctx, account, "fetched_at") +		if err := d.state.DB.UpdateAccount(ctx, account, "fetched_at"); err != nil { +			log.Errorf(ctx, "error updating account fetched_at: %v", err) +		}  	}  	// Unlock now @@ -414,7 +435,7 @@ func (d *Dereferencer) enrichAccountSafely(  		// in a call to db.Put(Account). Look again in DB by URI.  		latest, err = d.state.DB.GetAccountByURI(ctx, account.URI)  		if err != nil { -			err = gtserror.Newf("error getting account %s from database after race: %w", uriStr, err) +			err = gtserror.Newf("error getting account %s from database after race: %w", lockKey, err)  		}  	} @@ -440,32 +461,44 @@ func (d *Dereferencer) enrichAccount(  	if account.Username != "" {  		// A username was provided so we can attempt a webfinger, this ensures up-to-date accountdomain info.  		accDomain, accURI, err := d.fingerRemoteAccount(ctx, tsport, account.Username, account.Domain) -		if err != nil { -			if account.URI == "" { -				// this is a new account (to us) with username@domain -				// but failed webfinger, nothing more we can do. -				err := gtserror.Newf("error webfingering account: %w", err) -				return nil, nil, gtserror.SetUnretrievable(err) -			} +		switch { -			// Simply log this error and move on, we already have an account URI. -			log.Errorf(ctx, "error webfingering[1] remote account %s@%s: %v", account.Username, account.Domain, err) -		} +		case err != nil && account.URI == "": +			// This is a new account (to us) with username@domain +			// but failed webfinger, nothing more we can do. +			err := gtserror.Newf("error webfingering account: %w", err) +			return nil, nil, gtserror.SetUnretrievable(err) -		if err == nil { -			if account.Domain != accDomain { -				// After webfinger, we now have correct account domain from which we can do a final DB check. -				alreadyAccount, err := d.state.DB.GetAccountByUsernameDomain(ctx, account.Username, accDomain) -				if err != nil && !errors.Is(err, db.ErrNoEntries) { -					return nil, nil, gtserror.Newf("db err looking for account again after webfinger: %w", err) -				} +		case err != nil: +			// Simply log this error and move on, +			// we already have an account URI. +			log.Errorf(ctx, +				"error webfingering[1] remote account %s@%s: %v", +				account.Username, account.Domain, err, +			) + +		case err == nil && account.Domain != accDomain: +			// After webfinger, we now have correct account domain from which we can do a final DB check. +			alreadyAcct, err := d.state.DB.GetAccountByUsernameDomain(ctx, account.Username, accDomain) +			if err != nil && !errors.Is(err, db.ErrNoEntries) { +				return nil, nil, gtserror.Newf("db err looking for account again after webfinger: %w", err) +			} -				if alreadyAccount != nil { -					// Enrich existing account. -					account = alreadyAccount -				} +			if alreadyAcct != nil { +				// We had this account stored under +				// the discovered accountDomain. +				// Proceed with this account. +				account = alreadyAcct  			} +			// Whether we had the account or not, we +			// now have webfinger info relevant to the +			// account, so fallthrough to set webfinger +			// info on either the account we just found, +			// or the stub account we were passed. +			fallthrough + +		case err == nil:  			// Update account with latest info.  			account.URI = accURI.String()  			account.Domain = accDomain @@ -474,13 +507,31 @@ func (d *Dereferencer) enrichAccount(  	}  	if uri == nil { -		// No URI provided / found, must parse from account. +		// No URI provided / found, +		// must parse from account.  		uri, err = url.Parse(account.URI)  		if err != nil { -			return nil, nil, gtserror.Newf("invalid uri %q: %w", account.URI, err) +			return nil, nil, gtserror.Newf( +				"invalid uri %q: %w", +				account.URI, gtserror.SetUnretrievable(err), +			) +		} + +		if uri.Scheme != "http" && uri.Scheme != "https" { +			err = errors.New("account URI scheme must be http or https") +			return nil, nil, gtserror.Newf( +				"invalid uri %q: %w", +				account.URI, gtserror.SetUnretrievable(err), +			)  		}  	} +	/* +		BY THIS POINT we must have an account URI set, +		either provided, pinned to the account, or +		obtained via webfinger call. +	*/ +  	// Check whether this account URI is a blocked domain / subdomain.  	if blocked, err := d.state.DB.IsDomainBlocked(ctx, uri.Host); err != nil {  		return nil, nil, gtserror.Newf("error checking blocked domain: %w", err) @@ -493,27 +544,45 @@ func (d *Dereferencer) enrichAccount(  	defer d.stopHandshake(requestUser, uri)  	if apubAcc == nil { +		// We were not given any (partial) ActivityPub +		// version of this account as a parameter.  		// Dereference latest version of the account.  		b, err := tsport.Dereference(ctx, uri)  		if err != nil { -			err := gtserror.Newf("error deferencing %s: %w", uri, err) +			err := gtserror.Newf("error dereferencing %s: %w", uri, err)  			return nil, nil, gtserror.SetUnretrievable(err)  		}  		// Attempt to resolve ActivityPub acc from data.  		apubAcc, err = ap.ResolveAccountable(ctx, b)  		if err != nil { -			return nil, nil, gtserror.Newf("error resolving accountable from data for account %s: %w", uri, err) +			// ResolveAccountable will set Unretrievable/WrongType +			// on the returned error, so we don't need to do it here. +			err = gtserror.Newf("error resolving accountable from data for account %s: %w", uri, err) +			return nil, nil, err  		}  	} +	/* +		BY THIS POINT we must have the ActivityPub +		representation of the account, either provided, +		or obtained via a dereference call. +	*/ +  	// Convert the dereferenced AP account object to our GTS model. +	// +	// We put this in the variable latestAcc because we might want +	// to compare the provided account model with this fresh version, +	// in order to check if anything changed since we last saw it.  	latestAcc, err := d.converter.ASRepresentationToAccount(ctx,  		apubAcc,  		account.Domain,  	)  	if err != nil { -		return nil, nil, gtserror.Newf("error converting accountable to gts model for account %s: %w", uri, err) +		// ASRepresentationToAccount will set Malformed on the +		// returned error, so we don't need to do it here. +		err = gtserror.Newf("error converting accountable to gts model for account %s: %w", uri, err) +		return nil, nil, err  	}  	if account.Username == "" { @@ -528,14 +597,15 @@ func (d *Dereferencer) enrichAccount(  		// from example.org to somewhere.else: we want to take somewhere.else  		// as the accountDomain then, not the example.org we were redirected from. -		// Assume the host from the returned ActivityPub representation. -		idProp := apubAcc.GetJSONLDId() -		if idProp == nil || !idProp.IsIRI() { +		// Assume the host from the returned +		// ActivityPub representation. +		id := ap.GetJSONLDId(apubAcc) +		if id == nil {  			return nil, nil, gtserror.New("no id property found on person, or id was not an iri")  		}  		// Get IRI host value. -		accHost := idProp.GetIRI().Host +		accHost := id.Host  		latestAcc.Domain, _, err = d.fingerRemoteAccount(ctx,  			tsport, @@ -553,7 +623,7 @@ func (d *Dereferencer) enrichAccount(  	}  	if latestAcc.Domain == "" { -		// ensure we have a domain set by this point, +		// Ensure we have a domain set by this point,  		// otherwise it gets stored as a local user!  		//  		// TODO: there is probably a more granular way @@ -562,7 +632,16 @@ func (d *Dereferencer) enrichAccount(  		return nil, nil, gtserror.Newf("empty domain for %s", uri)  	} -	// Ensure ID is set and update fetch time. +	/* +		BY THIS POINT we have more or less a fullly-formed +		representation of the target account, derived from +		a combination of webfinger lookups and dereferencing. +		Further fetching beyond this point is for peripheral +		things like account avatar, header, emojis. +	*/ + +	// Ensure internal db ID is +	// set and update fetch time.  	latestAcc.ID = account.ID  	latestAcc.FetchedAt = time.Now() @@ -581,12 +660,14 @@ func (d *Dereferencer) enrichAccount(  		log.Errorf(ctx, "error fetching remote emojis for account %s: %v", uri, err)  	} -	if account.CreatedAt.IsZero() { -		// CreatedAt will be zero if no local copy was -		// found in one of the GetAccountBy___() functions. -		// -		// Set time of creation from the last-fetched date. -		latestAcc.CreatedAt = latestAcc.FetchedAt +	if account.IsNew() { +		// Prefer published/created time from +		// apubAcc, fall back to FetchedAt value. +		if latestAcc.CreatedAt.IsZero() { +			latestAcc.CreatedAt = latestAcc.FetchedAt +		} + +		// Set time of update from the last-fetched date.  		latestAcc.UpdatedAt = latestAcc.FetchedAt  		// This is new, put it in the database. @@ -595,11 +676,16 @@ func (d *Dereferencer) enrichAccount(  			return nil, nil, gtserror.Newf("error putting in database: %w", err)  		}  	} else { +		// Prefer published time from apubAcc, +		// fall back to previous stored value. +		if latestAcc.CreatedAt.IsZero() { +			latestAcc.CreatedAt = account.CreatedAt +		} +  		// Set time of update from the last-fetched date.  		latestAcc.UpdatedAt = latestAcc.FetchedAt -		// Use existing account values. -		latestAcc.CreatedAt = account.CreatedAt +		// Carry over existing account language.  		latestAcc.Language = account.Language  		// This is an existing account, update the model in the database. diff --git a/internal/federation/dereferencing/finger.go b/internal/federation/dereferencing/finger.go index a81afa5ea..514a058ba 100644 --- a/internal/federation/dereferencing/finger.go +++ b/internal/federation/dereferencing/finger.go @@ -20,54 +20,109 @@ package dereferencing  import (  	"context"  	"encoding/json" -	"errors" -	"fmt"  	"net/url"  	"strings"  	apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model" +	"github.com/superseriousbusiness/gotosocial/internal/gtserror" +	"github.com/superseriousbusiness/gotosocial/internal/log"  	"github.com/superseriousbusiness/gotosocial/internal/transport"  	"github.com/superseriousbusiness/gotosocial/internal/util"  ) -func (d *Dereferencer) fingerRemoteAccount(ctx context.Context, transport transport.Transport, targetUsername string, targetHost string) (accountDomain string, accountURI *url.URL, err error) { -	b, err := transport.Finger(ctx, targetUsername, targetHost) +// fingerRemoteAccount performs a webfinger call for the +// given username and host, using the provided transport. +// +// The webfinger response will be parsed, and the subject +// domain and AP URI will be extracted and returned. +// +// In case the response cannot be parsed, or the response +// does not contain a valid subject string or AP URI, an +// error will be returned instead. +func (d *Dereferencer) fingerRemoteAccount( +	ctx context.Context, +	transport transport.Transport, +	username string, +	host string, +) ( +	string, // discovered account domain +	*url.URL, // discovered account URI +	error, +) { +	// Assemble target namestring for logging. +	var target = "@" + username + "@" + host + +	b, err := transport.Finger(ctx, username, host)  	if err != nil { -		err = fmt.Errorf("fingerRemoteAccount: error fingering @%s@%s: %s", targetUsername, targetHost, err) -		return +		err = gtserror.Newf("error webfingering %s: %w", target, err) +		return "", nil, err  	} -	resp := &apimodel.WellKnownResponse{} -	if err = json.Unmarshal(b, resp); err != nil { -		err = fmt.Errorf("fingerRemoteAccount: could not unmarshal server response as WebfingerAccountResponse while dereferencing @%s@%s: %s", targetUsername, targetHost, err) -		return +	var resp apimodel.WellKnownResponse +	if err := json.Unmarshal(b, &resp); err != nil { +		err = gtserror.Newf("error parsing response as JSON for %s: %w", target, err) +		return "", nil, err  	}  	if len(resp.Links) == 0 { -		err = fmt.Errorf("fingerRemoteAccount: no links found in webfinger response %s", string(b)) -		return +		err = gtserror.Newf("no links found in response for %s", target) +		return "", nil, err  	}  	if resp.Subject == "" { -		err = fmt.Errorf("fingerRemoteAccount: no subject found in webfinger response %s", string(b)) -		return +		err = gtserror.Newf("no subject found in response for %s", target) +		return "", nil, err  	} -	_, accountDomain, err = util.ExtractWebfingerParts(resp.Subject) +	_, accountDomain, err := util.ExtractWebfingerParts(resp.Subject)  	if err != nil { -		err = fmt.Errorf("fingerRemoteAccount: error extracting webfinger subject parts: %s", err) +		err = gtserror.Newf("error extracting subject parts for %s: %w", target, err) +		return "", nil, err  	} -	// look through the links for the first one that matches what we need -	for _, l := range resp.Links { -		if l.Rel == "self" && (strings.EqualFold(l.Type, "application/activity+json") || strings.EqualFold(l.Type, "application/ld+json; profile=\"https://www.w3.org/ns/activitystreams\"")) { -			if uri, thiserr := url.Parse(l.Href); thiserr == nil && (uri.Scheme == "http" || uri.Scheme == "https") { -				// found it! -				accountURI = uri -				return -			} +	// Look through links for the first +	// one that matches what we need: +	// +	//   - Must be self link. +	//   - Must be AP type. +	//   - Valid https/http URI. +	for _, link := range resp.Links { +		if link.Rel != "self" { +			// Not self link, ignore. +			continue +		} + +		if !strings.EqualFold(link.Type, "application/activity+json") && +			!strings.EqualFold(link.Type, "application/ld+json; profile=\"https://www.w3.org/ns/activitystreams\"") { +			// Not an AP type, ignore. +			continue +		} + +		uri, err := url.Parse(link.Href) +		if err != nil { +			log.Warnf(ctx, +				"invalid href for ActivityPub self link %s for %s: %v", +				link.Href, target, err, +			) + +			// Funky URL, ignore. +			continue +		} + +		if uri.Scheme != "http" && uri.Scheme != "https" { +			log.Warnf(ctx, +				"invalid href for ActivityPub self link %s for %s: schema must be http or https", +				link.Href, target, +			) + +			// Can't handle this +			// schema, ignore. +			continue  		} + +		// All looks good, return happily! +		return accountDomain, uri, nil  	} -	return "", nil, errors.New("fingerRemoteAccount: no match found in webfinger response") +	return "", nil, gtserror.Newf("no suitable self, AP-type link found in webfinger response for %s", target)  } diff --git a/internal/federation/dereferencing/status.go b/internal/federation/dereferencing/status.go index 1eb7d6fdb..7dc22a354 100644 --- a/internal/federation/dereferencing/status.go +++ b/internal/federation/dereferencing/status.go @@ -294,10 +294,30 @@ func (d *Dereferencer) enrichStatusSafely(  		apubStatus,  	) -	if gtserror.StatusCode(err) >= 400 { -		// Update fetch-at to slow re-attempts. +	if code := gtserror.StatusCode(err); code >= 400 { +		// No matter what, log the error +		// so instance admins have an idea +		// why something isn't working. +		log.Info(ctx, err) + +		if isNew { +			// This was a new status enrich +			// attempt which failed before we +			// got to store it, so we can't +			// return anything useful. +			return nil, nil, isNew, err +		} + +		// We had this status stored already +		// before this enrichment attempt. +		// +		// Update fetched_at to slow re-attempts +		// but don't return early. We can still +		// return the model we had stored already.  		status.FetchedAt = time.Now() -		_ = d.state.DB.UpdateStatus(ctx, status, "fetched_at") +		if err := d.state.DB.UpdateStatus(ctx, status, "fetched_at"); err != nil { +			log.Errorf(ctx, "error updating status fetched_at: %v", err) +		}  	}  	// Unlock now @@ -358,7 +378,7 @@ func (d *Dereferencer) enrichStatus(  		// Dereference latest version of the status.  		b, err := tsport.Dereference(ctx, uri)  		if err != nil { -			err := gtserror.Newf("error deferencing %s: %w", uri, err) +			err := gtserror.Newf("error dereferencing %s: %w", uri, err)  			return nil, nil, gtserror.SetUnretrievable(err)  		} @@ -388,16 +408,21 @@ func (d *Dereferencer) enrichStatus(  		return nil, nil, gtserror.Newf("error converting statusable to gts model for status %s: %w", uri, err)  	} -	// Use existing status ID. -	latestStatus.ID = status.ID -	if latestStatus.ID == "" { - -		// Generate new status ID from the provided creation date. +	// Check if we've previously +	// stored this status in the DB. +	// If we have, it'll be ID'd. +	var isNew = (status.ID == "") +	if isNew { +		// No ID, we haven't stored this status before. +		// Generate new status ID from the status publication time.  		latestStatus.ID, err = id.NewULIDFromTime(latestStatus.CreatedAt)  		if err != nil {  			log.Errorf(ctx, "invalid created at date (falling back to 'now'): %v", err)  			latestStatus.ID = id.NewULID() // just use "now"  		} +	} else { +		// Reuse existing status ID. +		latestStatus.ID = status.ID  	}  	// Carry-over values and set fetch time. @@ -436,10 +461,7 @@ func (d *Dereferencer) enrichStatus(  		return nil, nil, gtserror.Newf("error populating emojis for status %s: %w", uri, err)  	} -	if status.CreatedAt.IsZero() { -		// CreatedAt will be zero if no local copy was -		// found in one of the GetStatusBy___() functions. -		// +	if isNew {  		// This is new, put the status in the database.  		err := d.state.DB.PutStatus(ctx, latestStatus)  		if err != nil { | 
