diff options
| author | 2023-05-12 10:15:54 +0100 | |
|---|---|---|
| committer | 2023-05-12 11:15:54 +0200 | |
| commit | 6c9d8e78ebbd81928abab1bbb49a3c4daa7d2a3a (patch) | |
| tree | 0bb314c76b87606371283819175eda02026897a9 /internal/federation/dereferencing/status.go | |
| parent | [chore] Prefer JSON errors in API endpoints (#1766) (diff) | |
| download | gotosocial-6c9d8e78ebbd81928abab1bbb49a3c4daa7d2a3a.tar.xz | |
[feature] status refetch support (#1690)
* revamp http client to not limit requests, instead use sender worker
Signed-off-by: kim <grufwub@gmail.com>
* remove separate sender worker pool, spawn 2*GOMAXPROCS batch senders each time, no need for transport cache sweeping
Signed-off-by: kim <grufwub@gmail.com>
* improve batch senders to keep popping recipients until remote URL found
Signed-off-by: kim <grufwub@gmail.com>
* fix recipient looping issue
Signed-off-by: kim <grufwub@gmail.com>
* move request id ctx key to gtscontext, finish filling out more code comments, add basic support for not logging client IP
Signed-off-by: kim <grufwub@gmail.com>
* first draft of status refetching logic
Signed-off-by: kim <grufwub@gmail.com>
* fix testrig to use new federation alloc func signature
Signed-off-by: kim <grufwub@gmail.com>
* fix log format directive
Signed-off-by: kim <grufwub@gmail.com>
* add status fetched_at migration
Signed-off-by: kim <grufwub@gmail.com>
* remove unused / unchecked for error types
Signed-off-by: kim <grufwub@gmail.com>
* add back the used type...
Signed-off-by: kim <grufwub@gmail.com>
* add separate internal getStatus() function for derefThread() that doesn't recurse
Signed-off-by: kim <grufwub@gmail.com>
* improved mention and media attachment error handling
Signed-off-by: kim <grufwub@gmail.com>
* fix log and error format directives
Signed-off-by: kim <grufwub@gmail.com>
* update account deref to match status deref changes
Signed-off-by: kim <grufwub@gmail.com>
* very small code formatting change to make things clearer
Signed-off-by: kim <grufwub@gmail.com>
* add more code comments
Signed-off-by: kim <grufwub@gmail.com>
* improved code commenting
Signed-off-by: kim <grufwub@gmail.com>
* only check for required further derefs if needed
Signed-off-by: kim <grufwub@gmail.com>
* improved cache invalidation
Signed-off-by: kim <grufwub@gmail.com>
* tweak cache restarting to use a (very small) backoff
Signed-off-by: kim <grufwub@gmail.com>
* small readability changes and fixes
Signed-off-by: kim <grufwub@gmail.com>
* fix account sync issues
Signed-off-by: kim <grufwub@gmail.com>
* fix merge conflicts + update account enrichment to accept already-passed accountable
Signed-off-by: kim <grufwub@gmail.com>
* remove secondary function declaration
Signed-off-by: kim <grufwub@gmail.com>
* normalise dereferencer get status / account behaviour, fix remaining tests
Signed-off-by: kim <grufwub@gmail.com>
* fix remaining rebase conflicts, finish commenting code
Signed-off-by: kim <grufwub@gmail.com>
* appease the linter
Signed-off-by: kim <grufwub@gmail.com>
* add source file header
Signed-off-by: kim <grufwub@gmail.com>
* update to use TIMESTAMPTZ column type instead of just TIMESTAMP
Signed-off-by: kim <grufwub@gmail.com>
* don't pass in 'updated_at' to UpdateEmoji()
Signed-off-by: kim <grufwub@gmail.com>
* use new ap.Resolve{Account,Status}able() functions
Signed-off-by: kim <grufwub@gmail.com>
* remove the somewhat confusing rescoping of the same variable names
Signed-off-by: kim <grufwub@gmail.com>
* update migration file name, improved database delete error returns
Signed-off-by: kim <grufwub@gmail.com>
* formatting
Signed-off-by: kim <grufwub@gmail.com>
* improved multi-delete database functions to minimise DB calls
Signed-off-by: kim <grufwub@gmail.com>
* remove unused type
Signed-off-by: kim <grufwub@gmail.com>
* fix delete statements
Signed-off-by: kim <grufwub@gmail.com>
---------
Signed-off-by: kim <grufwub@gmail.com>
Diffstat (limited to 'internal/federation/dereferencing/status.go')
| -rw-r--r-- | internal/federation/dereferencing/status.go | 581 | 
1 files changed, 319 insertions, 262 deletions
| diff --git a/internal/federation/dereferencing/status.go b/internal/federation/dereferencing/status.go index 8e130393a..dfec0605f 100644 --- a/internal/federation/dereferencing/status.go +++ b/internal/federation/dereferencing/status.go @@ -21,8 +21,9 @@ import (  	"context"  	"errors"  	"fmt" +	"io"  	"net/url" -	"strings" +	"time"  	"github.com/superseriousbusiness/gotosocial/internal/ap"  	"github.com/superseriousbusiness/gotosocial/internal/config" @@ -34,374 +35,430 @@ import (  	"github.com/superseriousbusiness/gotosocial/internal/transport"  ) -// EnrichRemoteStatus takes a remote status that's already been inserted into the database in a minimal form, -// and populates it with additional fields, media, etc. -// -// EnrichRemoteStatus is mostly useful for calling after a status has been initially created by -// the federatingDB's Create function, but additional dereferencing is needed on it. -func (d *deref) EnrichRemoteStatus(ctx context.Context, username string, status *gtsmodel.Status, includeParent bool) (*gtsmodel.Status, error) { -	if err := d.populateStatusFields(ctx, status, username, includeParent); err != nil { -		return nil, err +// statusUpToDate returns whether the given status model is both updateable +// (i.e. remote status) and whether it needs an update based on `fetched_at`. +func statusUpToDate(status *gtsmodel.Status) bool { +	if *status.Local { +		// Can't update local statuses. +		return true  	} -	if err := d.db.UpdateStatus(ctx, status); err != nil { -		return nil, err + +	// If this status was updated recently (last interval), we return as-is. +	if next := status.FetchedAt.Add(2 * time.Hour); time.Now().Before(next) { +		return true  	} -	return status, nil + +	return false  } -// GetStatus completely dereferences a status, converts it to a GtS model status, -// puts it in the database, and returns it to a caller. -// -// If refetch is true, then regardless of whether we have the original status in the database or not, -// the ap.Statusable representation of the status will be dereferenced and returned. -// -// If refetch is false, the ap.Statusable will only be returned if this is a new status, so callers -// should check whether or not this is nil. -// -// GetAccount will guard against trying to do http calls to fetch a status that belongs to this instance. -// Instead of making calls, it will just return the status early if it finds it, or return an error. -func (d *deref) GetStatus(ctx context.Context, username string, statusURI *url.URL, refetch, includeParent bool) (*gtsmodel.Status, ap.Statusable, error) { -	uriString := statusURI.String() - -	// try to get by URI first -	status, dbErr := d.db.GetStatusByURI(ctx, uriString) -	if dbErr != nil { -		if !errors.Is(dbErr, db.ErrNoEntries) { -			// real error -			return nil, nil, newErrDB(fmt.Errorf("GetRemoteStatus: error during GetStatusByURI for %s: %w", uriString, dbErr)) -		} -		// no problem, just press on -	} else if !refetch { -		// we already had the status and we aren't being asked to refetch the AP representation -		return status, nil, nil +// GetStatus: implements Dereferencer{}.GetStatus(). +func (d *deref) GetStatusByURI(ctx context.Context, requestUser string, uri *url.URL) (*gtsmodel.Status, ap.Statusable, error) { +	// Fetch and dereference status if necessary. +	status, apubStatus, err := d.getStatusByURI(ctx, +		requestUser, +		uri, +	) +	if err != nil { +		return nil, nil, err +	} + +	if apubStatus != nil { +		// This status was updated, enqueue re-dereferencing the whole thread. +		d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) { +			d.dereferenceThread(ctx, requestUser, uri, status, apubStatus) +		}) +	} + +	return status, apubStatus, nil +} + +// getStatusByURI is a package internal form of .GetStatusByURI() that doesn't bother dereferencing the whole thread on update. +func (d *deref) getStatusByURI(ctx context.Context, requestUser string, uri *url.URL) (*gtsmodel.Status, ap.Statusable, error) { +	var ( +		status *gtsmodel.Status +		uriStr = uri.String() +		err    error +	) + +	// Search the database for existing status with ID URI. +	status, err = d.state.DB.GetStatusByURI(ctx, uriStr) +	if err != nil && !errors.Is(err, db.ErrNoEntries) { +		return nil, nil, fmt.Errorf("GetStatusByURI: error checking database for status %s by uri: %w", uriStr, err)  	} -	// try to get by URL if we couldn't get by URI now  	if status == nil { -		status, dbErr = d.db.GetStatusByURL(ctx, uriString) -		if dbErr != nil { -			if !errors.Is(dbErr, db.ErrNoEntries) { -				// real error -				return nil, nil, newErrDB(fmt.Errorf("GetRemoteStatus: error during GetStatusByURI for %s: %w", uriString, dbErr)) -			} -			// no problem, just press on -		} else if !refetch { -			// we already had the status and we aren't being asked to refetch the AP representation -			return status, nil, nil +		// Else, search the database for existing by ID URL. +		status, err = d.state.DB.GetStatusByURL(ctx, uriStr) +		if err != nil && !errors.Is(err, db.ErrNoEntries) { +			return nil, nil, fmt.Errorf("GetStatusByURI: error checking database for status %s by url: %w", uriStr, err)  		}  	} -	// guard against having our own statuses passed in -	if host := statusURI.Host; host == config.GetHost() || host == config.GetAccountDomain() { -		// this is our status, definitely don't search for it -		if status != nil { -			return status, nil, nil +	if status == nil { +		// Ensure that this is isn't a search for a local status. +		if uri.Host == config.GetHost() || uri.Host == config.GetAccountDomain() { +			return nil, nil, NewErrNotRetrievable(err) // this will be db.ErrNoEntries  		} -		return nil, nil, NewErrNotRetrievable(fmt.Errorf("GetRemoteStatus: uri %s is apparently ours, but we have nothing in the db for it, will not proceed to dereference our own status", uriString)) + +		// Create and pass-through a new bare-bones model for deref. +		return d.enrichStatus(ctx, requestUser, uri, >smodel.Status{ +			Local: func() *bool { var false bool; return &false }(), +			URI:   uriStr, +		}, nil)  	} -	// if we got here, either we didn't have the status -	// in the db, or we had it but need to refetch it -	tsport, err := d.transportController.NewTransportForUsername(ctx, username) +	// Try to update + deref existing status model. +	latest, apubStatus, err := d.enrichStatus(ctx, +		requestUser, +		uri, +		status, +		nil, +	)  	if err != nil { -		return nil, nil, newErrTransportError(fmt.Errorf("GetRemoteStatus: error creating transport for %s: %w", username, err)) -	} +		log.Errorf(ctx, "error enriching remote status: %v", err) -	statusable, derefErr := d.dereferenceStatusable(ctx, tsport, statusURI) -	if derefErr != nil { -		return nil, nil, wrapDerefError(derefErr, "GetRemoteStatus: error dereferencing statusable") -	} +		// Update fetch-at to slow re-attempts. +		status.FetchedAt = time.Now() +		_ = d.state.DB.UpdateStatus(ctx, status, "fetched_at") -	if status != nil && refetch { -		// we already had the status in the db, and we've also -		// now fetched the AP representation as requested -		return status, statusable, nil +		// Fallback to existing. +		return status, nil, nil  	} -	// from here on out we can consider this to be a 'new' status because we didn't have the status in the db already -	accountURI, err := ap.ExtractAttributedTo(statusable) -	if err != nil { -		return nil, nil, newErrOther(fmt.Errorf("GetRemoteStatus: error extracting attributedTo: %w", err)) -	} +	return latest, apubStatus, nil +} -	// we need to get the author of the status else we can't serialize it properly -	if _, err = d.GetAccountByURI(ctx, username, accountURI); err != nil { -		return nil, nil, newErrOther(fmt.Errorf("GetRemoteStatus: couldn't get status author: %s", err)) +// RefreshStatus: implements Dereferencer{}.RefreshStatus(). +func (d *deref) RefreshStatus(ctx context.Context, requestUser string, status *gtsmodel.Status, apubStatus ap.Statusable, force bool) (*gtsmodel.Status, ap.Statusable, error) { +	// Check whether needs update. +	if statusUpToDate(status) { +		return status, nil, nil  	} -	status, err = d.typeConverter.ASStatusToStatus(ctx, statusable) +	// Parse the URI from status. +	uri, err := url.Parse(status.URI)  	if err != nil { -		return nil, nil, newErrOther(fmt.Errorf("GetRemoteStatus: error converting statusable to status: %s", err)) +		return nil, nil, fmt.Errorf("RefreshStatus: invalid status uri %q: %w", status.URI, err)  	} -	ulid, err := id.NewULIDFromTime(status.CreatedAt) +	// Try to update + deref existing status model. +	latest, apubStatus, err := d.enrichStatus(ctx, +		requestUser, +		uri, +		status, +		apubStatus, +	)  	if err != nil { -		return nil, nil, newErrOther(fmt.Errorf("GetRemoteStatus: error generating new id for status: %s", err)) -	} -	status.ID = ulid - -	if err := d.populateStatusFields(ctx, status, username, includeParent); err != nil { -		return nil, nil, newErrOther(fmt.Errorf("GetRemoteStatus: error populating status fields: %s", err)) +		return nil, nil, err  	} -	if err := d.db.PutStatus(ctx, status); err != nil && !errors.Is(err, db.ErrAlreadyExists) { -		return nil, nil, newErrDB(fmt.Errorf("GetRemoteStatus: error putting new status: %s", err)) -	} +	// This status was updated, enqueue re-dereferencing the whole thread. +	d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) { +		d.dereferenceThread(ctx, requestUser, uri, latest, apubStatus) +	}) -	return status, statusable, nil +	return latest, apubStatus, nil  } -func (d *deref) dereferenceStatusable(ctx context.Context, tsport transport.Transport, remoteStatusID *url.URL) (ap.Statusable, error) { -	if blocked, err := d.db.IsDomainBlocked(ctx, remoteStatusID.Host); blocked || err != nil { -		return nil, fmt.Errorf("DereferenceStatusable: domain %s is blocked", remoteStatusID.Host) +// RefreshStatusAsync: implements Dereferencer{}.RefreshStatusAsync(). +func (d *deref) RefreshStatusAsync(ctx context.Context, requestUser string, status *gtsmodel.Status, apubStatus ap.Statusable, force bool) { +	// Check whether needs update. +	if statusUpToDate(status) { +		return  	} -	b, err := tsport.Dereference(ctx, remoteStatusID) +	// Parse the URI from status. +	uri, err := url.Parse(status.URI)  	if err != nil { -		return nil, fmt.Errorf("dereferenceStatusable: error deferencing %s: %w", remoteStatusID.String(), err) +		log.Errorf(ctx, "RefreshStatusAsync: invalid status uri %q: %v", status.URI, err) +		return  	} -	return ap.ResolveStatusable(ctx, b) +	// Enqueue a worker function to re-fetch this status async. +	d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) { +		latest, apubStatus, err := d.enrichStatus(ctx, requestUser, uri, status, apubStatus) +		if err != nil { +			log.Errorf(ctx, "error enriching remote status: %v", err) +			return +		} + +		// This status was updated, re-dereference the whole thread. +		d.dereferenceThread(ctx, requestUser, uri, latest, apubStatus) +	})  } -// populateStatusFields fetches all the information we temporarily pinned to an incoming -// federated status, back in the federating db's Create function. -// -// When a status comes in from the federation API, there are certain fields that -// haven't been dereferenced yet, because we needed to provide a snappy synchronous -// response to the caller. By the time it reaches this function though, it's being -// processed asynchronously, so we have all the time in the world to fetch the various -// bits and bobs that are attached to the status, and properly flesh it out, before we -// send the status to any timelines and notify people. -// -// Things to dereference and fetch here: -// -// 1. Media attachments. -// 2. Hashtags. -// 3. Emojis. -// 4. Mentions. -// 5. Replied-to-status. -// -// SIDE EFFECTS: -// This function will deference all of the above, insert them in the database as necessary, -// and attach them to the status. The status itself will not be added to the database yet, -// that's up the caller to do. -func (d *deref) populateStatusFields(ctx context.Context, status *gtsmodel.Status, requestingUsername string, includeParent bool) error { -	statusIRI, err := url.Parse(status.URI) +// enrichStatus will enrich the given status, whether a new barebones model, or existing model from the database. It handles necessary dereferencing etc. +func (d *deref) enrichStatus(ctx context.Context, requestUser string, uri *url.URL, status *gtsmodel.Status, apubStatus ap.Statusable) (*gtsmodel.Status, ap.Statusable, error) { +	// Pre-fetch a transport for requesting username, used by later dereferencing. +	tsport, err := d.transportController.NewTransportForUsername(ctx, requestUser)  	if err != nil { -		return fmt.Errorf("populateStatusFields: couldn't parse status URI %s: %s", status.URI, err) +		return nil, nil, fmt.Errorf("enrichStatus: couldn't create transport: %w", err)  	} -	blocked, err := d.db.IsURIBlocked(ctx, statusIRI) -	if err != nil { -		return fmt.Errorf("populateStatusFields: error checking blocked status of %s: %s", statusIRI, err) -	} -	if blocked { -		return fmt.Errorf("populateStatusFields: domain %s is blocked", statusIRI) +	// Check whether this account URI is a blocked domain / subdomain. +	if blocked, err := d.state.DB.IsDomainBlocked(ctx, uri.Host); err != nil { +		return nil, nil, fmt.Errorf("enrichStatus: error checking blocked domain: %w", err) +	} else if blocked { +		return nil, nil, fmt.Errorf("enrichStatus: %s is blocked", uri.Host)  	} -	// in case the status doesn't have an id yet (ie., it hasn't entered the database yet), then create one -	if status.ID == "" { -		newID, err := id.NewULIDFromTime(status.CreatedAt) +	var derefd bool + +	if apubStatus == nil { +		// Dereference latest version of the status. +		b, err := tsport.Dereference(ctx, uri)  		if err != nil { -			return fmt.Errorf("populateStatusFields: error creating ulid for status: %s", err) +			return nil, nil, &ErrNotRetrievable{fmt.Errorf("enrichStatus: error deferencing %s: %w", uri, err)}  		} -		status.ID = newID -	} -	// 1. Media attachments. -	if err := d.populateStatusAttachments(ctx, status, requestingUsername); err != nil { -		return fmt.Errorf("populateStatusFields: error populating status attachments: %s", err) -	} +		// Attempt to resolve ActivityPub status from data. +		apubStatus, err = ap.ResolveStatusable(ctx, b) +		if err != nil { +			return nil, nil, fmt.Errorf("enrichStatus: error resolving statusable from data for account %s: %w", uri, err) +		} -	// 2. Hashtags -	// TODO +		// Mark as deref'd. +		derefd = true +	} -	// 3. Emojis -	if err := d.populateStatusEmojis(ctx, status, requestingUsername); err != nil { -		return fmt.Errorf("populateStatusFields: error populating status emojis: %s", err) +	// Get the attributed-to status in order to fetch profile. +	attributedTo, err := ap.ExtractAttributedTo(apubStatus) +	if err != nil { +		return nil, nil, errors.New("enrichStatus: attributedTo was empty")  	} -	// 4. Mentions -	// TODO: do we need to handle removing empty mention objects and just using mention IDs slice? -	if err := d.populateStatusMentions(ctx, status, requestingUsername); err != nil { -		return fmt.Errorf("populateStatusFields: error populating status mentions: %s", err) +	// Ensure we have the author account of the status dereferenced (+ up-to-date). +	if author, _, err := d.getAccountByURI(ctx, requestUser, attributedTo); err != nil { +		if status.AccountID == "" { +			// Provided status account is nil, i.e. this is a new status / author, so a deref fail is unrecoverable. +			return nil, nil, fmt.Errorf("enrichStatus: failed to dereference status author %s: %w", uri, err) +		} +	} else if status.AccountID != "" && status.AccountID != author.ID { +		// There already existed an account for this status author, but account ID changed. This shouldn't happen! +		log.Warnf(ctx, "status author account ID changed: old=%s new=%s", status.AccountID, author.ID)  	} -	// 5. Replied-to-status (only if requested) -	if includeParent { -		if err := d.populateStatusRepliedTo(ctx, status, requestingUsername); err != nil { -			return fmt.Errorf("populateStatusFields: error populating status repliedTo: %s", err) +	// By default we assume that apubStatus has been passed, +	// indicating that the given status is already latest. +	latestStatus := status + +	if derefd { +		// ActivityPub model was recently dereferenced, so assume that passed status +		// may contain out-of-date information, convert AP model to our GTS model. +		latestStatus, err = d.typeConverter.ASStatusToStatus(ctx, apubStatus) +		if err != nil { +			return nil, nil, fmt.Errorf("enrichStatus: error converting statusable to gts model for status %s: %w", uri, err)  		}  	} -	return nil -} +	// Use existing status ID. +	latestStatus.ID = status.ID -func (d *deref) populateStatusMentions(ctx context.Context, status *gtsmodel.Status, requestingUsername string) error { -	// At this point, mentions should have the namestring and mentionedAccountURI set on them. -	// We can use these to find the accounts. - -	mentionIDs := []string{} -	newMentions := []*gtsmodel.Mention{} -	for _, m := range status.Mentions { -		if m.ID != "" { -			// we've already populated this mention, since it has an ID -			log.Debug(ctx, "mention already populated") -			mentionIDs = append(mentionIDs, m.ID) -			newMentions = append(newMentions, m) -			continue +	if latestStatus.ID == "" { +		// Generate new status ID from the provided creation date. +		latestStatus.ID, err = id.NewULIDFromTime(latestStatus.CreatedAt) +		if err != nil { +			return nil, nil, fmt.Errorf("enrichStatus: invalid created at date: %w", err)  		} +	} -		if m.TargetAccountURI == "" { -			log.Debug(ctx, "target URI not set on mention") -			continue +	// Carry-over values and set fetch time. +	latestStatus.FetchedAt = time.Now() +	latestStatus.Local = status.Local + +	// Ensure the status' mentions are populated, and pass in existing to check for changes. +	if err := d.fetchStatusMentions(ctx, requestUser, status, latestStatus); err != nil { +		return nil, nil, fmt.Errorf("enrichStatus: error populating mentions for status %s: %w", uri, err) +	} + +	// TODO: populateStatusTags() + +	// Ensure the status' media attachments are populated, passing in existing to check for changes. +	if err := d.fetchStatusAttachments(ctx, tsport, status, latestStatus); err != nil { +		return nil, nil, fmt.Errorf("enrichStatus: error populating attachments for status %s: %w", uri, err) +	} + +	// Ensure the status' emoji attachments are populated, passing in existing to check for changes. +	if err := d.fetchStatusEmojis(ctx, requestUser, status, latestStatus); err != nil { +		return nil, nil, fmt.Errorf("enrichStatus: error populating emojis for status %s: %w", uri, err) +	} + +	if status.CreatedAt.IsZero() { +		// CreatedAt will be zero if no local copy was +		// found in one of the GetStatusBy___() functions. +		// +		// This is new, put the status in the database. +		err := d.state.DB.PutStatus(ctx, latestStatus) + +		if errors.Is(err, db.ErrAlreadyExists) { +			// TODO: replace this quick fix with per-URI deref locks. +			latestStatus, err = d.state.DB.GetStatusByURI(ctx, latestStatus.URI) +			return latestStatus, nil, err  		} -		targetAccountURI, err := url.Parse(m.TargetAccountURI)  		if err != nil { -			log.Debugf(ctx, "error parsing mentioned account uri %s: %s", m.TargetAccountURI, err) -			continue +			return nil, nil, fmt.Errorf("enrichStatus: error putting in database: %w", err)  		} +	} else { +		// This is an existing status, update the model in the database. +		if err := d.state.DB.UpdateStatus(ctx, latestStatus); err != nil { +			return nil, nil, fmt.Errorf("enrichStatus: error updating database: %w", err) +		} +	} + +	return latestStatus, apubStatus, nil +} -		var targetAccount *gtsmodel.Account -		errs := []string{} +func (d *deref) fetchStatusMentions(ctx context.Context, requestUser string, existing *gtsmodel.Status, status *gtsmodel.Status) error { +	// Allocate new slice to take the yet-to-be created mention IDs. +	status.MentionIDs = make([]string, len(status.Mentions)) -		// check if account is in the db already -		if a, err := d.db.GetAccountByURI(ctx, targetAccountURI.String()); err != nil { -			errs = append(errs, err.Error()) -		} else { -			log.Debugf(ctx, "got target account %s with id %s through GetAccountByURI", targetAccountURI, a.ID) -			targetAccount = a -		} +	for i := range status.Mentions { +		mention := status.Mentions[i] -		if targetAccount == nil { -			// we didn't find the account in our database already -			// check if we can get the account remotely (dereference it) -			if a, err := d.GetAccountByURI(ctx, requestingUsername, targetAccountURI); err != nil { -				errs = append(errs, err.Error()) -			} else { -				log.Debugf(ctx, "got target account %s with id %s through GetRemoteAccount", targetAccountURI, a.ID) -				targetAccount = a -			} +		// Look for existing mention with target account URI first. +		existing, ok := existing.GetMentionByTargetURI(mention.TargetAccountURI) +		if ok && existing.ID != "" { +			status.Mentions[i] = existing +			status.MentionIDs[i] = existing.ID +			continue  		} -		if targetAccount == nil { -			log.Debugf(ctx, "couldn't get target account %s: %s", m.TargetAccountURI, strings.Join(errs, " : ")) +		// Ensure that mention account URI is parseable. +		accountURI, err := url.Parse(mention.TargetAccountURI) +		if err != nil { +			log.Errorf(ctx, "invalid account uri %q: %v", mention.TargetAccountURI, err)  			continue  		} -		mID, err := id.NewRandomULID() +		// Ensure we have the account of the mention target dereferenced. +		mention.TargetAccount, _, err = d.getAccountByURI(ctx, requestUser, accountURI)  		if err != nil { -			return fmt.Errorf("populateStatusMentions: error generating ulid: %s", err) +			log.Errorf(ctx, "failed to dereference account %s: %v", accountURI, err) +			continue  		} -		newMention := >smodel.Mention{ -			ID:               mID, -			StatusID:         status.ID, -			Status:           m.Status, -			CreatedAt:        status.CreatedAt, -			UpdatedAt:        status.UpdatedAt, -			OriginAccountID:  status.AccountID, -			OriginAccountURI: status.AccountURI, -			OriginAccount:    status.Account, -			TargetAccountID:  targetAccount.ID, -			TargetAccount:    targetAccount, -			NameString:       m.NameString, -			TargetAccountURI: targetAccount.URI, -			TargetAccountURL: targetAccount.URL, +		// Generate new ID according to status creation. +		mention.ID, err = id.NewULIDFromTime(status.CreatedAt) +		if err != nil { +			log.Errorf(ctx, "invalid created at date: %v", err) +			mention.ID = id.NewULID() // just use "now"  		} -		if err := d.db.PutMention(ctx, newMention); err != nil { -			return fmt.Errorf("populateStatusMentions: error creating mention: %s", err) +		// Set known further mention details. +		mention.CreatedAt = status.CreatedAt +		mention.UpdatedAt = status.UpdatedAt +		mention.OriginAccount = status.Account +		mention.OriginAccountID = status.AccountID +		mention.OriginAccountURI = status.AccountURI +		mention.TargetAccountID = mention.TargetAccount.ID +		mention.TargetAccountURI = mention.TargetAccount.URI +		mention.TargetAccountURL = mention.TargetAccount.URL +		mention.StatusID = status.ID +		mention.Status = status + +		// Place the new mention into the database. +		if err := d.state.DB.PutMention(ctx, mention); err != nil { +			return fmt.Errorf("error putting mention in database: %w", err)  		} -		mentionIDs = append(mentionIDs, newMention.ID) -		newMentions = append(newMentions, newMention) +		// Set the *new* mention and ID. +		status.Mentions[i] = mention +		status.MentionIDs[i] = mention.ID  	} -	status.MentionIDs = mentionIDs -	status.Mentions = newMentions +	for i := 0; i < len(status.MentionIDs); i++ { +		if status.MentionIDs[i] == "" { +			// This is a failed mention population, likely due +			// to invalid incoming data / now-deleted accounts. +			copy(status.Mentions[i:], status.Mentions[i+1:]) +			copy(status.MentionIDs[i:], status.MentionIDs[i+1:]) +			status.Mentions = status.Mentions[:len(status.Mentions)-1] +			status.MentionIDs = status.MentionIDs[:len(status.MentionIDs)-1] +		} +	}  	return nil  } -func (d *deref) populateStatusAttachments(ctx context.Context, status *gtsmodel.Status, requestingUsername string) error { -	// At this point we should know: -	// * the media type of the file we're looking for (a.File.ContentType) -	// * the file type (a.Type) -	// * the remote URL (a.RemoteURL) -	// This should be enough to dereference the piece of media. - -	attachmentIDs := []string{} -	attachments := []*gtsmodel.MediaAttachment{} - -	for _, a := range status.Attachments { -		a.AccountID = status.AccountID -		a.StatusID = status.ID - -		processingMedia, err := d.GetRemoteMedia(ctx, requestingUsername, a.AccountID, a.RemoteURL, &media.AdditionalMediaInfo{ -			CreatedAt:   &a.CreatedAt, -			StatusID:    &a.StatusID, -			RemoteURL:   &a.RemoteURL, -			Description: &a.Description, -			Blurhash:    &a.Blurhash, +func (d *deref) fetchStatusAttachments(ctx context.Context, tsport transport.Transport, existing *gtsmodel.Status, status *gtsmodel.Status) error { +	// Allocate new slice to take the yet-to-be fetched attachment IDs. +	status.AttachmentIDs = make([]string, len(status.Attachments)) + +	for i := range status.Attachments { +		placeholder := status.Attachments[i] + +		// Look for existing media attachment with remoet URL first. +		existing, ok := existing.GetAttachmentByRemoteURL(placeholder.RemoteURL) +		if ok && existing.ID != "" { +			status.Attachments[i] = existing +			status.AttachmentIDs[i] = existing.ID +			continue +		} + +		// Ensure a valid media attachment remote URL. +		remoteURL, err := url.Parse(placeholder.RemoteURL) +		if err != nil { +			log.Errorf(ctx, "invalid remote media url %q: %v", placeholder.RemoteURL, err) +			continue +		} + +		// Start pre-processing remote media at remote URL. +		processing, err := d.mediaManager.PreProcessMedia(ctx, func(ctx context.Context) (io.ReadCloser, int64, error) { +			return tsport.DereferenceMedia(ctx, remoteURL) +		}, nil, status.AccountID, &media.AdditionalMediaInfo{ +			StatusID:    &status.ID, +			RemoteURL:   &placeholder.RemoteURL, +			Description: &placeholder.Description, +			Blurhash:    &placeholder.Blurhash,  		})  		if err != nil { -			log.Errorf(ctx, "couldn't get remote media %s: %s", a.RemoteURL, err) +			log.Errorf(ctx, "error processing attachment: %v", err)  			continue  		} -		attachment, err := processingMedia.LoadAttachment(ctx) +		// Force attachment loading *right now*. +		media, err := processing.LoadAttachment(ctx)  		if err != nil { -			log.Errorf(ctx, "couldn't load remote attachment %s: %s", a.RemoteURL, err) +			log.Errorf(ctx, "error loading attachment: %v", err)  			continue  		} -		attachmentIDs = append(attachmentIDs, attachment.ID) -		attachments = append(attachments, attachment) +		// Set the *new* attachment and ID. +		status.Attachments[i] = media +		status.AttachmentIDs[i] = media.ID  	} -	status.AttachmentIDs = attachmentIDs -	status.Attachments = attachments +	for i := 0; i < len(status.AttachmentIDs); i++ { +		if status.AttachmentIDs[i] == "" { +			// This is a failed attachment population, this may +			// be due to us not currently supporting a media type. +			copy(status.Attachments[i:], status.Attachments[i+1:]) +			copy(status.AttachmentIDs[i:], status.AttachmentIDs[i+1:]) +			status.Attachments = status.Attachments[:len(status.Attachments)-1] +			status.AttachmentIDs = status.AttachmentIDs[:len(status.AttachmentIDs)-1] +		} +	}  	return nil  } -func (d *deref) populateStatusEmojis(ctx context.Context, status *gtsmodel.Status, requestingUsername string) error { -	emojis, err := d.populateEmojis(ctx, status.Emojis, requestingUsername) +func (d *deref) fetchStatusEmojis(ctx context.Context, requestUser string, existing *gtsmodel.Status, status *gtsmodel.Status) error { +	// Fetch the full-fleshed-out emoji objects for our status. +	emojis, err := d.populateEmojis(ctx, status.Emojis, requestUser)  	if err != nil { -		return err +		return fmt.Errorf("failed to populate emojis: %w", err)  	} +	// Iterate over and get their IDs.  	emojiIDs := make([]string, 0, len(emojis))  	for _, e := range emojis {  		emojiIDs = append(emojiIDs, e.ID)  	} +	// Set known emoji details.  	status.Emojis = emojis  	status.EmojiIDs = emojiIDs -	return nil -} - -func (d *deref) populateStatusRepliedTo(ctx context.Context, status *gtsmodel.Status, requestingUsername string) error { -	if status.InReplyToURI != "" && status.InReplyToID == "" { -		statusURI, err := url.Parse(status.InReplyToURI) -		if err != nil { -			return err -		} - -		replyToStatus, _, err := d.GetStatus(ctx, requestingUsername, statusURI, false, false) -		if err != nil { -			return fmt.Errorf("populateStatusRepliedTo: couldn't get reply to status with uri %s: %s", status.InReplyToURI, err) -		} - -		// we have the status -		status.InReplyToID = replyToStatus.ID -		status.InReplyTo = replyToStatus -		status.InReplyToAccountID = replyToStatus.AccountID -		status.InReplyToAccount = replyToStatus.Account -	}  	return nil  } | 
