diff options
author | 2024-06-26 15:01:16 +0000 | |
---|---|---|
committer | 2024-06-26 16:01:16 +0100 | |
commit | 21bb324156f582e918a097ea744e52fc21b2ddf4 (patch) | |
tree | 50db5cfd42e26224591f59ff62de14a3715677b5 /internal/processing/media/getfile.go | |
parent | [docs] restructure federation section (#3038) (diff) | |
download | gotosocial-21bb324156f582e918a097ea744e52fc21b2ddf4.tar.xz |
[chore] media and emoji refactoring (#3000)
* start updating media manager interface ready for storing attachments / emoji right away
* store emoji and media as uncached immediately, then (re-)cache on Processing{}.Load()
* remove now unused media workers
* fix tests and issues
* fix another test!
* fix emoji activitypub uri setting behaviour, fix remainder of test compilation issues
* fix more tests
* fix (most of) remaining tests, add debouncing to repeatedly failing media / emojis
* whoops, rebase issue
* remove kim's whacky experiments
* do some reshuffling, ensure emoji uri gets set
* ensure marked as not cached on cleanup
* tweaks to media / emoji processing to handle context canceled better
* ensure newly fetched emojis actually get set in returned slice
* use different varnames to be a bit more obvious
* move emoji refresh rate limiting to dereferencer
* add exported dereferencer functions for remote media, use these for recaching in processor
* add check for nil attachment in updateAttachment()
* remove unused emoji and media fields + columns
* see previous commit
* fix old migrations expecting image_updated_at to exist (from copies of old models)
* remove freshness checking code (seems to be broken...)
* fix error arg causing nil ptr exception
* finish documenting functions with comments, slight tweaks to media / emoji deref error logic
* remove some extra unneeded boolean checking
* finish writing documentation (code comments) for exported media manager methods
* undo changes to migration snapshot gtsmodels, updated failing migration to have its own snapshot
* move doesColumnExist() to util.go in migrations package
Diffstat (limited to 'internal/processing/media/getfile.go')
-rw-r--r-- | internal/processing/media/getfile.go | 384 |
1 files changed, 213 insertions, 171 deletions
diff --git a/internal/processing/media/getfile.go b/internal/processing/media/getfile.go index 28f5e6464..7ba549029 100644 --- a/internal/processing/media/getfile.go +++ b/internal/processing/media/getfile.go @@ -19,14 +19,14 @@ package media import ( "context" + "errors" "fmt" - "io" "net/url" "strings" "time" apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model" - "github.com/superseriousbusiness/gotosocial/internal/gtscontext" + "github.com/superseriousbusiness/gotosocial/internal/db" "github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/media" @@ -38,7 +38,7 @@ import ( // to the caller via an io.reader embedded in *apimodel.Content. func (p *Processor) GetFile( ctx context.Context, - requestingAccount *gtsmodel.Account, + requester *gtsmodel.Account, form *apimodel.GetContentRequestForm, ) (*apimodel.Content, gtserror.WithCode) { // parse the form fields @@ -69,13 +69,13 @@ func (p *Processor) GetFile( } // make sure the requesting account and the media account don't block each other - if requestingAccount != nil { - blocked, err := p.state.DB.IsEitherBlocked(ctx, requestingAccount.ID, owningAccountID) + if requester != nil { + blocked, err := p.state.DB.IsEitherBlocked(ctx, requester.ID, owningAccountID) if err != nil { - return nil, gtserror.NewErrorNotFound(fmt.Errorf("block status could not be established between accounts %s and %s: %s", owningAccountID, requestingAccount.ID, err)) + return nil, gtserror.NewErrorNotFound(fmt.Errorf("block status could not be established between accounts %s and %s: %s", owningAccountID, requester.ID, err)) } if blocked { - return nil, gtserror.NewErrorNotFound(fmt.Errorf("block exists between accounts %s and %s", owningAccountID, requestingAccount.ID)) + return nil, gtserror.NewErrorNotFound(fmt.Errorf("block exists between accounts %s and %s", owningAccountID, requester.ID)) } } @@ 
-83,71 +83,78 @@ func (p *Processor) GetFile( // so we need to take different steps depending on the media type being requested switch mediaType { case media.TypeEmoji: - return p.getEmojiContent(ctx, wantedMediaID, owningAccountID, mediaSize) + return p.getEmojiContent(ctx, + owningAccountID, + wantedMediaID, + mediaSize, + ) case media.TypeAttachment, media.TypeHeader, media.TypeAvatar: - return p.getAttachmentContent(ctx, requestingAccount, wantedMediaID, owningAccountID, mediaSize) + return p.getAttachmentContent(ctx, + requester, + owningAccountID, + wantedMediaID, + mediaSize, + ) default: return nil, gtserror.NewErrorNotFound(fmt.Errorf("media type %s not recognized", mediaType)) } } -/* - UTIL FUNCTIONS -*/ - -func parseType(s string) (media.Type, error) { - switch s { - case string(media.TypeAttachment): - return media.TypeAttachment, nil - case string(media.TypeHeader): - return media.TypeHeader, nil - case string(media.TypeAvatar): - return media.TypeAvatar, nil - case string(media.TypeEmoji): - return media.TypeEmoji, nil +func (p *Processor) getAttachmentContent( + ctx context.Context, + requester *gtsmodel.Account, + ownerID string, + mediaID string, + sizeStr media.Size, +) ( + *apimodel.Content, + gtserror.WithCode, +) { + // Search for media with given ID in the database. 
+ attach, err := p.state.DB.GetAttachmentByID(ctx, mediaID) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + err := gtserror.Newf("error fetching media from database: %w", err) + return nil, gtserror.NewErrorInternalError(err) } - return "", fmt.Errorf("%s not a recognized media.Type", s) -} -func parseSize(s string) (media.Size, error) { - switch s { - case string(media.SizeSmall): - return media.SizeSmall, nil - case string(media.SizeOriginal): - return media.SizeOriginal, nil - case string(media.SizeStatic): - return media.SizeStatic, nil + if attach == nil { + const text = "media not found" + return nil, gtserror.NewErrorNotFound(errors.New(text), text) } - return "", fmt.Errorf("%s not a recognized media.Size", s) -} -func (p *Processor) getAttachmentContent(ctx context.Context, requestingAccount *gtsmodel.Account, wantedMediaID string, owningAccountID string, mediaSize media.Size) (*apimodel.Content, gtserror.WithCode) { - // retrieve attachment from the database and do basic checks on it - a, err := p.state.DB.GetAttachmentByID(ctx, wantedMediaID) - if err != nil { - err = gtserror.Newf("attachment %s could not be taken from the db: %w", wantedMediaID, err) - return nil, gtserror.NewErrorNotFound(err) + // Ensure the 'owner' owns media. + if attach.AccountID != ownerID { + const text = "media was not owned by passed account id" + return nil, gtserror.NewErrorNotFound(errors.New(text) /* no help text! */) } - if a.AccountID != owningAccountID { - err = gtserror.Newf("attachment %s is not owned by %s", wantedMediaID, owningAccountID) - return nil, gtserror.NewErrorNotFound(err) - } + var remoteURL *url.URL + if attach.RemoteURL != "" { - // If this is an "Unknown" file type, ie., one we - // tried to process and couldn't, or one we refused - // to process because it wasn't supported, then we - // can skip a lot of steps here by simply forwarding - // the request to the remote URL. 
- if a.Type == gtsmodel.FileTypeUnknown { - remoteURL, err := url.Parse(a.RemoteURL) + // Parse media remote URL to valid URL object. + remoteURL, err = url.Parse(attach.RemoteURL) if err != nil { - err = gtserror.Newf("error parsing remote URL of 'Unknown'-type attachment for redirection: %w", err) + err := gtserror.Newf("invalid media remote url %s: %w", attach.RemoteURL, err) + return nil, gtserror.NewErrorInternalError(err) + } + } + + // Uknown file types indicate no *locally* + // stored data we can serve. Handle separately. + if attach.Type == gtsmodel.FileTypeUnknown { + if remoteURL == nil { + err := gtserror.Newf("missing remote url for unknown type media %s: %w", attach.ID, err) return nil, gtserror.NewErrorInternalError(err) } + // If this is an "Unknown" file type, ie., one we + // tried to process and couldn't, or one we refused + // to process because it wasn't supported, then we + // can skip a lot of steps here by simply forwarding + // the request to the remote URL. url := &storage.PresignedURL{ URL: remoteURL, + // We might manage to cache the media // at some point, so set a low-ish expiry. Expiry: time.Now().Add(2 * time.Hour), @@ -156,162 +163,197 @@ func (p *Processor) getAttachmentContent(ctx context.Context, requestingAccount return &apimodel.Content{URL: url}, nil } - if !*a.Cached { - // if we don't have it cached, then we can assume two things: - // 1. this is remote media, since local media should never be uncached - // 2. 
we need to fetch it again using a transport and the media manager - remoteMediaIRI, err := url.Parse(a.RemoteURL) - if err != nil { - return nil, gtserror.NewErrorNotFound(fmt.Errorf("error parsing remote media iri %s: %w", a.RemoteURL, err)) - } - - // use an empty string as requestingUsername to use the instance account, unless the request for this - // media has been http signed, then use the requesting account to make the request to remote server - var requestingUsername string - if requestingAccount != nil { - requestingUsername = requestingAccount.Username - } + var requestUser string - // Pour one out for tobi's original streamed recache - // (streaming data both to the client and storage). - // Gone and forever missed <3 - // - // [ - // the reason it was removed was because a slow - // client connection could hold open a storage - // recache operation -> holding open a media worker. - // ] - - dataFn := func(ctx context.Context) (io.ReadCloser, int64, error) { - t, err := p.transportController.NewTransportForUsername(ctx, requestingUsername) - if err != nil { - return nil, 0, err - } - return t.DereferenceMedia(gtscontext.SetFastFail(ctx), remoteMediaIRI) - } + if requester != nil { + // Set requesting acc username. + requestUser = requester.Username + } - // Start recaching this media with the prepared data function. - processingMedia, err := p.mediaManager.PreProcessMediaRecache(ctx, dataFn, wantedMediaID) - if err != nil { - return nil, gtserror.NewErrorNotFound(fmt.Errorf("error recaching media: %w", err)) - } + // Ensure that stored media is cached. + // (this handles local media / recaches). 
+ attach, err = p.federator.RefreshMedia( + ctx, + requestUser, + attach, + media.AdditionalMediaInfo{}, + false, + ) + if err != nil { + err := gtserror.Newf("error recaching media: %w", err) + return nil, gtserror.NewErrorNotFound(err) + } - // Load attachment and block until complete - a, err = processingMedia.LoadAttachment(ctx) - if err != nil { - return nil, gtserror.NewErrorNotFound(fmt.Errorf("error loading recached attachment: %w", err)) - } + // Start preparing API content model. + apiContent := &apimodel.Content{ + ContentUpdated: attach.UpdatedAt, } - var ( - storagePath string - attachmentContent = &apimodel.Content{ - ContentUpdated: a.UpdatedAt, - } - ) + // Retrieve appropriate + // size file from storage. + switch sizeStr { - // get file information from the attachment depending on the requested media size - switch mediaSize { case media.SizeOriginal: - attachmentContent.ContentType = a.File.ContentType - attachmentContent.ContentLength = int64(a.File.FileSize) - storagePath = a.File.Path + apiContent.ContentType = attach.File.ContentType + apiContent.ContentLength = int64(attach.File.FileSize) + return p.getContent(ctx, + attach.File.Path, + apiContent, + ) + case media.SizeSmall: - attachmentContent.ContentType = a.Thumbnail.ContentType - attachmentContent.ContentLength = int64(a.Thumbnail.FileSize) - storagePath = a.Thumbnail.Path + apiContent.ContentType = attach.Thumbnail.ContentType + apiContent.ContentLength = int64(attach.Thumbnail.FileSize) + return p.getContent(ctx, + attach.Thumbnail.Path, + apiContent, + ) + default: - return nil, gtserror.NewErrorNotFound(fmt.Errorf("media size %s not recognized for attachment", mediaSize)) + const text = "invalid media attachment size" + return nil, gtserror.NewErrorBadRequest(errors.New(text), text) } - - // ... 
so now we can safely return it - return p.retrieveFromStorage(ctx, storagePath, attachmentContent) } -func (p *Processor) getEmojiContent(ctx context.Context, fileName string, owningAccountID string, emojiSize media.Size) (*apimodel.Content, gtserror.WithCode) { - emojiContent := &apimodel.Content{} - var storagePath string +func (p *Processor) getEmojiContent( + ctx context.Context, - // reconstruct the static emoji image url -- reason - // for using the static URL rather than full size url - // is that static emojis are always encoded as png, - // so this is more reliable than using full size url - imageStaticURL := uris.URIForAttachment( - owningAccountID, + ownerID string, + emojiID string, + sizeStr media.Size, +) ( + *apimodel.Content, + gtserror.WithCode, +) { + // Reconstruct static emoji image URL to search for it. + // As refreshed emojis use a newly generated path ID to + // differentiate them (cache-wise) from the original. + staticURL := uris.URIForAttachment( + ownerID, string(media.TypeEmoji), string(media.SizeStatic), - fileName, + emojiID, "png", ) - e, err := p.state.DB.GetEmojiByStaticURL(ctx, imageStaticURL) - if err != nil { - return nil, gtserror.NewErrorNotFound(fmt.Errorf("emoji %s could not be taken from the db: %w", fileName, err)) + // Search for emoji with given static URL in the database. + emoji, err := p.state.DB.GetEmojiByStaticURL(ctx, staticURL) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + err := gtserror.Newf("error fetching emoji from database: %w", err) + return nil, gtserror.NewErrorInternalError(err) } - if *e.Disabled { - return nil, gtserror.NewErrorNotFound(fmt.Errorf("emoji %s has been disabled", fileName)) + if emoji == nil { + const text = "emoji not found" + return nil, gtserror.NewErrorNotFound(errors.New(text), text) } - if !*e.Cached { - // if we don't have it cached, then we can assume two things: - // 1. this is remote emoji, since local emoji should never be uncached - // 2. 
we need to fetch it again using a transport and the media manager - remoteURL, err := url.Parse(e.ImageRemoteURL) - if err != nil { - return nil, gtserror.NewErrorNotFound(fmt.Errorf("error parsing remote emoji iri %s: %w", e.ImageRemoteURL, err)) - } + if *emoji.Disabled { + const text = "emoji has been disabled" + return nil, gtserror.NewErrorNotFound(errors.New(text), text) + } - dataFn := func(ctx context.Context) (io.ReadCloser, int64, error) { - t, err := p.transportController.NewTransportForUsername(ctx, "") - if err != nil { - return nil, 0, err - } - return t.DereferenceMedia(gtscontext.SetFastFail(ctx), remoteURL) - } + // Ensure that stored emoji is cached. + // (this handles local emoji / recaches). + emoji, err = p.federator.RefreshEmoji( + ctx, + emoji, + media.AdditionalEmojiInfo{}, + false, + ) + if err != nil { + err := gtserror.Newf("error recaching emoji: %w", err) + return nil, gtserror.NewErrorNotFound(err) + } - // Start recaching this emoji with the prepared data function. - processingEmoji, err := p.mediaManager.PreProcessEmojiRecache(ctx, dataFn, e.ID) - if err != nil { - return nil, gtserror.NewErrorNotFound(fmt.Errorf("error recaching emoji: %w", err)) - } + // Start preparing API content model. + apiContent := &apimodel.Content{} - // Load attachment and block until complete - e, err = processingEmoji.LoadEmoji(ctx) - if err != nil { - return nil, gtserror.NewErrorNotFound(fmt.Errorf("error loading recached emoji: %w", err)) - } - } + // Retrieve appropriate + // size file from storage. 
+ switch sizeStr { - switch emojiSize { case media.SizeOriginal: - emojiContent.ContentType = e.ImageContentType - emojiContent.ContentLength = int64(e.ImageFileSize) - storagePath = e.ImagePath + apiContent.ContentType = emoji.ImageContentType + apiContent.ContentLength = int64(emoji.ImageFileSize) + return p.getContent(ctx, + emoji.ImagePath, + apiContent, + ) + case media.SizeStatic: - emojiContent.ContentType = e.ImageStaticContentType - emojiContent.ContentLength = int64(e.ImageStaticFileSize) - storagePath = e.ImageStaticPath + apiContent.ContentType = emoji.ImageStaticContentType + apiContent.ContentLength = int64(emoji.ImageStaticFileSize) + return p.getContent(ctx, + emoji.ImageStaticPath, + apiContent, + ) + default: - return nil, gtserror.NewErrorNotFound(fmt.Errorf("media size %s not recognized for emoji", emojiSize)) + const text = "invalid media attachment size" + return nil, gtserror.NewErrorBadRequest(errors.New(text), text) } - - return p.retrieveFromStorage(ctx, storagePath, emojiContent) } -func (p *Processor) retrieveFromStorage(ctx context.Context, storagePath string, content *apimodel.Content) (*apimodel.Content, gtserror.WithCode) { +// getContent performs the final file fetching of +// stored content at path in storage. This is +// populated in the apimodel.Content{} and returned. +// (note: this also handles un-proxied S3 storage). +func (p *Processor) getContent( + ctx context.Context, + path string, + content *apimodel.Content, +) ( + *apimodel.Content, + gtserror.WithCode, +) { // If running on S3 storage with proxying disabled then - // just fetch a pre-signed URL instead of serving the content. - if url := p.state.Storage.URL(ctx, storagePath); url != nil { + // just fetch pre-signed URL instead of the content. 
+ if url := p.state.Storage.URL(ctx, path); url != nil { content.URL = url return content, nil } - reader, err := p.state.Storage.GetStream(ctx, storagePath) - if err != nil { - return nil, gtserror.NewErrorNotFound(fmt.Errorf("error retrieving from storage: %s", err)) + // Fetch file stream for the stored media at path. + rc, err := p.state.Storage.GetStream(ctx, path) + if err != nil && !storage.IsNotFound(err) { + err := gtserror.Newf("error getting file %s from storage: %w", path, err) + return nil, gtserror.NewErrorInternalError(err) } - content.Content = reader + // Ensure found. + if rc == nil { + const text = "file not found" + return nil, gtserror.NewErrorNotFound(errors.New(text), text) + } + + // Return with stream. + content.Content = rc return content, nil } + +func parseType(s string) (media.Type, error) { + switch s { + case string(media.TypeAttachment): + return media.TypeAttachment, nil + case string(media.TypeHeader): + return media.TypeHeader, nil + case string(media.TypeAvatar): + return media.TypeAvatar, nil + case string(media.TypeEmoji): + return media.TypeEmoji, nil + } + return "", fmt.Errorf("%s not a recognized media.Type", s) +} + +func parseSize(s string) (media.Size, error) { + switch s { + case string(media.SizeSmall): + return media.SizeSmall, nil + case string(media.SizeOriginal): + return media.SizeOriginal, nil + case string(media.SizeStatic): + return media.SizeStatic, nil + } + return "", fmt.Errorf("%s not a recognized media.Size", s) +} |