summary | refs | log | tree | commit | diff
path: root/internal
diff options
context:
space:
mode:
author: kim <grufwub@gmail.com>, 2025-11-10 13:07:34 +0100
committer: tobi <tobi.smethurst@protonmail.com>, 2025-11-17 14:14:57 +0100
commit: c61b89fd413b3a8f3782981509d21186bef82a1f (patch)
tree: 503bc2d83cf7d5ea35ef974ec2c5ae9050a3a074 /internal
parent: [chore] update dependencies (#4547) (diff)
download: gotosocial-c61b89fd413b3a8f3782981509d21186bef82a1f.tar.xz
[performance] remove hard reliance on .Cached field to indicate whether media / emoji is cached (#4545)
This removes our hard reliance on the `.Cached` field of media and emojis to determine whether they exist in storage. We still make use of it as a useful flag to know whether to even bother checking storage, but we ultimately rely on the `ErrNotFound` response from storage to determine whether the media exists or needs recaching. This in turn removes our hard reliance on performing the `LogFixCacheStates()` cleanup operations for media and emojis, which should reduce a whole bunch of S3 storage driver calls (thus reducing cost for metered S3 buckets).

Reviewed-on: https://codeberg.org/superseriousbusiness/gotosocial/pulls/4545
Co-authored-by: kim <grufwub@gmail.com>
Co-committed-by: kim <grufwub@gmail.com>
Diffstat (limited to 'internal')
-rw-r--r-- internal/cleaner/cleaner.go 4
-rw-r--r-- internal/cleaner/emoji.go 8
-rw-r--r-- internal/cleaner/media.go 8
-rw-r--r-- internal/processing/admin/media.go 8
-rw-r--r-- internal/processing/media/getfile.go 245
5 files changed, 172 insertions, 101 deletions
diff --git a/internal/cleaner/cleaner.go b/internal/cleaner/cleaner.go
index 7e32566ee..ab9a1867e 100644
--- a/internal/cleaner/cleaner.go
+++ b/internal/cleaner/cleaner.go
@@ -141,8 +141,8 @@ func (c *Cleaner) ScheduleJobs() error {
)
}
- // Time travel from
- // year zero, groovy.
+ // Time travel from the
+ // year zero, groovy baby.
firstCleanupAt := time.Date(
now.Year(),
now.Month(),
diff --git a/internal/cleaner/emoji.go b/internal/cleaner/emoji.go
index 245dfac27..e72ee9665 100644
--- a/internal/cleaner/emoji.go
+++ b/internal/cleaner/emoji.go
@@ -41,10 +41,16 @@ func (e *Emoji) All(ctx context.Context, maxRemoteDays int) {
e.LogUncacheRemote(ctx, t)
e.LogFixBroken(ctx)
e.LogPruneUnused(ctx)
- e.LogFixCacheStates(ctx)
_ = e.state.Storage.Storage.Clean(ctx)
}
+// AllAndFix calls LogFixCacheStates(), followed by All(), it
+// is done this way round so Storage.Clean() is performed last.
+func (e *Emoji) AllAndFix(ctx context.Context, maxRemoteDays int) {
+ e.LogFixCacheStates(ctx)
+ e.All(ctx, maxRemoteDays)
+}
+
// LogUncacheRemote performs Emoji.UncacheRemote(...), logging the start and outcome.
func (e *Emoji) LogUncacheRemote(ctx context.Context, olderThan time.Time) {
log.Infof(ctx, "start older than: %s", olderThan.Format(time.Stamp))
diff --git a/internal/cleaner/media.go b/internal/cleaner/media.go
index 99fd5779f..bd02cb2a3 100644
--- a/internal/cleaner/media.go
+++ b/internal/cleaner/media.go
@@ -44,10 +44,16 @@ func (m *Media) All(ctx context.Context, maxRemoteDays int) {
m.LogUncacheRemote(ctx, t)
m.LogPruneOrphaned(ctx)
m.LogPruneUnused(ctx)
- m.LogFixCacheStates(ctx)
_ = m.state.Storage.Storage.Clean(ctx)
}
+// AllAndFix calls LogFixCacheStates(), followed by All(), it
+// is done this way round so Storage.Clean() is performed last.
+func (m *Media) AllAndFix(ctx context.Context, maxRemoteDays int) {
+ m.LogFixCacheStates(ctx)
+ m.All(ctx, maxRemoteDays)
+}
+
// LogUncacheRemote performs Media.UncacheRemote(...), logging the start and outcome.
func (m *Media) LogUncacheRemote(ctx context.Context, olderThan time.Time) {
log.Infof(ctx, "start older than: %s", olderThan.Format(time.Stamp))
diff --git a/internal/processing/admin/media.go b/internal/processing/admin/media.go
index 785ee0e98..11394bbed 100644
--- a/internal/processing/admin/media.go
+++ b/internal/processing/admin/media.go
@@ -56,11 +56,11 @@ func (p *Processor) MediaPrune(ctx context.Context, mediaRemoteCacheDays int) gt
return gtserror.NewErrorBadRequest(err, err.Error())
}
- // Start background task performing all media cleanup tasks.
go func() {
- ctx := context.Background()
- p.cleaner.Media().All(ctx, mediaRemoteCacheDays)
- p.cleaner.Emoji().All(ctx, mediaRemoteCacheDays)
+ // Start background task performing all media cleanup tasks.
+ ctx := gtscontext.WithValues(context.Background(), ctx)
+ p.cleaner.Media().AllAndFix(ctx, mediaRemoteCacheDays)
+ p.cleaner.Emoji().AllAndFix(ctx, mediaRemoteCacheDays)
}()
return nil
diff --git a/internal/processing/media/getfile.go b/internal/processing/media/getfile.go
index 3b9b92adc..0aeac04b3 100644
--- a/internal/processing/media/getfile.go
+++ b/internal/processing/media/getfile.go
@@ -21,6 +21,8 @@ import (
"context"
"errors"
"fmt"
+ "io"
+ "net/http"
"net/url"
"strings"
"time"
@@ -33,6 +35,7 @@ import (
"code.superseriousbusiness.org/gotosocial/internal/regexes"
"code.superseriousbusiness.org/gotosocial/internal/storage"
"code.superseriousbusiness.org/gotosocial/internal/uris"
+ "code.superseriousbusiness.org/gotosocial/internal/util"
)
// GetFile retrieves a file from storage and streams it back
@@ -162,47 +165,94 @@ func (p *Processor) getAttachmentContent(
requestUser = requester.Username
}
- // Ensure that stored media is cached.
- // (this handles local media / recaches).
- attach, err = p.federator.RefreshMedia(
- ctx,
- requestUser,
- attach,
- media.AdditionalMediaInfo{},
- false,
- )
- if err != nil {
- err := gtserror.Newf("error recaching media: %w", err)
- return nil, gtserror.NewErrorNotFound(err)
- }
-
- // Start preparing API content model.
- apiContent := &apimodel.Content{}
-
- // Retrieve appropriate
- // size file from storage.
+ // Start preparing API content model and other
+ // values depending on requested media size.
+ var content apimodel.Content
+ var mediaPath string
switch sizeStr {
+ // Original media size.
case media.SizeOriginal:
- apiContent.ContentType = attach.File.ContentType
- apiContent.ContentLength = int64(attach.File.FileSize)
- return p.getContent(ctx,
- attach.File.Path,
- apiContent,
- )
+ content.ContentType = attach.File.ContentType
+ content.ContentLength = int64(attach.File.FileSize)
+ mediaPath = attach.File.Path
+ // Thumbnail media size.
case media.SizeSmall:
- apiContent.ContentType = attach.Thumbnail.ContentType
- apiContent.ContentLength = int64(attach.Thumbnail.FileSize)
- return p.getContent(ctx,
- attach.Thumbnail.Path,
- apiContent,
- )
+ content.ContentType = attach.Thumbnail.ContentType
+ content.ContentLength = int64(attach.Thumbnail.FileSize)
+ mediaPath = attach.Thumbnail.Path
default:
- const text = "invalid media attachment size"
- return nil, gtserror.NewErrorBadRequest(errors.New(text), text)
+ const text = "invalid media size"
+ return nil, gtserror.NewErrorBadRequest(
+ errors.New(text),
+ text,
+ )
+ }
+
+ // Attachment file
+ // stream from storage.
+ var rc io.ReadCloser
+
+ // Check media is meant
+ // to be cached locally.
+ if *attach.Cached {
+
+ // Check storage for media at determined path.
+ rc, err = p.state.Storage.GetStream(ctx, mediaPath)
+ if err != nil && !storage.IsNotFound(err) {
+ err := gtserror.Newf("storage error getting media %s: %w", attach.URL, err)
+ return nil, gtserror.NewErrorInternalError(err)
+ }
+ }
+
+ if rc == nil {
+ // This is local media without
+ // a cached attachment, unfulfillable!
+ if attach.IsLocal() {
+ return nil, gtserror.NewfWithCode(http.StatusNotFound,
+ "local media file not found: %s", attach.URL)
+ }
+
+ // Whether the cached flag was set or
+ // not, we know it isn't in storage.
+ attach.Cached = util.Ptr(false)
+
+ // Attempt to recache this remote media.
+ attach, err = p.federator.RefreshMedia(ctx,
+ requestUser,
+ attach,
+ media.AdditionalMediaInfo{},
+ false,
+ )
+ if err != nil {
+ err := gtserror.Newf("error recaching media %s: %w", attach.URL, err)
+ return nil, gtserror.NewErrorNotFound(err)
+ }
+
+ // Check storage for media at determined path.
+ rc, err = p.state.Storage.GetStream(ctx, mediaPath)
+ if err != nil && !storage.IsNotFound(err) {
+ err := gtserror.Newf("storage error getting media %s: %w", attach.URL, err)
+ return nil, gtserror.NewErrorInternalError(err)
+ } else if rc == nil {
+ return nil, gtserror.NewfWithCode(http.StatusNotFound,
+ "remote media file not found: %s", attach.URL)
+ }
+ }
+
+ // If running on S3 storage with proxying disabled,
+ // just fetch a pre-signed URL instead of the content.
+ if url := p.state.Storage.URL(ctx, mediaPath); url != nil {
+ _ = rc.Close() // close storage stream
+ content.URL = url
+ return &content, nil
}
+
+ // Return with stream.
+ content.Content = rc
+ return &content, nil
}
func (p *Processor) getEmojiContent(
@@ -242,83 +292,92 @@ func (p *Processor) getEmojiContent(
return nil, gtserror.NewErrorNotFound(errors.New(text), text)
}
- // Ensure that stored emoji is cached.
- // (this handles local emoji / recaches).
- emoji, err = p.federator.RecacheEmoji(
- ctx,
- emoji,
- false, // async
- )
- if err != nil {
- err := gtserror.Newf("error recaching emoji: %w", err)
- return nil, gtserror.NewErrorNotFound(err)
- }
-
- // Start preparing API content model.
- apiContent := &apimodel.Content{}
-
- // Retrieve appropriate
- // size file from storage.
+ // Start preparing API content model and other
+ // values depending on requested media size.
+ var content apimodel.Content
+ var emojiPath string
switch sizeStr {
+ // Original emoji image.
case media.SizeOriginal:
- apiContent.ContentType = emoji.ImageContentType
- apiContent.ContentLength = int64(emoji.ImageFileSize)
- return p.getContent(ctx,
- emoji.ImagePath,
- apiContent,
- )
+ content.ContentType = emoji.ImageContentType
+ content.ContentLength = int64(emoji.ImageFileSize)
+ emojiPath = emoji.ImagePath
+ // Static emoji image.
case media.SizeStatic:
- apiContent.ContentType = emoji.ImageStaticContentType
- apiContent.ContentLength = int64(emoji.ImageStaticFileSize)
- return p.getContent(ctx,
- emoji.ImageStaticPath,
- apiContent,
- )
+ content.ContentType = emoji.ImageStaticContentType
+ content.ContentLength = int64(emoji.ImageStaticFileSize)
+ emojiPath = emoji.ImageStaticPath
default:
- const text = "invalid media attachment size"
- return nil, gtserror.NewErrorBadRequest(errors.New(text), text)
+ const text = "invalid emoji size"
+ return nil, gtserror.NewErrorBadRequest(
+ errors.New(text),
+ text,
+ )
}
-}
-// getContent performs the final file fetching of
-// stored content at path in storage. This is
-// populated in the apimodel.Content{} and returned.
-// (note: this also handles un-proxied S3 storage).
-func (p *Processor) getContent(
- ctx context.Context,
- path string,
- content *apimodel.Content,
-) (
- *apimodel.Content,
- gtserror.WithCode,
-) {
- // If running on S3 storage with proxying disabled then
- // just fetch pre-signed URL instead of the content.
- if url := p.state.Storage.URL(ctx, path); url != nil {
- content.URL = url
- return content, nil
- }
+ // Emoji image file
+ // stream from storage.
+ var rc io.ReadCloser
- // Fetch file stream for the stored media at path.
- rc, err := p.state.Storage.GetStream(ctx, path)
- if err != nil && !storage.IsNotFound(err) {
- err := gtserror.Newf("error getting file %s from storage: %w", path, err)
- return nil, gtserror.NewErrorInternalError(err)
+ // Check emoji is meant
+ // to be cached locally.
+ if *emoji.Cached {
+
+ // Check storage for emoji at determined image path.
+ rc, err = p.state.Storage.GetStream(ctx, emojiPath)
+ if err != nil && !storage.IsNotFound(err) {
+ err := gtserror.Newf("storage error getting emoji %s: %w", emoji.URI, err)
+ return nil, gtserror.NewErrorInternalError(err)
+ }
}
- // Ensure found.
if rc == nil {
- err := gtserror.Newf("file not found at %s", path)
- const text = "file not found"
- return nil, gtserror.NewErrorNotFound(err, text)
+ // This is a local emoji without
+ // a cached image, unfulfillable!
+ if emoji.IsLocal() {
+ return nil, gtserror.NewfWithCode(http.StatusNotFound,
+ "local emoji image not found: %s", emoji.URI)
+ }
+
+ // Whether the cached flag was set or
+ // not, we know it isn't in storage.
+ emoji.Cached = util.Ptr(false)
+
+ // Attempt to recache this remote emoji.
+ emoji, err = p.federator.RecacheEmoji(ctx,
+ emoji,
+ false,
+ )
+ if err != nil {
+ err := gtserror.Newf("error recaching emoji %s: %w", emoji.URI, err)
+ return nil, gtserror.NewErrorNotFound(err)
+ }
+
+ // Check storage for emoji at determined image path.
+ rc, err = p.state.Storage.GetStream(ctx, emojiPath)
+ if err != nil && !storage.IsNotFound(err) {
+ err := gtserror.Newf("storage error getting emoji %s after recache: %w", emoji.URI, err)
+ return nil, gtserror.NewErrorInternalError(err)
+ } else if rc == nil {
+ return nil, gtserror.NewfWithCode(http.StatusNotFound,
+ "remote emoji image not found: %s", emoji.URI)
+ }
+ }
+
+ // If running on S3 storage with proxying disabled,
+ // just fetch a pre-signed URL instead of the content.
+ if url := p.state.Storage.URL(ctx, emojiPath); url != nil {
+ _ = rc.Close() // close storage stream
+ content.URL = url
+ return &content, nil
}
// Return with stream.
content.Content = rc
- return content, nil
+ return &content, nil
}
// handles serving Content for "unknown" file