author    | 2023-06-22 20:46:36 +0100
committer | 2023-06-22 20:46:36 +0100
commit    | 9a22102fa8b1ce47571d5bba71e8f36895d21bf0 (patch)
tree      | 3c2af6db0a3905d31243cd840d1dd50bea59dbb0 /internal/media
parent    | [docs] Clarify email requirement for OIDC (#1918) (diff)
download  | gotosocial-9a22102fa8b1ce47571d5bba71e8f36895d21bf0.tar.xz
[bugfix/chore] oauth entropy fix + media cleanup tasks rewrite (#1853)
Diffstat (limited to 'internal/media')
-rw-r--r-- | internal/media/manager.go         |  41
-rw-r--r-- | internal/media/manager_test.go    |   6
-rw-r--r-- | internal/media/processingemoji.go |  29
-rw-r--r-- | internal/media/processingmedia.go |  31
-rw-r--r-- | internal/media/prune.go           | 373
-rw-r--r-- | internal/media/prune_test.go      | 357
-rw-r--r-- | internal/media/refetch.go         |   2
7 files changed, 39 insertions, 800 deletions
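The bulk of the diff below swaps fmt.Errorf for gtserror.Newf in the media manager and processors. Judging by the updated expectations in manager_test.go (errors now read "store: …" and "finish: …"), gtserror.Newf prefixes the formatted message with the name of the calling function, so call sites no longer hard-code their own prefixes (the old "preProcessEmoji: …" strings in manager.go). The sketch below is a minimal, hypothetical illustration of that kind of caller-prefixed wrapping using runtime.Caller; it is not the actual gtserror implementation, and the helper name newf is made up for the example.

```go
package main

import (
	"fmt"
	"runtime"
	"strings"
)

// newf is a hypothetical stand-in for gtserror.Newf: it formats the message
// like fmt.Errorf, but prefixes it with the short name of the calling
// function. That matches the new test expectations, where errors returned
// from store() and finish() now begin with "store: " and "finish: ".
func newf(format string, args ...interface{}) error {
	prefix := "unknown"
	if pc, _, _, ok := runtime.Caller(1); ok {
		if fn := runtime.FuncForPC(pc); fn != nil {
			name := fn.Name() // e.g. "main.(*ProcessingEmoji).store"
			prefix = name[strings.LastIndexByte(name, '.')+1:]
		}
	}
	return fmt.Errorf(prefix+": "+format, args...)
}

type ProcessingEmoji struct{}

func (p *ProcessingEmoji) store() error {
	return newf("given emoji size %s greater than max allowed %s", "630kiB", "50.0kiB")
}

func main() {
	// Prints: store: given emoji size 630kiB greater than max allowed 50.0kiB
	fmt.Println((&ProcessingEmoji{}).store())
}
```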
diff --git a/internal/media/manager.go b/internal/media/manager.go index ec95b67e9..1d673128a 100644 --- a/internal/media/manager.go +++ b/internal/media/manager.go @@ -25,10 +25,8 @@ import ( "time" "codeberg.org/gruf/go-iotools" - "codeberg.org/gruf/go-runners" - "codeberg.org/gruf/go-sched" "codeberg.org/gruf/go-store/v2/storage" - "github.com/superseriousbusiness/gotosocial/internal/config" + "github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/id" "github.com/superseriousbusiness/gotosocial/internal/log" @@ -61,7 +59,6 @@ type Manager struct { // See internal/concurrency.NewWorkerPool() documentation for further information. func NewManager(state *state.State) *Manager { m := &Manager{state: state} - scheduleCleanupJobs(m) return m } @@ -214,7 +211,7 @@ func (m *Manager) ProcessMedia(ctx context.Context, data DataFunc, accountID str func (m *Manager) PreProcessEmoji(ctx context.Context, data DataFunc, shortcode string, emojiID string, uri string, ai *AdditionalEmojiInfo, refresh bool) (*ProcessingEmoji, error) { instanceAccount, err := m.state.DB.GetInstanceAccount(ctx, "") if err != nil { - return nil, fmt.Errorf("preProcessEmoji: error fetching this instance account from the db: %s", err) + return nil, gtserror.Newf("error fetching this instance account from the db: %s", err) } var ( @@ -227,7 +224,7 @@ func (m *Manager) PreProcessEmoji(ctx context.Context, data DataFunc, shortcode // Look for existing emoji by given ID. emoji, err = m.state.DB.GetEmojiByID(ctx, emojiID) if err != nil { - return nil, fmt.Errorf("preProcessEmoji: error fetching emoji to refresh from the db: %s", err) + return nil, gtserror.Newf("error fetching emoji to refresh from the db: %s", err) } // if this is a refresh, we will end up with new images @@ -260,7 +257,7 @@ func (m *Manager) PreProcessEmoji(ctx context.Context, data DataFunc, shortcode newPathID, err = id.NewRandomULID() if err != nil { - return nil, fmt.Errorf("preProcessEmoji: error generating alternateID for emoji refresh: %s", err) + return nil, gtserror.Newf("error generating alternateID for emoji refresh: %s", err) } // store + serve static image at new path ID @@ -356,33 +353,3 @@ func (m *Manager) ProcessEmoji(ctx context.Context, data DataFunc, shortcode str return emoji, nil } - -func scheduleCleanupJobs(m *Manager) { - const day = time.Hour * 24 - - // Calculate closest midnight. - now := time.Now() - midnight := now.Round(day) - - if midnight.Before(now) { - // since <= 11:59am rounds down. - midnight = midnight.Add(day) - } - - // Get ctx associated with scheduler run state. - done := m.state.Workers.Scheduler.Done() - doneCtx := runners.CancelCtx(done) - - // TODO: we'll need to do some thinking to make these - // jobs restartable if we want to implement reloads in - // the future that make call to Workers.Stop() -> Workers.Start(). - - // Schedule the PruneAll task to execute every day at midnight. 
- m.state.Workers.Scheduler.Schedule(sched.NewJob(func(now time.Time) { - err := m.PruneAll(doneCtx, config.GetMediaRemoteCacheDays(), true) - if err != nil { - log.Errorf(nil, "error during prune: %v", err) - } - log.Infof(nil, "finished pruning all in %s", time.Since(now)) - }).EveryAt(midnight, day)) -} diff --git a/internal/media/manager_test.go b/internal/media/manager_test.go index 2bee1091d..7b9b66147 100644 --- a/internal/media/manager_test.go +++ b/internal/media/manager_test.go @@ -214,7 +214,7 @@ func (suite *ManagerTestSuite) TestEmojiProcessBlockingTooLarge() { // do a blocking call to fetch the emoji emoji, err := processingEmoji.LoadEmoji(ctx) - suite.EqualError(err, "given emoji size 630kiB greater than max allowed 50.0kiB") + suite.EqualError(err, "store: given emoji size 630kiB greater than max allowed 50.0kiB") suite.Nil(emoji) } @@ -238,7 +238,7 @@ func (suite *ManagerTestSuite) TestEmojiProcessBlockingTooLargeNoSizeGiven() { // do a blocking call to fetch the emoji emoji, err := processingEmoji.LoadEmoji(ctx) - suite.EqualError(err, "calculated emoji size 630kiB greater than max allowed 50.0kiB") + suite.EqualError(err, "store: calculated emoji size 630kiB greater than max allowed 50.0kiB") suite.Nil(emoji) } @@ -626,7 +626,7 @@ func (suite *ManagerTestSuite) TestNotAnMp4ProcessBlocking() { // we should get an error while loading attachment, err := processingMedia.LoadAttachment(ctx) - suite.EqualError(err, "error decoding video: error determining video metadata: [width height framerate]") + suite.EqualError(err, "finish: error decoding video: error determining video metadata: [width height framerate]") suite.Nil(attachment) } diff --git a/internal/media/processingemoji.go b/internal/media/processingemoji.go index 7c3db8196..d3a1edbf8 100644 --- a/internal/media/processingemoji.go +++ b/internal/media/processingemoji.go @@ -28,6 +28,7 @@ import ( "codeberg.org/gruf/go-runners" "github.com/h2non/filetype" "github.com/superseriousbusiness/gotosocial/internal/config" + "github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/log" "github.com/superseriousbusiness/gotosocial/internal/uris" @@ -137,7 +138,7 @@ func (p *ProcessingEmoji) load(ctx context.Context) (*gtsmodel.Emoji, bool, erro } // Existing emoji we're refreshing, so only need to update. - _, err = p.mgr.state.DB.UpdateEmoji(ctx, p.emoji, columns...) + err = p.mgr.state.DB.UpdateEmoji(ctx, p.emoji, columns...) return err } @@ -160,7 +161,7 @@ func (p *ProcessingEmoji) store(ctx context.Context) error { // Load media from provided data fn. rc, sz, err := p.dataFn(ctx) if err != nil { - return fmt.Errorf("error executing data function: %w", err) + return gtserror.Newf("error executing data function: %w", err) } defer func() { @@ -177,13 +178,13 @@ func (p *ProcessingEmoji) store(ctx context.Context) error { // Read the first 261 header bytes into buffer. if _, err := io.ReadFull(rc, hdrBuf); err != nil { - return fmt.Errorf("error reading incoming media: %w", err) + return gtserror.Newf("error reading incoming media: %w", err) } // Parse file type info from header buffer. 
info, err := filetype.Match(hdrBuf) if err != nil { - return fmt.Errorf("error parsing file type: %w", err) + return gtserror.Newf("error parsing file type: %w", err) } switch info.Extension { @@ -192,7 +193,7 @@ func (p *ProcessingEmoji) store(ctx context.Context) error { // unhandled default: - return fmt.Errorf("unsupported emoji filetype: %s", info.Extension) + return gtserror.Newf("unsupported emoji filetype: %s", info.Extension) } // Recombine header bytes with remaining stream @@ -211,7 +212,7 @@ func (p *ProcessingEmoji) store(ctx context.Context) error { // Check that provided size isn't beyond max. We check beforehand // so that we don't attempt to stream the emoji into storage if not needed. if size := bytesize.Size(sz); sz > 0 && size > maxSize { - return fmt.Errorf("given emoji size %s greater than max allowed %s", size, maxSize) + return gtserror.Newf("given emoji size %s greater than max allowed %s", size, maxSize) } var pathID string @@ -241,14 +242,14 @@ func (p *ProcessingEmoji) store(ctx context.Context) error { // Attempt to remove existing emoji at storage path (might be broken / out-of-date) if err := p.mgr.state.Storage.Delete(ctx, p.emoji.ImagePath); err != nil { - return fmt.Errorf("error removing emoji from storage: %v", err) + return gtserror.Newf("error removing emoji from storage: %v", err) } } // Write the final image reader stream to our storage. sz, err = p.mgr.state.Storage.PutStream(ctx, p.emoji.ImagePath, r) if err != nil { - return fmt.Errorf("error writing emoji to storage: %w", err) + return gtserror.Newf("error writing emoji to storage: %w", err) } // Once again check size in case none was provided previously. @@ -257,7 +258,7 @@ func (p *ProcessingEmoji) store(ctx context.Context) error { if err := p.mgr.state.Storage.Delete(ctx, p.emoji.ImagePath); err != nil { log.Errorf(ctx, "error removing too-large-emoji from storage: %v", err) } - return fmt.Errorf("calculated emoji size %s greater than max allowed %s", size, maxSize) + return gtserror.Newf("calculated emoji size %s greater than max allowed %s", size, maxSize) } // Fill in remaining attachment data now it's stored. @@ -278,19 +279,19 @@ func (p *ProcessingEmoji) finish(ctx context.Context) error { // Fetch a stream to the original file in storage. rc, err := p.mgr.state.Storage.GetStream(ctx, p.emoji.ImagePath) if err != nil { - return fmt.Errorf("error loading file from storage: %w", err) + return gtserror.Newf("error loading file from storage: %w", err) } defer rc.Close() // Decode the image from storage. staticImg, err := decodeImage(rc) if err != nil { - return fmt.Errorf("error decoding image: %w", err) + return gtserror.Newf("error decoding image: %w", err) } // The image should be in-memory by now. if err := rc.Close(); err != nil { - return fmt.Errorf("error closing file: %w", err) + return gtserror.Newf("error closing file: %w", err) } // This shouldn't already exist, but we do a check as it's worth logging. 
@@ -298,7 +299,7 @@ func (p *ProcessingEmoji) finish(ctx context.Context) error { log.Warnf(ctx, "static emoji already exists at storage path: %s", p.emoji.ImagePath) // Attempt to remove static existing emoji at storage path (might be broken / out-of-date) if err := p.mgr.state.Storage.Delete(ctx, p.emoji.ImageStaticPath); err != nil { - return fmt.Errorf("error removing static emoji from storage: %v", err) + return gtserror.Newf("error removing static emoji from storage: %v", err) } } @@ -308,7 +309,7 @@ func (p *ProcessingEmoji) finish(ctx context.Context) error { // Stream-encode the PNG static image into storage. sz, err := p.mgr.state.Storage.PutStream(ctx, p.emoji.ImageStaticPath, enc) if err != nil { - return fmt.Errorf("error stream-encoding static emoji to storage: %w", err) + return gtserror.Newf("error stream-encoding static emoji to storage: %w", err) } // Set written image size. diff --git a/internal/media/processingmedia.go b/internal/media/processingmedia.go index 5c66f561d..acfee48f3 100644 --- a/internal/media/processingmedia.go +++ b/internal/media/processingmedia.go @@ -30,6 +30,7 @@ import ( "github.com/disintegration/imaging" "github.com/h2non/filetype" terminator "github.com/superseriousbusiness/exif-terminator" + "github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/log" "github.com/superseriousbusiness/gotosocial/internal/uris" @@ -145,7 +146,7 @@ func (p *ProcessingMedia) store(ctx context.Context) error { // Load media from provided data fun rc, sz, err := p.dataFn(ctx) if err != nil { - return fmt.Errorf("error executing data function: %w", err) + return gtserror.Newf("error executing data function: %w", err) } defer func() { @@ -162,13 +163,13 @@ func (p *ProcessingMedia) store(ctx context.Context) error { // Read the first 261 header bytes into buffer. if _, err := io.ReadFull(rc, hdrBuf); err != nil { - return fmt.Errorf("error reading incoming media: %w", err) + return gtserror.Newf("error reading incoming media: %w", err) } // Parse file type info from header buffer. info, err := filetype.Match(hdrBuf) if err != nil { - return fmt.Errorf("error parsing file type: %w", err) + return gtserror.Newf("error parsing file type: %w", err) } // Recombine header bytes with remaining stream @@ -187,12 +188,12 @@ func (p *ProcessingMedia) store(ctx context.Context) error { // A file size was provided so we can clean exif data from image. r, err = terminator.Terminate(r, int(sz), info.Extension) if err != nil { - return fmt.Errorf("error cleaning exif data: %w", err) + return gtserror.Newf("error cleaning exif data: %w", err) } } default: - return fmt.Errorf("unsupported file type: %s", info.Extension) + return gtserror.Newf("unsupported file type: %s", info.Extension) } // Calculate attachment file path. @@ -211,14 +212,14 @@ func (p *ProcessingMedia) store(ctx context.Context) error { // Attempt to remove existing media at storage path (might be broken / out-of-date) if err := p.mgr.state.Storage.Delete(ctx, p.media.File.Path); err != nil { - return fmt.Errorf("error removing media from storage: %v", err) + return gtserror.Newf("error removing media from storage: %v", err) } } // Write the final image reader stream to our storage. 
sz, err = p.mgr.state.Storage.PutStream(ctx, p.media.File.Path, r) if err != nil { - return fmt.Errorf("error writing media to storage: %w", err) + return gtserror.Newf("error writing media to storage: %w", err) } // Set written image size. @@ -245,7 +246,7 @@ func (p *ProcessingMedia) finish(ctx context.Context) error { // Fetch a stream to the original file in storage. rc, err := p.mgr.state.Storage.GetStream(ctx, p.media.File.Path) if err != nil { - return fmt.Errorf("error loading file from storage: %w", err) + return gtserror.Newf("error loading file from storage: %w", err) } defer rc.Close() @@ -256,7 +257,7 @@ func (p *ProcessingMedia) finish(ctx context.Context) error { case mimeImageJpeg, mimeImageGif, mimeImageWebp: fullImg, err = decodeImage(rc, imaging.AutoOrientation(true)) if err != nil { - return fmt.Errorf("error decoding image: %w", err) + return gtserror.Newf("error decoding image: %w", err) } // .png image (requires ancillary chunk stripping) @@ -265,14 +266,14 @@ func (p *ProcessingMedia) finish(ctx context.Context) error { Reader: rc, }, imaging.AutoOrientation(true)) if err != nil { - return fmt.Errorf("error decoding image: %w", err) + return gtserror.Newf("error decoding image: %w", err) } // .mp4 video type case mimeVideoMp4: video, err := decodeVideoFrame(rc) if err != nil { - return fmt.Errorf("error decoding video: %w", err) + return gtserror.Newf("error decoding video: %w", err) } // Set video frame as image. @@ -286,7 +287,7 @@ func (p *ProcessingMedia) finish(ctx context.Context) error { // The image should be in-memory by now. if err := rc.Close(); err != nil { - return fmt.Errorf("error closing file: %w", err) + return gtserror.Newf("error closing file: %w", err) } // Set full-size dimensions in attachment info. @@ -314,7 +315,7 @@ func (p *ProcessingMedia) finish(ctx context.Context) error { // Blurhash needs generating from thumb. hash, err := thumbImg.Blurhash() if err != nil { - return fmt.Errorf("error generating blurhash: %w", err) + return gtserror.Newf("error generating blurhash: %w", err) } // Set the attachment blurhash. @@ -326,7 +327,7 @@ func (p *ProcessingMedia) finish(ctx context.Context) error { // Attempt to remove existing thumbnail at storage path (might be broken / out-of-date) if err := p.mgr.state.Storage.Delete(ctx, p.media.Thumbnail.Path); err != nil { - return fmt.Errorf("error removing thumbnail from storage: %v", err) + return gtserror.Newf("error removing thumbnail from storage: %v", err) } } @@ -338,7 +339,7 @@ func (p *ProcessingMedia) finish(ctx context.Context) error { // Stream-encode the JPEG thumbnail image into storage. sz, err := p.mgr.state.Storage.PutStream(ctx, p.media.Thumbnail.Path, enc) if err != nil { - return fmt.Errorf("error stream-encoding thumbnail to storage: %w", err) + return gtserror.Newf("error stream-encoding thumbnail to storage: %w", err) } // Fill in remaining thumbnail now it's stored diff --git a/internal/media/prune.go b/internal/media/prune.go deleted file mode 100644 index 71c8e00ce..000000000 --- a/internal/media/prune.go +++ /dev/null @@ -1,373 +0,0 @@ -// GoToSocial -// Copyright (C) GoToSocial Authors admin@gotosocial.org -// SPDX-License-Identifier: AGPL-3.0-or-later -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. 
-// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see <http://www.gnu.org/licenses/>. - -package media - -import ( - "context" - "errors" - "fmt" - "time" - - "codeberg.org/gruf/go-store/v2/storage" - "github.com/superseriousbusiness/gotosocial/internal/db" - "github.com/superseriousbusiness/gotosocial/internal/gtserror" - "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" - "github.com/superseriousbusiness/gotosocial/internal/log" - "github.com/superseriousbusiness/gotosocial/internal/regexes" - "github.com/superseriousbusiness/gotosocial/internal/uris" -) - -const ( - selectPruneLimit = 50 // Amount of media entries to select at a time from the db when pruning. - unusedLocalAttachmentDays = 3 // Number of days to keep local media in storage if not attached to a status. -) - -// PruneAll runs all of the below pruning/uncacheing functions, and then cleans up any resulting -// empty directories from the storage driver. It can be called as a shortcut for calling the below -// pruning functions one by one. -// -// If blocking is true, then any errors encountered during the prune will be combined + returned to -// the caller. If blocking is false, the prune is run in the background and errors are just logged -// instead. -func (m *Manager) PruneAll(ctx context.Context, mediaCacheRemoteDays int, blocking bool) error { - const dry = false - - f := func(innerCtx context.Context) error { - errs := gtserror.MultiError{} - - pruned, err := m.PruneUnusedLocal(innerCtx, dry) - if err != nil { - errs = append(errs, fmt.Sprintf("error pruning unused local media (%s)", err)) - } else { - log.Infof(ctx, "pruned %d unused local media", pruned) - } - - pruned, err = m.PruneUnusedRemote(innerCtx, dry) - if err != nil { - errs = append(errs, fmt.Sprintf("error pruning unused remote media: (%s)", err)) - } else { - log.Infof(ctx, "pruned %d unused remote media", pruned) - } - - pruned, err = m.UncacheRemote(innerCtx, mediaCacheRemoteDays, dry) - if err != nil { - errs = append(errs, fmt.Sprintf("error uncacheing remote media older than %d day(s): (%s)", mediaCacheRemoteDays, err)) - } else { - log.Infof(ctx, "uncached %d remote media older than %d day(s)", pruned, mediaCacheRemoteDays) - } - - pruned, err = m.PruneOrphaned(innerCtx, dry) - if err != nil { - errs = append(errs, fmt.Sprintf("error pruning orphaned media: (%s)", err)) - } else { - log.Infof(ctx, "pruned %d orphaned media", pruned) - } - - if err := m.state.Storage.Storage.Clean(innerCtx); err != nil { - errs = append(errs, fmt.Sprintf("error cleaning storage: (%s)", err)) - } else { - log.Info(ctx, "cleaned storage") - } - - return errs.Combine() - } - - if blocking { - return f(ctx) - } - - go func() { - if err := f(context.Background()); err != nil { - log.Error(ctx, err) - } - }() - - return nil -} - -// PruneUnusedRemote prunes unused/out of date headers and avatars cached on this instance. -// -// The returned int is the amount of media that was pruned by this function. 
-func (m *Manager) PruneUnusedRemote(ctx context.Context, dry bool) (int, error) { - var ( - totalPruned int - maxID string - attachments []*gtsmodel.MediaAttachment - err error - ) - - // We don't know in advance how many remote attachments will meet - // our criteria for being 'unused'. So a dry run in this case just - // means we iterate through as normal, but do nothing with each entry - // instead of removing it. Define this here so we don't do the 'if dry' - // check inside the loop a million times. - var f func(ctx context.Context, attachment *gtsmodel.MediaAttachment) error - if !dry { - f = m.deleteAttachment - } else { - f = func(_ context.Context, _ *gtsmodel.MediaAttachment) error { - return nil // noop - } - } - - for attachments, err = m.state.DB.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.state.DB.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit) { - maxID = attachments[len(attachments)-1].ID // use the id of the last attachment in the slice as the next 'maxID' value - - for _, attachment := range attachments { - // Retrieve owning account if possible. - var account *gtsmodel.Account - if accountID := attachment.AccountID; accountID != "" { - account, err = m.state.DB.GetAccountByID(ctx, attachment.AccountID) - if err != nil && !errors.Is(err, db.ErrNoEntries) { - // Only return on a real error. - return 0, fmt.Errorf("PruneUnusedRemote: error fetching account with id %s: %w", accountID, err) - } - } - - // Prune each attachment that meets one of the following criteria: - // - Has no owning account in the database. - // - Is a header but isn't the owning account's current header. - // - Is an avatar but isn't the owning account's current avatar. - if account == nil || - (*attachment.Header && attachment.ID != account.HeaderMediaAttachmentID) || - (*attachment.Avatar && attachment.ID != account.AvatarMediaAttachmentID) { - if err := f(ctx, attachment); err != nil { - return totalPruned, err - } - totalPruned++ - } - } - } - - // Make sure we don't have a real error when we leave the loop. - if err != nil && !errors.Is(err, db.ErrNoEntries) { - return totalPruned, err - } - - return totalPruned, nil -} - -// PruneOrphaned prunes files that exist in storage but which do not have a corresponding -// entry in the database. -// -// If dry is true, then nothing will be changed, only the amount that *would* be removed -// is returned to the caller. -func (m *Manager) PruneOrphaned(ctx context.Context, dry bool) (int, error) { - // Emojis are stored under the instance account, so we - // need the ID of the instance account for the next part. - instanceAccount, err := m.state.DB.GetInstanceAccount(ctx, "") - if err != nil { - return 0, fmt.Errorf("PruneOrphaned: error getting instance account: %w", err) - } - - instanceAccountID := instanceAccount.ID - - var orphanedKeys []string - - // Keys in storage will look like the following format: - // `[ACCOUNT_ID]/[MEDIA_TYPE]/[MEDIA_SIZE]/[MEDIA_ID].[EXTENSION]` - // We can filter out keys we're not interested in by matching through a regex. - if err := m.state.Storage.WalkKeys(ctx, func(ctx context.Context, key string) error { - if !regexes.FilePath.MatchString(key) { - // This is not our expected key format. - return nil - } - - // Check whether this storage entry is orphaned. 
- orphaned, err := m.orphaned(ctx, key, instanceAccountID) - if err != nil { - return fmt.Errorf("error checking orphaned status: %w", err) - } - - if orphaned { - // Add this orphaned entry to list of keys. - orphanedKeys = append(orphanedKeys, key) - } - - return nil - }); err != nil { - return 0, fmt.Errorf("PruneOrphaned: error walking keys: %w", err) - } - - totalPruned := len(orphanedKeys) - - if dry { - // Dry run: don't remove anything. - return totalPruned, nil - } - - // This is not a drill! We have to delete stuff! - return m.removeFiles(ctx, orphanedKeys...) -} - -func (m *Manager) orphaned(ctx context.Context, key string, instanceAccountID string) (bool, error) { - pathParts := regexes.FilePath.FindStringSubmatch(key) - if len(pathParts) != 6 { - // This doesn't match our expectations so - // it wasn't created by gts; ignore it. - return false, nil - } - - var ( - mediaType = pathParts[2] - mediaID = pathParts[4] - orphaned = false - ) - - // Look for keys in storage that we don't have an attachment for. - switch Type(mediaType) { - case TypeAttachment, TypeHeader, TypeAvatar: - if _, err := m.state.DB.GetAttachmentByID(ctx, mediaID); err != nil { - if !errors.Is(err, db.ErrNoEntries) { - return false, fmt.Errorf("error calling GetAttachmentByID: %w", err) - } - orphaned = true - } - case TypeEmoji: - // Look using the static URL for the emoji. Emoji images can change, so - // the MEDIA_ID part of the key for emojis will not necessarily correspond - // to the file that's currently being used as the emoji image. - staticURL := uris.GenerateURIForAttachment(instanceAccountID, string(TypeEmoji), string(SizeStatic), mediaID, mimePng) - if _, err := m.state.DB.GetEmojiByStaticURL(ctx, staticURL); err != nil { - if !errors.Is(err, db.ErrNoEntries) { - return false, fmt.Errorf("error calling GetEmojiByStaticURL: %w", err) - } - orphaned = true - } - } - - return orphaned, nil -} - -// UncacheRemote uncaches all remote media attachments older than the given amount of days. -// -// In this context, uncacheing means deleting media files from storage and marking the attachment -// as cached=false in the database. -// -// If 'dry' is true, then only a dry run will be performed: nothing will actually be changed. -// -// The returned int is the amount of media that was/would be uncached by this function. -func (m *Manager) UncacheRemote(ctx context.Context, olderThanDays int, dry bool) (int, error) { - if olderThanDays < 0 { - return 0, nil - } - - olderThan := time.Now().Add(-time.Hour * 24 * time.Duration(olderThanDays)) - - if dry { - // Dry run, just count eligible entries without removing them. - return m.state.DB.CountRemoteOlderThan(ctx, olderThan) - } - - var ( - totalPruned int - attachments []*gtsmodel.MediaAttachment - err error - ) - - for attachments, err = m.state.DB.GetRemoteOlderThan(ctx, olderThan, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.state.DB.GetRemoteOlderThan(ctx, olderThan, selectPruneLimit) { - olderThan = attachments[len(attachments)-1].CreatedAt // use the created time of the last attachment in the slice as the next 'olderThan' value - - for _, attachment := range attachments { - if err := m.uncacheAttachment(ctx, attachment); err != nil { - return totalPruned, err - } - totalPruned++ - } - } - - // Make sure we don't have a real error when we leave the loop. 
- if err != nil && !errors.Is(err, db.ErrNoEntries) { - return totalPruned, err - } - - return totalPruned, nil -} - -// PruneUnusedLocal prunes unused media attachments that were uploaded by -// a user on this instance, but never actually attached to a status, or attached but -// later detached. -// -// The returned int is the amount of media that was pruned by this function. -func (m *Manager) PruneUnusedLocal(ctx context.Context, dry bool) (int, error) { - olderThan := time.Now().Add(-time.Hour * 24 * time.Duration(unusedLocalAttachmentDays)) - - if dry { - // Dry run, just count eligible entries without removing them. - return m.state.DB.CountLocalUnattachedOlderThan(ctx, olderThan) - } - - var ( - totalPruned int - attachments []*gtsmodel.MediaAttachment - err error - ) - - for attachments, err = m.state.DB.GetLocalUnattachedOlderThan(ctx, olderThan, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.state.DB.GetLocalUnattachedOlderThan(ctx, olderThan, selectPruneLimit) { - olderThan = attachments[len(attachments)-1].CreatedAt // use the created time of the last attachment in the slice as the next 'olderThan' value - - for _, attachment := range attachments { - if err := m.deleteAttachment(ctx, attachment); err != nil { - return totalPruned, err - } - totalPruned++ - } - } - - // Make sure we don't have a real error when we leave the loop. - if err != nil && !errors.Is(err, db.ErrNoEntries) { - return totalPruned, err - } - - return totalPruned, nil -} - -/* - Handy little helpers -*/ - -func (m *Manager) deleteAttachment(ctx context.Context, attachment *gtsmodel.MediaAttachment) error { - if _, err := m.removeFiles(ctx, attachment.File.Path, attachment.Thumbnail.Path); err != nil { - return err - } - - // Delete attachment completely. - return m.state.DB.DeleteAttachment(ctx, attachment.ID) -} - -func (m *Manager) uncacheAttachment(ctx context.Context, attachment *gtsmodel.MediaAttachment) error { - if _, err := m.removeFiles(ctx, attachment.File.Path, attachment.Thumbnail.Path); err != nil { - return err - } - - // Update attachment to reflect that we no longer have it cached. - attachment.Cached = func() *bool { i := false; return &i }() - return m.state.DB.UpdateAttachment(ctx, attachment, "cached") -} - -func (m *Manager) removeFiles(ctx context.Context, keys ...string) (int, error) { - errs := make(gtserror.MultiError, 0, len(keys)) - - for _, key := range keys { - if err := m.state.Storage.Delete(ctx, key); err != nil && !errors.Is(err, storage.ErrNotFound) { - errs = append(errs, "storage error removing "+key+": "+err.Error()) - } - } - - return len(keys) - len(errs), errs.Combine() -} diff --git a/internal/media/prune_test.go b/internal/media/prune_test.go deleted file mode 100644 index 375ce0c06..000000000 --- a/internal/media/prune_test.go +++ /dev/null @@ -1,357 +0,0 @@ -// GoToSocial -// Copyright (C) GoToSocial Authors admin@gotosocial.org -// SPDX-License-Identifier: AGPL-3.0-or-later -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. 
-// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see <http://www.gnu.org/licenses/>. - -package media_test - -import ( - "bytes" - "context" - "io" - "os" - "testing" - - "codeberg.org/gruf/go-store/v2/storage" - "github.com/stretchr/testify/suite" - "github.com/superseriousbusiness/gotosocial/internal/db" - gtsmodel "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" -) - -type PruneTestSuite struct { - MediaStandardTestSuite -} - -func (suite *PruneTestSuite) TestPruneOrphanedDry() { - // add a big orphan panda to store - b, err := os.ReadFile("./test/big-panda.gif") - if err != nil { - suite.FailNow(err.Error()) - } - - pandaPath := "01GJQJ1YD9QCHCE12GG0EYHVNW/attachment/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif" - if _, err := suite.storage.Put(context.Background(), pandaPath, b); err != nil { - suite.FailNow(err.Error()) - } - - // dry run should show up 1 orphaned panda - totalPruned, err := suite.manager.PruneOrphaned(context.Background(), true) - suite.NoError(err) - suite.Equal(1, totalPruned) - - // panda should still be in storage - hasKey, err := suite.storage.Has(context.Background(), pandaPath) - suite.NoError(err) - suite.True(hasKey) -} - -func (suite *PruneTestSuite) TestPruneOrphanedMoist() { - // add a big orphan panda to store - b, err := os.ReadFile("./test/big-panda.gif") - if err != nil { - suite.FailNow(err.Error()) - } - - pandaPath := "01GJQJ1YD9QCHCE12GG0EYHVNW/attachment/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif" - if _, err := suite.storage.Put(context.Background(), pandaPath, b); err != nil { - suite.FailNow(err.Error()) - } - - // should show up 1 orphaned panda - totalPruned, err := suite.manager.PruneOrphaned(context.Background(), false) - suite.NoError(err) - suite.Equal(1, totalPruned) - - // panda should no longer be in storage - hasKey, err := suite.storage.Has(context.Background(), pandaPath) - suite.NoError(err) - suite.False(hasKey) -} - -func (suite *PruneTestSuite) TestPruneUnusedLocal() { - testAttachment := suite.testAttachments["local_account_1_unattached_1"] - suite.True(*testAttachment.Cached) - - totalPruned, err := suite.manager.PruneUnusedLocal(context.Background(), false) - suite.NoError(err) - suite.Equal(1, totalPruned) - - _, err = suite.db.GetAttachmentByID(context.Background(), testAttachment.ID) - suite.ErrorIs(err, db.ErrNoEntries) -} - -func (suite *PruneTestSuite) TestPruneUnusedLocalDry() { - testAttachment := suite.testAttachments["local_account_1_unattached_1"] - suite.True(*testAttachment.Cached) - - totalPruned, err := suite.manager.PruneUnusedLocal(context.Background(), true) - suite.NoError(err) - suite.Equal(1, totalPruned) - - _, err = suite.db.GetAttachmentByID(context.Background(), testAttachment.ID) - suite.NoError(err) -} - -func (suite *PruneTestSuite) TestPruneRemoteTwice() { - totalPruned, err := suite.manager.PruneUnusedLocal(context.Background(), false) - suite.NoError(err) - suite.Equal(1, totalPruned) - - // final prune should prune nothing, since the first prune already happened - totalPrunedAgain, err := suite.manager.PruneUnusedLocal(context.Background(), false) - suite.NoError(err) - suite.Equal(0, totalPrunedAgain) -} - -func (suite *PruneTestSuite) TestPruneOneNonExistent() { - ctx := context.Background() - testAttachment := suite.testAttachments["local_account_1_unattached_1"] - - // Delete this attachment cached on disk - media, err := suite.db.GetAttachmentByID(ctx, testAttachment.ID) - suite.NoError(err) - 
suite.True(*media.Cached) - err = suite.storage.Delete(ctx, media.File.Path) - suite.NoError(err) - - // Now attempt to prune for item with db entry no file - totalPruned, err := suite.manager.PruneUnusedLocal(ctx, false) - suite.NoError(err) - suite.Equal(1, totalPruned) -} - -func (suite *PruneTestSuite) TestPruneUnusedRemote() { - ctx := context.Background() - - // start by clearing zork's avatar + header - zorkOldAvatar := suite.testAttachments["local_account_1_avatar"] - zorkOldHeader := suite.testAttachments["local_account_1_avatar"] - zork := suite.testAccounts["local_account_1"] - zork.AvatarMediaAttachmentID = "" - zork.HeaderMediaAttachmentID = "" - if err := suite.db.UpdateByID(ctx, zork, zork.ID, "avatar_media_attachment_id", "header_media_attachment_id"); err != nil { - panic(err) - } - - totalPruned, err := suite.manager.PruneUnusedRemote(ctx, false) - suite.NoError(err) - suite.Equal(2, totalPruned) - - // media should no longer be stored - _, err = suite.storage.Get(ctx, zorkOldAvatar.File.Path) - suite.ErrorIs(err, storage.ErrNotFound) - _, err = suite.storage.Get(ctx, zorkOldAvatar.Thumbnail.Path) - suite.ErrorIs(err, storage.ErrNotFound) - _, err = suite.storage.Get(ctx, zorkOldHeader.File.Path) - suite.ErrorIs(err, storage.ErrNotFound) - _, err = suite.storage.Get(ctx, zorkOldHeader.Thumbnail.Path) - suite.ErrorIs(err, storage.ErrNotFound) - - // attachments should no longer be in the db - _, err = suite.db.GetAttachmentByID(ctx, zorkOldAvatar.ID) - suite.ErrorIs(err, db.ErrNoEntries) - _, err = suite.db.GetAttachmentByID(ctx, zorkOldHeader.ID) - suite.ErrorIs(err, db.ErrNoEntries) -} - -func (suite *PruneTestSuite) TestPruneUnusedRemoteTwice() { - ctx := context.Background() - - // start by clearing zork's avatar + header - zork := suite.testAccounts["local_account_1"] - zork.AvatarMediaAttachmentID = "" - zork.HeaderMediaAttachmentID = "" - if err := suite.db.UpdateByID(ctx, zork, zork.ID, "avatar_media_attachment_id", "header_media_attachment_id"); err != nil { - panic(err) - } - - totalPruned, err := suite.manager.PruneUnusedRemote(ctx, false) - suite.NoError(err) - suite.Equal(2, totalPruned) - - // final prune should prune nothing, since the first prune already happened - totalPruned, err = suite.manager.PruneUnusedRemote(ctx, false) - suite.NoError(err) - suite.Equal(0, totalPruned) -} - -func (suite *PruneTestSuite) TestPruneUnusedRemoteMultipleAccounts() { - ctx := context.Background() - - // start by clearing zork's avatar + header - zorkOldAvatar := suite.testAttachments["local_account_1_avatar"] - zorkOldHeader := suite.testAttachments["local_account_1_avatar"] - zork := suite.testAccounts["local_account_1"] - zork.AvatarMediaAttachmentID = "" - zork.HeaderMediaAttachmentID = "" - if err := suite.db.UpdateByID(ctx, zork, zork.ID, "avatar_media_attachment_id", "header_media_attachment_id"); err != nil { - panic(err) - } - - // set zork's unused header as belonging to turtle - turtle := suite.testAccounts["local_account_1"] - zorkOldHeader.AccountID = turtle.ID - if err := suite.db.UpdateByID(ctx, zorkOldHeader, zorkOldHeader.ID, "account_id"); err != nil { - panic(err) - } - - totalPruned, err := suite.manager.PruneUnusedRemote(ctx, false) - suite.NoError(err) - suite.Equal(2, totalPruned) - - // media should no longer be stored - _, err = suite.storage.Get(ctx, zorkOldAvatar.File.Path) - suite.ErrorIs(err, storage.ErrNotFound) - _, err = suite.storage.Get(ctx, zorkOldAvatar.Thumbnail.Path) - suite.ErrorIs(err, storage.ErrNotFound) - _, err = 
suite.storage.Get(ctx, zorkOldHeader.File.Path) - suite.ErrorIs(err, storage.ErrNotFound) - _, err = suite.storage.Get(ctx, zorkOldHeader.Thumbnail.Path) - suite.ErrorIs(err, storage.ErrNotFound) - - // attachments should no longer be in the db - _, err = suite.db.GetAttachmentByID(ctx, zorkOldAvatar.ID) - suite.ErrorIs(err, db.ErrNoEntries) - _, err = suite.db.GetAttachmentByID(ctx, zorkOldHeader.ID) - suite.ErrorIs(err, db.ErrNoEntries) -} - -func (suite *PruneTestSuite) TestUncacheRemote() { - testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"] - suite.True(*testStatusAttachment.Cached) - - testHeader := suite.testAttachments["remote_account_3_header"] - suite.True(*testHeader.Cached) - - totalUncached, err := suite.manager.UncacheRemote(context.Background(), 1, false) - suite.NoError(err) - suite.Equal(2, totalUncached) - - uncachedAttachment, err := suite.db.GetAttachmentByID(context.Background(), testStatusAttachment.ID) - suite.NoError(err) - suite.False(*uncachedAttachment.Cached) - - uncachedAttachment, err = suite.db.GetAttachmentByID(context.Background(), testHeader.ID) - suite.NoError(err) - suite.False(*uncachedAttachment.Cached) -} - -func (suite *PruneTestSuite) TestUncacheRemoteDry() { - testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"] - suite.True(*testStatusAttachment.Cached) - - testHeader := suite.testAttachments["remote_account_3_header"] - suite.True(*testHeader.Cached) - - totalUncached, err := suite.manager.UncacheRemote(context.Background(), 1, true) - suite.NoError(err) - suite.Equal(2, totalUncached) - - uncachedAttachment, err := suite.db.GetAttachmentByID(context.Background(), testStatusAttachment.ID) - suite.NoError(err) - suite.True(*uncachedAttachment.Cached) - - uncachedAttachment, err = suite.db.GetAttachmentByID(context.Background(), testHeader.ID) - suite.NoError(err) - suite.True(*uncachedAttachment.Cached) -} - -func (suite *PruneTestSuite) TestUncacheRemoteTwice() { - totalUncached, err := suite.manager.UncacheRemote(context.Background(), 1, false) - suite.NoError(err) - suite.Equal(2, totalUncached) - - // final uncache should uncache nothing, since the first uncache already happened - totalUncachedAgain, err := suite.manager.UncacheRemote(context.Background(), 1, false) - suite.NoError(err) - suite.Equal(0, totalUncachedAgain) -} - -func (suite *PruneTestSuite) TestUncacheAndRecache() { - ctx := context.Background() - testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"] - testHeader := suite.testAttachments["remote_account_3_header"] - - totalUncached, err := suite.manager.UncacheRemote(ctx, 1, false) - suite.NoError(err) - suite.Equal(2, totalUncached) - - // media should no longer be stored - _, err = suite.storage.Get(ctx, testStatusAttachment.File.Path) - suite.ErrorIs(err, storage.ErrNotFound) - _, err = suite.storage.Get(ctx, testStatusAttachment.Thumbnail.Path) - suite.ErrorIs(err, storage.ErrNotFound) - _, err = suite.storage.Get(ctx, testHeader.File.Path) - suite.ErrorIs(err, storage.ErrNotFound) - _, err = suite.storage.Get(ctx, testHeader.Thumbnail.Path) - suite.ErrorIs(err, storage.ErrNotFound) - - // now recache the image.... 
- data := func(_ context.Context) (io.ReadCloser, int64, error) { - // load bytes from a test image - b, err := os.ReadFile("../../testrig/media/thoughtsofdog-original.jpg") - if err != nil { - panic(err) - } - return io.NopCloser(bytes.NewBuffer(b)), int64(len(b)), nil - } - - for _, original := range []*gtsmodel.MediaAttachment{ - testStatusAttachment, - testHeader, - } { - processingRecache, err := suite.manager.PreProcessMediaRecache(ctx, data, original.ID) - suite.NoError(err) - - // synchronously load the recached attachment - recachedAttachment, err := processingRecache.LoadAttachment(ctx) - suite.NoError(err) - suite.NotNil(recachedAttachment) - - // recachedAttachment should be basically the same as the old attachment - suite.True(*recachedAttachment.Cached) - suite.Equal(original.ID, recachedAttachment.ID) - suite.Equal(original.File.Path, recachedAttachment.File.Path) // file should be stored in the same place - suite.Equal(original.Thumbnail.Path, recachedAttachment.Thumbnail.Path) // as should the thumbnail - suite.EqualValues(original.FileMeta, recachedAttachment.FileMeta) // and the filemeta should be the same - - // recached files should be back in storage - _, err = suite.storage.Get(ctx, recachedAttachment.File.Path) - suite.NoError(err) - _, err = suite.storage.Get(ctx, recachedAttachment.Thumbnail.Path) - suite.NoError(err) - } -} - -func (suite *PruneTestSuite) TestUncacheOneNonExistent() { - ctx := context.Background() - testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"] - - // Delete this attachment cached on disk - media, err := suite.db.GetAttachmentByID(ctx, testStatusAttachment.ID) - suite.NoError(err) - suite.True(*media.Cached) - err = suite.storage.Delete(ctx, media.File.Path) - suite.NoError(err) - - // Now attempt to uncache remote for item with db entry no file - totalUncached, err := suite.manager.UncacheRemote(ctx, 1, false) - suite.NoError(err) - suite.Equal(2, totalUncached) -} - -func TestPruneOrphanedTestSuite(t *testing.T) { - suite.Run(t, &PruneTestSuite{}) -} diff --git a/internal/media/refetch.go b/internal/media/refetch.go index 80dfe4f60..03f0fbf34 100644 --- a/internal/media/refetch.go +++ b/internal/media/refetch.go @@ -52,7 +52,7 @@ func (m *Manager) RefetchEmojis(ctx context.Context, domain string, dereferenceM // page through emojis 20 at a time, looking for those with missing images for { // Fetch next block of emojis from database - emojis, err := m.state.DB.GetEmojis(ctx, domain, false, true, "", maxShortcodeDomain, "", 20) + emojis, err := m.state.DB.GetEmojisBy(ctx, domain, false, true, "", maxShortcodeDomain, "", 20) if err != nil { if !errors.Is(err, db.ErrNoEntries) { // an actual error has occurred |
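Besides the error-wrapping change, manager.go no longer schedules its own cleanup: scheduleCleanupJobs is deleted along with prune.go and prune_test.go, matching the "media cleanup tasks rewrite" in the commit title (this diff is limited to internal/media, so wherever the cleanup now lives is not shown here). One subtle piece of the deleted scheduler code is how it found the "closest midnight": time.Time.Round with a 24-hour duration can round into the past, so the code bumped the result forward a day when needed. Below is a small standalone sketch of just that calculation, independent of the go-sched scheduler that was removed; it illustrates the old logic only and is not the replacement cleanup code.

```go
package main

import (
	"fmt"
	"time"
)

// nextMidnight mirrors the calculation from the deleted scheduleCleanupJobs:
// Round(24h) snaps to the *nearest* multiple of a day, which for times before
// noon lands in the past, so we add a day in that case.
func nextMidnight(now time.Time) time.Time {
	const day = 24 * time.Hour

	midnight := now.Round(day)
	if midnight.Before(now) {
		// Rounded down into the past; the next midnight is a day later.
		midnight = midnight.Add(day)
	}
	return midnight
}

func main() {
	now := time.Now()
	fmt.Printf("now:           %s\n", now.Format(time.RFC3339))
	fmt.Printf("next midnight: %s\n", nextMidnight(now).Format(time.RFC3339))
}
```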