diff options
Diffstat (limited to 'internal/cleaner')
-rw-r--r-- | internal/cleaner/cleaner.go | 12 | ||||
-rw-r--r-- | internal/cleaner/cleaner_test.go | 80 | ||||
-rw-r--r-- | internal/cleaner/emoji.go | 317 | ||||
-rw-r--r-- | internal/cleaner/emoji_test.go | 402 | ||||
-rw-r--r-- | internal/cleaner/media.go | 54 |
5 files changed, 802 insertions, 63 deletions
diff --git a/internal/cleaner/cleaner.go b/internal/cleaner/cleaner.go index ee1e4785f..70497c10e 100644 --- a/internal/cleaner/cleaner.go +++ b/internal/cleaner/cleaner.go @@ -61,19 +61,19 @@ func (c *Cleaner) Media() *Media { return &c.media } -// checkFiles checks for each of the provided files, and calls onMissing() if any of them are missing. Returns true if missing. -func (c *Cleaner) checkFiles(ctx context.Context, onMissing func() error, files ...string) (bool, error) { +// haveFiles returns whether all of the provided files exist within current storage. +func (c *Cleaner) haveFiles(ctx context.Context, files ...string) (bool, error) { for _, file := range files { // Check whether each file exists in storage. have, err := c.state.Storage.Has(ctx, file) if err != nil { return false, gtserror.Newf("error checking storage for %s: %w", file, err) } else if !have { - // Missing files, perform hook. - return true, onMissing() + // Missing file(s). + return false, nil } } - return false, nil + return true, nil } // removeFiles removes the provided files, returning the number of them returned. @@ -129,7 +129,7 @@ func scheduleJobs(c *Cleaner) { c.state.Workers.Scheduler.Schedule(sched.NewJob(func(start time.Time) { log.Info(nil, "starting media clean") c.Media().All(doneCtx, config.GetMediaRemoteCacheDays()) - c.Emoji().All(doneCtx) + c.Emoji().All(doneCtx, config.GetMediaRemoteCacheDays()) log.Infof(nil, "finished media clean after %s", time.Since(start)) }).EveryAt(midnight, day)) } diff --git a/internal/cleaner/cleaner_test.go b/internal/cleaner/cleaner_test.go new file mode 100644 index 000000000..d23dac504 --- /dev/null +++ b/internal/cleaner/cleaner_test.go @@ -0,0 +1,80 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +package cleaner_test + +import ( + "testing" + + "github.com/stretchr/testify/suite" + "github.com/superseriousbusiness/gotosocial/internal/cleaner" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/state" + "github.com/superseriousbusiness/gotosocial/testrig" +) + +type CleanerTestSuite struct { + state state.State + cleaner *cleaner.Cleaner + emojis map[string]*gtsmodel.Emoji + suite.Suite +} + +func TestCleanerTestSuite(t *testing.T) { + suite.Run(t, &CleanerTestSuite{}) +} + +func (suite *CleanerTestSuite) SetupSuite() { + testrig.InitTestConfig() + testrig.InitTestLog() +} + +func (suite *CleanerTestSuite) SetupTest() { + // Initialize gts caches. + suite.state.Caches.Init() + + // Ensure scheduler started (even if unused). + suite.state.Workers.Scheduler.Start(nil) + + // Initialize test database. + _ = testrig.NewTestDB(&suite.state) + testrig.StandardDBSetup(suite.state.DB, nil) + + // Initialize test storage (in-memory). + suite.state.Storage = testrig.NewInMemoryStorage() + + // Initialize test cleaner instance. + suite.cleaner = cleaner.New(&suite.state) + + // Allocate new test model emojis. + suite.emojis = testrig.NewTestEmojis() +} + +func (suite *CleanerTestSuite) TearDownTest() { + testrig.StandardDBTeardown(suite.state.DB) +} + +// mapvals extracts a slice of values from the values contained within the map. +func mapvals[Key comparable, Val any](m map[Key]Val) []Val { + var i int + vals := make([]Val, len(m)) + for _, val := range m { + vals[i] = val + i++ + } + return vals +} diff --git a/internal/cleaner/emoji.go b/internal/cleaner/emoji.go index 35e579171..d2baec7e8 100644 --- a/internal/cleaner/emoji.go +++ b/internal/cleaner/emoji.go @@ -20,6 +20,7 @@ package cleaner import ( "context" "errors" + "time" "github.com/superseriousbusiness/gotosocial/internal/db" "github.com/superseriousbusiness/gotosocial/internal/gtscontext" @@ -36,22 +37,26 @@ type Emoji struct { // All will execute all cleaner.Emoji utilities synchronously, including output logging. // Context will be checked for `gtscontext.DryRun()` in order to actually perform the action. -func (e *Emoji) All(ctx context.Context) { - e.LogPruneMissing(ctx) +func (e *Emoji) All(ctx context.Context, maxRemoteDays int) { + t := time.Now().Add(-24 * time.Hour * time.Duration(maxRemoteDays)) + e.LogUncacheRemote(ctx, t) e.LogFixBroken(ctx) + e.LogPruneUnused(ctx) + e.LogFixCacheStates(ctx) + _ = e.state.Storage.Storage.Clean(ctx) } -// LogPruneMissing performs emoji.PruneMissing(...), logging the start and outcome. -func (e *Emoji) LogPruneMissing(ctx context.Context) { - log.Info(ctx, "start") - if n, err := e.PruneMissing(ctx); err != nil { +// LogUncacheRemote performs Emoji.UncacheRemote(...), logging the start and outcome. +func (e *Emoji) LogUncacheRemote(ctx context.Context, olderThan time.Time) { + log.Infof(ctx, "start older than: %s", olderThan.Format(time.Stamp)) + if n, err := e.UncacheRemote(ctx, olderThan); err != nil { log.Error(ctx, err) } else { - log.Infof(ctx, "pruned: %d", n) + log.Infof(ctx, "uncached: %d", n) } } -// LogFixBroken performs emoji.FixBroken(...), logging the start and outcome. +// LogFixBroken performs Emoji.FixBroken(...), logging the start and outcome. func (e *Emoji) LogFixBroken(ctx context.Context) { log.Info(ctx, "start") if n, err := e.FixBroken(ctx); err != nil { @@ -61,10 +66,78 @@ func (e *Emoji) LogFixBroken(ctx context.Context) { } } -// PruneMissing will delete emoji with missing files from the database and storage driver. -// Context will be checked for `gtscontext.DryRun()` to perform the action. NOTE: this function -// should be updated to match media.FixCacheStat() if we ever support emoji uncaching. -func (e *Emoji) PruneMissing(ctx context.Context) (int, error) { +// LogPruneUnused performs Emoji.PruneUnused(...), logging the start and outcome. +func (e *Emoji) LogPruneUnused(ctx context.Context) { + log.Info(ctx, "start") + if n, err := e.PruneUnused(ctx); err != nil { + log.Error(ctx, err) + } else { + log.Infof(ctx, "pruned: %d", n) + } +} + +// LogFixCacheStates performs Emoji.FixCacheStates(...), logging the start and outcome. +func (e *Emoji) LogFixCacheStates(ctx context.Context) { + log.Info(ctx, "start") + if n, err := e.FixCacheStates(ctx); err != nil { + log.Error(ctx, err) + } else { + log.Infof(ctx, "fixed: %d", n) + } +} + +// UncacheRemote will uncache all remote emoji older than given input time. Context +// will be checked for `gtscontext.DryRun()` in order to actually perform the action. +func (e *Emoji) UncacheRemote(ctx context.Context, olderThan time.Time) (int, error) { + var total int + + // Drop time by a minute to improve search, + // (i.e. make it olderThan inclusive search). + olderThan = olderThan.Add(-time.Minute) + + // Store recent time. + mostRecent := olderThan + + for { + // Fetch the next batch of cached emojis older than last-set time. + emojis, err := e.state.DB.GetCachedEmojisOlderThan(ctx, olderThan, selectLimit) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return total, gtserror.Newf("error getting remote emoji: %w", err) + } + + if len(emojis) == 0 { + // reached end. + break + } + + // Use last created-at as the next 'olderThan' value. + olderThan = emojis[len(emojis)-1].CreatedAt + + for _, emoji := range emojis { + // Check / uncache each remote emoji. + uncached, err := e.uncacheRemote(ctx, + mostRecent, + emoji, + ) + if err != nil { + return total, err + } + + if uncached { + // Update + // count. + total++ + } + } + } + + return total, nil +} + +// FixBroken will check all emojis for valid related models (e.g. category). +// Broken media will be automatically updated to remove now-missing models. +// Context will be checked for `gtscontext.DryRun()` to perform the action. +func (e *Emoji) FixBroken(ctx context.Context) (int, error) { var ( total int maxID string @@ -86,8 +159,8 @@ func (e *Emoji) PruneMissing(ctx context.Context) (int, error) { maxID = emojis[len(emojis)-1].ID for _, emoji := range emojis { - // Check / fix missing emoji media. - fixed, err := e.pruneMissing(ctx, emoji) + // Check / fix missing broken emoji. + fixed, err := e.fixBroken(ctx, emoji) if err != nil { return total, err } @@ -103,10 +176,10 @@ func (e *Emoji) PruneMissing(ctx context.Context) (int, error) { return total, nil } -// FixBroken will check all emojis for valid related models (e.g. category). -// Broken media will be automatically updated to remove now-missing models. -// Context will be checked for `gtscontext.DryRun()` to perform the action. -func (e *Emoji) FixBroken(ctx context.Context) (int, error) { +// PruneUnused will delete all unused emoji media from the database and storage driver. +// Context will be checked for `gtscontext.DryRun()` to perform the action. NOTE: this function +// should be updated to match media.FixCacheStat() if we ever support emoji uncaching. +func (e *Emoji) PruneUnused(ctx context.Context) (int, error) { var ( total int maxID string @@ -114,9 +187,9 @@ func (e *Emoji) FixBroken(ctx context.Context) (int, error) { for { // Fetch the next batch of emoji media up to next ID. - emojis, err := e.state.DB.GetEmojis(ctx, maxID, selectLimit) + emojis, err := e.state.DB.GetRemoteEmojis(ctx, maxID, selectLimit) if err != nil && !errors.Is(err, db.ErrNoEntries) { - return total, gtserror.Newf("error getting emojis: %w", err) + return total, gtserror.Newf("error getting remote emojis: %w", err) } if len(emojis) == 0 { @@ -128,8 +201,50 @@ func (e *Emoji) FixBroken(ctx context.Context) (int, error) { maxID = emojis[len(emojis)-1].ID for _, emoji := range emojis { - // Check / fix missing broken emoji. - fixed, err := e.fixBroken(ctx, emoji) + // Check / prune unused emoji media. + fixed, err := e.pruneUnused(ctx, emoji) + if err != nil { + return total, err + } + + if fixed { + // Update + // count. + total++ + } + } + } + + return total, nil +} + +// FixCacheStatus will check all emoji for up-to-date cache status (i.e. in storage driver). +// Context will be checked for `gtscontext.DryRun()` to perform the action. NOTE: this function +// should be updated to match media.FixCacheStat() if we ever support emoji uncaching. +func (e *Emoji) FixCacheStates(ctx context.Context) (int, error) { + var ( + total int + maxID string + ) + + for { + // Fetch the next batch of emoji media up to next ID. + emojis, err := e.state.DB.GetRemoteEmojis(ctx, maxID, selectLimit) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return total, gtserror.Newf("error getting remote emojis: %w", err) + } + + if len(emojis) == 0 { + // reached end. + break + } + + // Use last as the next 'maxID' value. + maxID = emojis[len(emojis)-1].ID + + for _, emoji := range emojis { + // Check / fix required emoji cache states. + fixed, err := e.fixCacheState(ctx, emoji) if err != nil { return total, err } @@ -145,22 +260,113 @@ func (e *Emoji) FixBroken(ctx context.Context) (int, error) { return total, nil } -func (e *Emoji) pruneMissing(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { - return e.checkFiles(ctx, func() error { - // Emoji missing files, delete it. - // NOTE: if we ever support uncaching - // of emojis, change to e.uncache(). - // In that case we should also rename - // this function to match the media - // equivalent -> fixCacheState(). - log.WithContext(ctx). - WithField("emoji", emoji.ID). - Debug("deleting due to missing emoji") - return e.delete(ctx, emoji) - }, +func (e *Emoji) pruneUnused(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { + // Start a log entry for emoji. + l := log.WithContext(ctx). + WithField("emoji", emoji.ID) + + // Load any related accounts using this emoji. + accounts, err := e.getRelatedAccounts(ctx, emoji) + if err != nil { + return false, err + } else if len(accounts) > 0 { + l.Debug("skipping as account emoji in use") + return false, nil + } + + // Load any related statuses using this emoji. + statuses, err := e.getRelatedStatuses(ctx, emoji) + if err != nil { + return false, err + } else if len(statuses) > 0 { + l.Debug("skipping as status emoji in use") + return false, nil + } + + // Check not recently created, give it some time to be "used" again. + if time.Now().Add(-24 * time.Hour * 7).Before(emoji.CreatedAt) { + l.Debug("skipping due to recently created") + return false, nil + } + + // Emoji totally unused, delete it. + l.Debug("deleting unused emoji") + return true, e.delete(ctx, emoji) +} + +func (e *Emoji) fixCacheState(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { + // Start a log entry for emoji. + l := log.WithContext(ctx). + WithField("emoji", emoji.ID) + + // Check whether files exist. + exist, err := e.haveFiles(ctx, emoji.ImageStaticPath, emoji.ImagePath, ) + if err != nil { + return false, err + } + + switch { + case *emoji.Cached && !exist: + // Mark as uncached if expected files don't exist. + l.Debug("cached=true exists=false => marking uncached") + return true, e.uncache(ctx, emoji) + + case !*emoji.Cached && exist: + // Remove files if we don't expect them to exist. + l.Debug("cached=false exists=true => removing files") + _, err := e.removeFiles(ctx, + emoji.ImageStaticPath, + emoji.ImagePath, + ) + return true, err + + default: + return false, nil + } +} + +func (e *Emoji) uncacheRemote(ctx context.Context, after time.Time, emoji *gtsmodel.Emoji) (bool, error) { + if !*emoji.Cached { + // Already uncached. + return false, nil + } + + // Start a log entry for emoji. + l := log.WithContext(ctx). + WithField("emoji", emoji.ID) + + // Load any related accounts using this emoji. + accounts, err := e.getRelatedAccounts(ctx, emoji) + if err != nil { + return false, err + } + + for _, account := range accounts { + if account.FetchedAt.After(after) { + l.Debug("skipping due to recently fetched account") + return false, nil + } + } + + // Load any related statuses using this emoji. + statuses, err := e.getRelatedStatuses(ctx, emoji) + if err != nil { + return false, err + } + + for _, status := range statuses { + if status.FetchedAt.After(after) { + l.Debug("skipping due to recently fetched status") + return false, nil + } + } + + // This emoji is too old, uncache it. + l.Debug("uncaching old remote emoji") + return true, e.uncache(ctx, emoji) } func (e *Emoji) fixBroken(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { @@ -214,6 +420,47 @@ func (e *Emoji) getRelatedCategory(ctx context.Context, emoji *gtsmodel.Emoji) ( return category, false, nil } +func (e *Emoji) getRelatedAccounts(ctx context.Context, emoji *gtsmodel.Emoji) ([]*gtsmodel.Account, error) { + accounts, err := e.state.DB.GetAccountsUsingEmoji(ctx, emoji.ID) + if err != nil { + return nil, gtserror.Newf("error fetching accounts using emoji %s: %w", emoji.ID, err) + } + return accounts, nil +} + +func (e *Emoji) getRelatedStatuses(ctx context.Context, emoji *gtsmodel.Emoji) ([]*gtsmodel.Status, error) { + statuses, err := e.state.DB.GetStatusesUsingEmoji(ctx, emoji.ID) + if err != nil { + return nil, gtserror.Newf("error fetching statuses using emoji %s: %w", emoji.ID, err) + } + return statuses, nil +} + +func (e *Emoji) uncache(ctx context.Context, emoji *gtsmodel.Emoji) error { + if gtscontext.DryRun(ctx) { + // Dry run, do nothing. + return nil + } + + // Remove emoji and static. + _, err := e.removeFiles(ctx, + emoji.ImagePath, + emoji.ImageStaticPath, + ) + if err != nil { + return gtserror.Newf("error removing emoji files: %w", err) + } + + // Update emoji to reflect that we no longer have it cached. + log.Debugf(ctx, "marking emoji as uncached: %s", emoji.ID) + emoji.Cached = func() *bool { i := false; return &i }() + if err := e.state.DB.UpdateEmoji(ctx, emoji, "cached"); err != nil { + return gtserror.Newf("error updating emoji: %w", err) + } + + return nil +} + func (e *Emoji) delete(ctx context.Context, emoji *gtsmodel.Emoji) error { if gtscontext.DryRun(ctx) { // Dry run, do nothing. diff --git a/internal/cleaner/emoji_test.go b/internal/cleaner/emoji_test.go new file mode 100644 index 000000000..81fde6e48 --- /dev/null +++ b/internal/cleaner/emoji_test.go @@ -0,0 +1,402 @@ +package cleaner_test + +import ( + "context" + "errors" + "time" + + "github.com/superseriousbusiness/gotosocial/internal/config" + "github.com/superseriousbusiness/gotosocial/internal/db" + "github.com/superseriousbusiness/gotosocial/internal/gtscontext" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" +) + +func (suite *CleanerTestSuite) TestEmojiUncacheRemote() { + suite.testEmojiUncacheRemote( + context.Background(), + mapvals(suite.emojis), + ) +} + +func (suite *CleanerTestSuite) TestEmojiUncacheRemoteDryRun() { + suite.testEmojiUncacheRemote( + gtscontext.SetDryRun(context.Background()), + mapvals(suite.emojis), + ) +} + +func (suite *CleanerTestSuite) TestEmojiFixBroken() { + suite.testEmojiFixBroken( + context.Background(), + mapvals(suite.emojis), + ) +} + +func (suite *CleanerTestSuite) TestEmojiFixBrokenDryRun() { + suite.testEmojiFixBroken( + gtscontext.SetDryRun(context.Background()), + mapvals(suite.emojis), + ) +} + +func (suite *CleanerTestSuite) TestEmojiPruneUnused() { + suite.testEmojiPruneUnused( + context.Background(), + mapvals(suite.emojis), + ) +} + +func (suite *CleanerTestSuite) TestEmojiPruneUnusedDryRun() { + suite.testEmojiPruneUnused( + gtscontext.SetDryRun(context.Background()), + mapvals(suite.emojis), + ) +} + +func (suite *CleanerTestSuite) TestEmojiFixCacheStates() { + suite.testEmojiFixCacheStates( + context.Background(), + mapvals(suite.emojis), + ) +} + +func (suite *CleanerTestSuite) TestEmojiFixCacheStatesDryRun() { + suite.testEmojiFixCacheStates( + gtscontext.SetDryRun(context.Background()), + mapvals(suite.emojis), + ) +} + +func (suite *CleanerTestSuite) testEmojiUncacheRemote(ctx context.Context, emojis []*gtsmodel.Emoji) { + var uncacheIDs []string + + // Test state. + t := suite.T() + + // Get max remote cache days to keep. + days := config.GetMediaRemoteCacheDays() + olderThan := time.Now().Add(-24 * time.Hour * time.Duration(days)) + + for _, emoji := range emojis { + // Check whether this emoji should be uncached. + ok, err := suite.shouldUncacheEmoji(ctx, emoji, olderThan) + if err != nil { + t.Fatalf("error checking whether emoji should be uncached: %v", err) + } + + if ok { + // Mark this emoji ID as to be uncached. + uncacheIDs = append(uncacheIDs, emoji.ID) + } + } + + // Attempt to uncache remote emojis. + found, err := suite.cleaner.Emoji().UncacheRemote(ctx, olderThan) + if err != nil { + t.Errorf("error uncaching remote emojis: %v", err) + return + } + + // Check expected were uncached. + if found != len(uncacheIDs) { + t.Errorf("expected %d emojis to be uncached, %d were", len(uncacheIDs), found) + return + } + + if gtscontext.DryRun(ctx) { + // nothing else to test. + return + } + + for _, id := range uncacheIDs { + // Fetch the emoji by ID that should now be uncached. + emoji, err := suite.state.DB.GetEmojiByID(ctx, id) + if err != nil { + t.Fatalf("error fetching emoji from database: %v", err) + } + + // Check cache state. + if *emoji.Cached { + t.Errorf("emoji %s@%s should have been uncached", emoji.Shortcode, emoji.Domain) + } + + // Check that the emoji files in storage have been deleted. + if ok, err := suite.state.Storage.Has(ctx, emoji.ImagePath); err != nil { + t.Fatalf("error checking storage for emoji: %v", err) + } else if ok { + t.Errorf("emoji %s@%s image path should not exist", emoji.Shortcode, emoji.Domain) + } else if ok, err := suite.state.Storage.Has(ctx, emoji.ImageStaticPath); err != nil { + t.Fatalf("error checking storage for emoji: %v", err) + } else if ok { + t.Errorf("emoji %s@%s image static path should not exist", emoji.Shortcode, emoji.Domain) + } + } +} + +func (suite *CleanerTestSuite) shouldUncacheEmoji(ctx context.Context, emoji *gtsmodel.Emoji, after time.Time) (bool, error) { + if emoji.ImageRemoteURL == "" { + // Local emojis are never uncached. + return false, nil + } + + if emoji.Cached == nil || !*emoji.Cached { + // Emoji is already uncached. + return false, nil + } + + // Get related accounts using this emoji (if any). + accounts, err := suite.state.DB.GetAccountsUsingEmoji(ctx, emoji.ID) + if err != nil { + return false, err + } + + // Check if accounts are recently updated. + for _, account := range accounts { + if account.FetchedAt.After(after) { + return false, nil + } + } + + // Get related statuses using this emoji (if any). + statuses, err := suite.state.DB.GetStatusesUsingEmoji(ctx, emoji.ID) + if err != nil { + return false, err + } + + // Check if statuses are recently updated. + for _, status := range statuses { + if status.FetchedAt.After(after) { + return false, nil + } + } + + return true, nil +} + +func (suite *CleanerTestSuite) testEmojiFixBroken(ctx context.Context, emojis []*gtsmodel.Emoji) { + var fixIDs []string + + // Test state. + t := suite.T() + + for _, emoji := range emojis { + // Check whether this emoji should be fixed. + ok, err := suite.shouldFixBrokenEmoji(ctx, emoji) + if err != nil { + t.Fatalf("error checking whether emoji should be fixed: %v", err) + } + + if ok { + // Mark this emoji ID as to be fixed. + fixIDs = append(fixIDs, emoji.ID) + } + } + + // Attempt to fix broken emojis. + found, err := suite.cleaner.Emoji().FixBroken(ctx) + if err != nil { + t.Errorf("error fixing broken emojis: %v", err) + return + } + + // Check expected were fixed. + if found != len(fixIDs) { + t.Errorf("expected %d emojis to be fixed, %d were", len(fixIDs), found) + return + } + + if gtscontext.DryRun(ctx) { + // nothing else to test. + return + } + + for _, id := range fixIDs { + // Fetch the emoji by ID that should now be fixed. + emoji, err := suite.state.DB.GetEmojiByID(ctx, id) + if err != nil { + t.Fatalf("error fetching emoji from database: %v", err) + } + + // Ensure category was cleared. + if emoji.CategoryID != "" { + t.Errorf("emoji %s@%s should have empty category", emoji.Shortcode, emoji.Domain) + } + } +} + +func (suite *CleanerTestSuite) shouldFixBrokenEmoji(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { + if emoji.CategoryID == "" { + // no category issue. + return false, nil + } + + // Get the related category for this emoji. + category, err := suite.state.DB.GetEmojiCategory(ctx, emoji.CategoryID) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return false, nil + } + + return (category == nil), nil +} + +func (suite *CleanerTestSuite) testEmojiPruneUnused(ctx context.Context, emojis []*gtsmodel.Emoji) { + var pruneIDs []string + + // Test state. + t := suite.T() + + for _, emoji := range emojis { + // Check whether this emoji should be pruned. + ok, err := suite.shouldPruneEmoji(ctx, emoji) + if err != nil { + t.Fatalf("error checking whether emoji should be pruned: %v", err) + } + + if ok { + // Mark this emoji ID as to be pruned. + pruneIDs = append(pruneIDs, emoji.ID) + } + } + + // Attempt to prune emojis. + found, err := suite.cleaner.Emoji().PruneUnused(ctx) + if err != nil { + t.Errorf("error fixing broken emojis: %v", err) + return + } + + // Check expected were pruned. + if found != len(pruneIDs) { + t.Errorf("expected %d emojis to be pruned, %d were", len(pruneIDs), found) + return + } + + if gtscontext.DryRun(ctx) { + // nothing else to test. + return + } + + for _, id := range pruneIDs { + // Fetch the emoji by ID that should now be pruned. + emoji, err := suite.state.DB.GetEmojiByID(ctx, id) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + t.Fatalf("error fetching emoji from database: %v", err) + } + + // Ensure gone. + if emoji != nil { + t.Errorf("emoji %s@%s should have been pruned", emoji.Shortcode, emoji.Domain) + } + } +} + +func (suite *CleanerTestSuite) shouldPruneEmoji(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { + if emoji.ImageRemoteURL == "" { + // Local emojis are never pruned. + return false, nil + } + + // Get related accounts using this emoji (if any). + accounts, err := suite.state.DB.GetAccountsUsingEmoji(ctx, emoji.ID) + if err != nil { + return false, err + } else if len(accounts) > 0 { + return false, nil + } + + // Get related statuses using this emoji (if any). + statuses, err := suite.state.DB.GetStatusesUsingEmoji(ctx, emoji.ID) + if err != nil { + return false, err + } else if len(statuses) > 0 { + return false, nil + } + + return true, nil +} + +func (suite *CleanerTestSuite) testEmojiFixCacheStates(ctx context.Context, emojis []*gtsmodel.Emoji) { + var fixIDs []string + + // Test state. + t := suite.T() + + for _, emoji := range emojis { + // Check whether this emoji should be fixed. + ok, err := suite.shouldFixEmojiCacheState(ctx, emoji) + if err != nil { + t.Fatalf("error checking whether emoji should be fixed: %v", err) + } + + if ok { + // Mark this emoji ID as to be fixed. + fixIDs = append(fixIDs, emoji.ID) + } + } + + // Attempt to fix broken emoji cache states. + found, err := suite.cleaner.Emoji().FixCacheStates(ctx) + if err != nil { + t.Errorf("error fixing broken emojis: %v", err) + return + } + + // Check expected were fixed. + if found != len(fixIDs) { + t.Errorf("expected %d emojis to be fixed, %d were", len(fixIDs), found) + return + } + + if gtscontext.DryRun(ctx) { + // nothing else to test. + return + } + + for _, id := range fixIDs { + // Fetch the emoji by ID that should now be fixed. + emoji, err := suite.state.DB.GetEmojiByID(ctx, id) + if err != nil { + t.Fatalf("error fetching emoji from database: %v", err) + } + + // Ensure emoji cache state has been fixed. + ok, err := suite.shouldFixEmojiCacheState(ctx, emoji) + if err != nil { + t.Fatalf("error checking whether emoji should be fixed: %v", err) + } else if ok { + t.Errorf("emoji %s@%s cache state should have been fixed", emoji.Shortcode, emoji.Domain) + } + } +} + +func (suite *CleanerTestSuite) shouldFixEmojiCacheState(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { + // Check whether emoji image path exists. + haveImage, err := suite.state.Storage.Has(ctx, emoji.ImagePath) + if err != nil { + return false, err + } + + // Check whether emoji static path exists. + haveStatic, err := suite.state.Storage.Has(ctx, emoji.ImageStaticPath) + if err != nil { + return false, err + } + + switch exists := (haveImage && haveStatic); { + case emoji.Cached != nil && + *emoji.Cached && !exists: + // (cached can be nil in tests) + // Cached but missing files. + return true, nil + + case emoji.Cached != nil && + !*emoji.Cached && exists: + // (cached can be nil in tests) + // Uncached but unexpected files. + return true, nil + + default: + // No cache state issue. + return false, nil + } +} diff --git a/internal/cleaner/media.go b/internal/cleaner/media.go index 51a0aea6d..8b11a30bf 100644 --- a/internal/cleaner/media.go +++ b/internal/cleaner/media.go @@ -96,9 +96,9 @@ func (m *Media) PruneOrphaned(ctx context.Context) (int, error) { // All media files in storage will have path fitting: {$account}/{$type}/{$size}/{$id}.{$ext} if err := m.state.Storage.WalkKeys(ctx, func(ctx context.Context, path string) error { + // Check for our expected fileserver path format. if !regexes.FilePath.MatchString(path) { - // This is not our expected media - // path format, skip this one. + log.Warn(ctx, "unexpected storage item: %s", path) return nil } @@ -177,10 +177,10 @@ func (m *Media) UncacheRemote(ctx context.Context, olderThan time.Time) (int, er mostRecent := olderThan for { - // Fetch the next batch of attachments older than last-set time. - attachments, err := m.state.DB.GetRemoteOlderThan(ctx, olderThan, selectLimit) + // Fetch the next batch of cached attachments older than last-set time. + attachments, err := m.state.DB.GetCachedAttachmentsOlderThan(ctx, olderThan, selectLimit) if err != nil && !errors.Is(err, db.ErrNoEntries) { - return total, gtserror.Newf("error getting remote media: %w", err) + return total, gtserror.Newf("error getting remote attachments: %w", err) } if len(attachments) == 0 { @@ -220,9 +220,9 @@ func (m *Media) FixCacheStates(ctx context.Context) (int, error) { for { // Fetch the next batch of media attachments up to next max ID. - attachments, err := m.state.DB.GetAttachments(ctx, maxID, selectLimit) + attachments, err := m.state.DB.GetRemoteAttachments(ctx, maxID, selectLimit) if err != nil && !errors.Is(err, db.ErrNoEntries) { - return total, gtserror.Newf("error getting avatars / headers: %w", err) + return total, gtserror.Newf("error getting remote attachments: %w", err) } if len(attachments) == 0 { @@ -323,7 +323,7 @@ func (m *Media) pruneUnused(ctx context.Context, media *gtsmodel.MediaAttachment l := log.WithContext(ctx). WithField("media", media.ID) - // Check whether we have the required account for media. + // Check whether we have the required account for media. account, missing, err := m.getRelatedAccount(ctx, media) if err != nil { return false, err @@ -367,14 +367,6 @@ func (m *Media) pruneUnused(ctx context.Context, media *gtsmodel.MediaAttachment } func (m *Media) fixCacheState(ctx context.Context, media *gtsmodel.MediaAttachment) (bool, error) { - if !*media.Cached { - // We ignore uncached media, a - // false negative is a much better - // situation than a false positive, - // re-cache will just overwrite it. - return false, nil - } - // Start a log entry for media. l := log.WithContext(ctx). WithField("media", media.ID) @@ -397,15 +389,33 @@ func (m *Media) fixCacheState(ctx context.Context, media *gtsmodel.MediaAttachme return false, nil } - // So we know this a valid cached media entry. - // Check that we have the files on disk required.... - return m.checkFiles(ctx, func() error { - l.Debug("uncaching due to missing media") - return m.uncache(ctx, media) - }, + // Check whether files exist. + exist, err := m.haveFiles(ctx, media.Thumbnail.Path, media.File.Path, ) + if err != nil { + return false, err + } + + switch { + case *media.Cached && !exist: + // Mark as uncached if expected files don't exist. + l.Debug("cached=true exists=false => uncaching") + return true, m.uncache(ctx, media) + + case !*media.Cached && exist: + // Remove files if we don't expect them to exist. + l.Debug("cached=false exists=true => deleting") + _, err := m.removeFiles(ctx, + media.Thumbnail.Path, + media.File.Path, + ) + return true, err + + default: + return false, nil + } } func (m *Media) uncacheRemote(ctx context.Context, after time.Time, media *gtsmodel.MediaAttachment) (bool, error) { |