summaryrefslogtreecommitdiff
path: root/internal/cleaner
diff options
context:
space:
mode:
authorLibravatar kim <89579420+NyaaaWhatsUpDoc@users.noreply.github.com>2023-07-24 13:14:13 +0100
committerLibravatar GitHub <noreply@github.com>2023-07-24 13:14:13 +0100
commit9eff0d46e49b947dc2642207ee49ed657eb6b565 (patch)
tree62994afff170737d83f1ed911e385504a0ad16cd /internal/cleaner
parent[chore]: Bump github.com/microcosm-cc/bluemonday from 1.0.24 to 1.0.25 (#2021) (diff)
downloadgotosocial-9eff0d46e49b947dc2642207ee49ed657eb6b565.tar.xz
[feature/performance] support uncaching remote emoji + scheduled cleanup functions (#1987)
Diffstat (limited to 'internal/cleaner')
-rw-r--r--internal/cleaner/cleaner.go12
-rw-r--r--internal/cleaner/cleaner_test.go80
-rw-r--r--internal/cleaner/emoji.go317
-rw-r--r--internal/cleaner/emoji_test.go402
-rw-r--r--internal/cleaner/media.go54
5 files changed, 802 insertions, 63 deletions
diff --git a/internal/cleaner/cleaner.go b/internal/cleaner/cleaner.go
index ee1e4785f..70497c10e 100644
--- a/internal/cleaner/cleaner.go
+++ b/internal/cleaner/cleaner.go
@@ -61,19 +61,19 @@ func (c *Cleaner) Media() *Media {
return &c.media
}
-// checkFiles checks for each of the provided files, and calls onMissing() if any of them are missing. Returns true if missing.
-func (c *Cleaner) checkFiles(ctx context.Context, onMissing func() error, files ...string) (bool, error) {
+// haveFiles returns whether all of the provided files exist within current storage.
+func (c *Cleaner) haveFiles(ctx context.Context, files ...string) (bool, error) {
for _, file := range files {
// Check whether each file exists in storage.
have, err := c.state.Storage.Has(ctx, file)
if err != nil {
return false, gtserror.Newf("error checking storage for %s: %w", file, err)
} else if !have {
- // Missing files, perform hook.
- return true, onMissing()
+ // Missing file(s).
+ return false, nil
}
}
- return false, nil
+ return true, nil
}
// removeFiles removes the provided files, returning the number of files removed.
@@ -129,7 +129,7 @@ func scheduleJobs(c *Cleaner) {
c.state.Workers.Scheduler.Schedule(sched.NewJob(func(start time.Time) {
log.Info(nil, "starting media clean")
c.Media().All(doneCtx, config.GetMediaRemoteCacheDays())
- c.Emoji().All(doneCtx)
+ c.Emoji().All(doneCtx, config.GetMediaRemoteCacheDays())
log.Infof(nil, "finished media clean after %s", time.Since(start))
}).EveryAt(midnight, day))
}
diff --git a/internal/cleaner/cleaner_test.go b/internal/cleaner/cleaner_test.go
new file mode 100644
index 000000000..d23dac504
--- /dev/null
+++ b/internal/cleaner/cleaner_test.go
@@ -0,0 +1,80 @@
+// GoToSocial
+// Copyright (C) GoToSocial Authors admin@gotosocial.org
+// SPDX-License-Identifier: AGPL-3.0-or-later
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package cleaner_test
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/suite"
+ "github.com/superseriousbusiness/gotosocial/internal/cleaner"
+ "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+ "github.com/superseriousbusiness/gotosocial/internal/state"
+ "github.com/superseriousbusiness/gotosocial/testrig"
+)
+
+type CleanerTestSuite struct {
+ state state.State
+ cleaner *cleaner.Cleaner
+ emojis map[string]*gtsmodel.Emoji
+ suite.Suite
+}
+
+func TestCleanerTestSuite(t *testing.T) {
+ suite.Run(t, &CleanerTestSuite{})
+}
+
+func (suite *CleanerTestSuite) SetupSuite() {
+ testrig.InitTestConfig()
+ testrig.InitTestLog()
+}
+
+func (suite *CleanerTestSuite) SetupTest() {
+ // Initialize gts caches.
+ suite.state.Caches.Init()
+
+ // Ensure scheduler started (even if unused).
+ suite.state.Workers.Scheduler.Start(nil)
+
+ // Initialize test database.
+ _ = testrig.NewTestDB(&suite.state)
+ testrig.StandardDBSetup(suite.state.DB, nil)
+
+ // Initialize test storage (in-memory).
+ suite.state.Storage = testrig.NewInMemoryStorage()
+
+ // Initialize test cleaner instance.
+ suite.cleaner = cleaner.New(&suite.state)
+
+ // Allocate new test model emojis.
+ suite.emojis = testrig.NewTestEmojis()
+}
+
+func (suite *CleanerTestSuite) TearDownTest() {
+ testrig.StandardDBTeardown(suite.state.DB)
+}
+
+// mapvals returns a slice holding every value of m, in unspecified (map iteration) order.
+func mapvals[Key comparable, Val any](m map[Key]Val) []Val {
+ var i int
+ vals := make([]Val, len(m))
+ for _, val := range m {
+ vals[i] = val
+ i++
+ }
+ return vals
+}
diff --git a/internal/cleaner/emoji.go b/internal/cleaner/emoji.go
index 35e579171..d2baec7e8 100644
--- a/internal/cleaner/emoji.go
+++ b/internal/cleaner/emoji.go
@@ -20,6 +20,7 @@ package cleaner
import (
"context"
"errors"
+ "time"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
@@ -36,22 +37,26 @@ type Emoji struct {
// All will execute all cleaner.Emoji utilities synchronously, including output logging.
// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action.
-func (e *Emoji) All(ctx context.Context) {
- e.LogPruneMissing(ctx)
+func (e *Emoji) All(ctx context.Context, maxRemoteDays int) {
+ t := time.Now().Add(-24 * time.Hour * time.Duration(maxRemoteDays))
+ e.LogUncacheRemote(ctx, t)
e.LogFixBroken(ctx)
+ e.LogPruneUnused(ctx)
+ e.LogFixCacheStates(ctx)
+ _ = e.state.Storage.Storage.Clean(ctx)
}
-// LogPruneMissing performs emoji.PruneMissing(...), logging the start and outcome.
-func (e *Emoji) LogPruneMissing(ctx context.Context) {
- log.Info(ctx, "start")
- if n, err := e.PruneMissing(ctx); err != nil {
+// LogUncacheRemote performs Emoji.UncacheRemote(...), logging the start and outcome.
+func (e *Emoji) LogUncacheRemote(ctx context.Context, olderThan time.Time) {
+ log.Infof(ctx, "start older than: %s", olderThan.Format(time.Stamp))
+ if n, err := e.UncacheRemote(ctx, olderThan); err != nil {
log.Error(ctx, err)
} else {
- log.Infof(ctx, "pruned: %d", n)
+ log.Infof(ctx, "uncached: %d", n)
}
}
-// LogFixBroken performs emoji.FixBroken(...), logging the start and outcome.
+// LogFixBroken performs Emoji.FixBroken(...), logging the start and outcome.
func (e *Emoji) LogFixBroken(ctx context.Context) {
log.Info(ctx, "start")
if n, err := e.FixBroken(ctx); err != nil {
@@ -61,10 +66,78 @@ func (e *Emoji) LogFixBroken(ctx context.Context) {
}
}
-// PruneMissing will delete emoji with missing files from the database and storage driver.
-// Context will be checked for `gtscontext.DryRun()` to perform the action. NOTE: this function
-// should be updated to match media.FixCacheStat() if we ever support emoji uncaching.
-func (e *Emoji) PruneMissing(ctx context.Context) (int, error) {
+// LogPruneUnused performs Emoji.PruneUnused(...), logging the start and outcome.
+func (e *Emoji) LogPruneUnused(ctx context.Context) {
+ log.Info(ctx, "start")
+ if n, err := e.PruneUnused(ctx); err != nil {
+ log.Error(ctx, err)
+ } else {
+ log.Infof(ctx, "pruned: %d", n)
+ }
+}
+
+// LogFixCacheStates performs Emoji.FixCacheStates(...), logging the start and outcome.
+func (e *Emoji) LogFixCacheStates(ctx context.Context) {
+ log.Info(ctx, "start")
+ if n, err := e.FixCacheStates(ctx); err != nil {
+ log.Error(ctx, err)
+ } else {
+ log.Infof(ctx, "fixed: %d", n)
+ }
+}
+
+// UncacheRemote will uncache all remote emoji older than given input time. Context
+// will be checked for `gtscontext.DryRun()` in order to actually perform the action.
+func (e *Emoji) UncacheRemote(ctx context.Context, olderThan time.Time) (int, error) {
+ var total int
+
+ // Drop time by a minute to improve search,
+ // (i.e. make it olderThan inclusive search).
+ olderThan = olderThan.Add(-time.Minute)
+
+ // Keep this cutoff for the per-emoji checks; olderThan is reused as the paging cursor below.
+ mostRecent := olderThan
+
+ for {
+ // Fetch the next batch of cached emojis older than last-set time.
+ emojis, err := e.state.DB.GetCachedEmojisOlderThan(ctx, olderThan, selectLimit)
+ if err != nil && !errors.Is(err, db.ErrNoEntries) {
+ return total, gtserror.Newf("error getting remote emoji: %w", err)
+ }
+
+ if len(emojis) == 0 {
+ // reached end.
+ break
+ }
+
+ // Use last created-at as the next 'olderThan' value.
+ olderThan = emojis[len(emojis)-1].CreatedAt
+
+ for _, emoji := range emojis {
+ // Check / uncache each remote emoji.
+ uncached, err := e.uncacheRemote(ctx,
+ mostRecent,
+ emoji,
+ )
+ if err != nil {
+ return total, err
+ }
+
+ if uncached {
+ // Update
+ // count.
+ total++
+ }
+ }
+ }
+
+ return total, nil
+}
+
+// FixBroken will check all emojis for valid related models (e.g. category).
+// Broken media will be automatically updated to remove now-missing models.
+// Context will be checked for `gtscontext.DryRun()` to perform the action.
+func (e *Emoji) FixBroken(ctx context.Context) (int, error) {
var (
total int
maxID string
@@ -86,8 +159,8 @@ func (e *Emoji) PruneMissing(ctx context.Context) (int, error) {
maxID = emojis[len(emojis)-1].ID
for _, emoji := range emojis {
- // Check / fix missing emoji media.
- fixed, err := e.pruneMissing(ctx, emoji)
+ // Check / fix missing broken emoji.
+ fixed, err := e.fixBroken(ctx, emoji)
if err != nil {
return total, err
}
@@ -103,10 +176,10 @@ func (e *Emoji) PruneMissing(ctx context.Context) (int, error) {
return total, nil
}
-// FixBroken will check all emojis for valid related models (e.g. category).
-// Broken media will be automatically updated to remove now-missing models.
-// Context will be checked for `gtscontext.DryRun()` to perform the action.
-func (e *Emoji) FixBroken(ctx context.Context) (int, error) {
+// PruneUnused will delete remote emoji that are no longer used by any account or status
+// (and were not created within the last 7 days) from the database and storage driver.
+// Context will be checked for `gtscontext.DryRun()` to perform the action.
+func (e *Emoji) PruneUnused(ctx context.Context) (int, error) {
var (
total int
maxID string
@@ -114,9 +187,9 @@ func (e *Emoji) FixBroken(ctx context.Context) (int, error) {
for {
// Fetch the next batch of emoji media up to next ID.
- emojis, err := e.state.DB.GetEmojis(ctx, maxID, selectLimit)
+ emojis, err := e.state.DB.GetRemoteEmojis(ctx, maxID, selectLimit)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
- return total, gtserror.Newf("error getting emojis: %w", err)
+ return total, gtserror.Newf("error getting remote emojis: %w", err)
}
if len(emojis) == 0 {
@@ -128,8 +201,50 @@ func (e *Emoji) FixBroken(ctx context.Context) (int, error) {
maxID = emojis[len(emojis)-1].ID
for _, emoji := range emojis {
- // Check / fix missing broken emoji.
- fixed, err := e.fixBroken(ctx, emoji)
+ // Check / prune unused emoji media.
+ fixed, err := e.pruneUnused(ctx, emoji)
+ if err != nil {
+ return total, err
+ }
+
+ if fixed {
+ // Update
+ // count.
+ total++
+ }
+ }
+ }
+
+ return total, nil
+}
+
+// FixCacheStates will check all remote emoji for up-to-date cache status (i.e. in storage driver).
+// Emoji marked cached with missing files are marked uncached; emoji marked uncached with files
+// still present have them removed. Context will be checked for `gtscontext.DryRun()` to perform the action.
+func (e *Emoji) FixCacheStates(ctx context.Context) (int, error) {
+ var (
+ total int
+ maxID string
+ )
+
+ for {
+ // Fetch the next batch of emoji media up to next ID.
+ emojis, err := e.state.DB.GetRemoteEmojis(ctx, maxID, selectLimit)
+ if err != nil && !errors.Is(err, db.ErrNoEntries) {
+ return total, gtserror.Newf("error getting remote emojis: %w", err)
+ }
+
+ if len(emojis) == 0 {
+ // reached end.
+ break
+ }
+
+ // Use last as the next 'maxID' value.
+ maxID = emojis[len(emojis)-1].ID
+
+ for _, emoji := range emojis {
+ // Check / fix required emoji cache states.
+ fixed, err := e.fixCacheState(ctx, emoji)
if err != nil {
return total, err
}
@@ -145,22 +260,113 @@ func (e *Emoji) FixBroken(ctx context.Context) (int, error) {
return total, nil
}
-func (e *Emoji) pruneMissing(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) {
- return e.checkFiles(ctx, func() error {
- // Emoji missing files, delete it.
- // NOTE: if we ever support uncaching
- // of emojis, change to e.uncache().
- // In that case we should also rename
- // this function to match the media
- // equivalent -> fixCacheState().
- log.WithContext(ctx).
- WithField("emoji", emoji.ID).
- Debug("deleting due to missing emoji")
- return e.delete(ctx, emoji)
- },
+func (e *Emoji) pruneUnused(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) {
+ // Start a log entry for emoji.
+ l := log.WithContext(ctx).
+ WithField("emoji", emoji.ID)
+
+ // Load any related accounts using this emoji.
+ accounts, err := e.getRelatedAccounts(ctx, emoji)
+ if err != nil {
+ return false, err
+ } else if len(accounts) > 0 {
+ l.Debug("skipping as account emoji in use")
+ return false, nil
+ }
+
+ // Load any related statuses using this emoji.
+ statuses, err := e.getRelatedStatuses(ctx, emoji)
+ if err != nil {
+ return false, err
+ } else if len(statuses) > 0 {
+ l.Debug("skipping as status emoji in use")
+ return false, nil
+ }
+
+ // Check not recently created, give it some time to be "used" again.
+ if time.Now().Add(-24 * time.Hour * 7).Before(emoji.CreatedAt) {
+ l.Debug("skipping due to recently created")
+ return false, nil
+ }
+
+ // Emoji totally unused, delete it.
+ l.Debug("deleting unused emoji")
+ return true, e.delete(ctx, emoji)
+}
+
+func (e *Emoji) fixCacheState(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) {
+ // Start a log entry for emoji.
+ l := log.WithContext(ctx).
+ WithField("emoji", emoji.ID)
+
+ // Check whether files exist.
+ exist, err := e.haveFiles(ctx,
emoji.ImageStaticPath,
emoji.ImagePath,
)
+ if err != nil {
+ return false, err
+ }
+
+ switch {
+ case *emoji.Cached && !exist:
+ // Mark as uncached if expected files don't exist.
+ l.Debug("cached=true exists=false => marking uncached")
+ return true, e.uncache(ctx, emoji)
+
+ case !*emoji.Cached && exist:
+ // Remove files if we don't expect them to exist.
+ l.Debug("cached=false exists=true => removing files")
+ _, err := e.removeFiles(ctx,
+ emoji.ImageStaticPath,
+ emoji.ImagePath,
+ )
+ return true, err
+
+ default:
+ return false, nil
+ }
+}
+
+func (e *Emoji) uncacheRemote(ctx context.Context, after time.Time, emoji *gtsmodel.Emoji) (bool, error) {
+ if !*emoji.Cached {
+ // Already uncached.
+ return false, nil
+ }
+
+ // Start a log entry for emoji.
+ l := log.WithContext(ctx).
+ WithField("emoji", emoji.ID)
+
+ // Load any related accounts using this emoji.
+ accounts, err := e.getRelatedAccounts(ctx, emoji)
+ if err != nil {
+ return false, err
+ }
+
+ for _, account := range accounts {
+ if account.FetchedAt.After(after) {
+ l.Debug("skipping due to recently fetched account")
+ return false, nil
+ }
+ }
+
+ // Load any related statuses using this emoji.
+ statuses, err := e.getRelatedStatuses(ctx, emoji)
+ if err != nil {
+ return false, err
+ }
+
+ for _, status := range statuses {
+ if status.FetchedAt.After(after) {
+ l.Debug("skipping due to recently fetched status")
+ return false, nil
+ }
+ }
+
+ // This emoji is too old, uncache it.
+ l.Debug("uncaching old remote emoji")
+ return true, e.uncache(ctx, emoji)
}
func (e *Emoji) fixBroken(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) {
@@ -214,6 +420,47 @@ func (e *Emoji) getRelatedCategory(ctx context.Context, emoji *gtsmodel.Emoji) (
return category, false, nil
}
+func (e *Emoji) getRelatedAccounts(ctx context.Context, emoji *gtsmodel.Emoji) ([]*gtsmodel.Account, error) {
+ accounts, err := e.state.DB.GetAccountsUsingEmoji(ctx, emoji.ID)
+ if err != nil {
+ return nil, gtserror.Newf("error fetching accounts using emoji %s: %w", emoji.ID, err)
+ }
+ return accounts, nil
+}
+
+func (e *Emoji) getRelatedStatuses(ctx context.Context, emoji *gtsmodel.Emoji) ([]*gtsmodel.Status, error) {
+ statuses, err := e.state.DB.GetStatusesUsingEmoji(ctx, emoji.ID)
+ if err != nil {
+ return nil, gtserror.Newf("error fetching statuses using emoji %s: %w", emoji.ID, err)
+ }
+ return statuses, nil
+}
+
+func (e *Emoji) uncache(ctx context.Context, emoji *gtsmodel.Emoji) error {
+ if gtscontext.DryRun(ctx) {
+ // Dry run, do nothing.
+ return nil
+ }
+
+ // Remove emoji and static.
+ _, err := e.removeFiles(ctx,
+ emoji.ImagePath,
+ emoji.ImageStaticPath,
+ )
+ if err != nil {
+ return gtserror.Newf("error removing emoji files: %w", err)
+ }
+
+ // Update emoji to reflect that we no longer have it cached.
+ log.Debugf(ctx, "marking emoji as uncached: %s", emoji.ID)
+ emoji.Cached = func() *bool { i := false; return &i }()
+ if err := e.state.DB.UpdateEmoji(ctx, emoji, "cached"); err != nil {
+ return gtserror.Newf("error updating emoji: %w", err)
+ }
+
+ return nil
+}
+
func (e *Emoji) delete(ctx context.Context, emoji *gtsmodel.Emoji) error {
if gtscontext.DryRun(ctx) {
// Dry run, do nothing.
diff --git a/internal/cleaner/emoji_test.go b/internal/cleaner/emoji_test.go
new file mode 100644
index 000000000..81fde6e48
--- /dev/null
+++ b/internal/cleaner/emoji_test.go
@@ -0,0 +1,402 @@
+package cleaner_test
+
+import (
+ "context"
+ "errors"
+ "time"
+
+ "github.com/superseriousbusiness/gotosocial/internal/config"
+ "github.com/superseriousbusiness/gotosocial/internal/db"
+ "github.com/superseriousbusiness/gotosocial/internal/gtscontext"
+ "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+)
+
+func (suite *CleanerTestSuite) TestEmojiUncacheRemote() {
+ suite.testEmojiUncacheRemote(
+ context.Background(),
+ mapvals(suite.emojis),
+ )
+}
+
+func (suite *CleanerTestSuite) TestEmojiUncacheRemoteDryRun() {
+ suite.testEmojiUncacheRemote(
+ gtscontext.SetDryRun(context.Background()),
+ mapvals(suite.emojis),
+ )
+}
+
+func (suite *CleanerTestSuite) TestEmojiFixBroken() {
+ suite.testEmojiFixBroken(
+ context.Background(),
+ mapvals(suite.emojis),
+ )
+}
+
+func (suite *CleanerTestSuite) TestEmojiFixBrokenDryRun() {
+ suite.testEmojiFixBroken(
+ gtscontext.SetDryRun(context.Background()),
+ mapvals(suite.emojis),
+ )
+}
+
+func (suite *CleanerTestSuite) TestEmojiPruneUnused() {
+ suite.testEmojiPruneUnused(
+ context.Background(),
+ mapvals(suite.emojis),
+ )
+}
+
+func (suite *CleanerTestSuite) TestEmojiPruneUnusedDryRun() {
+ suite.testEmojiPruneUnused(
+ gtscontext.SetDryRun(context.Background()),
+ mapvals(suite.emojis),
+ )
+}
+
+func (suite *CleanerTestSuite) TestEmojiFixCacheStates() {
+ suite.testEmojiFixCacheStates(
+ context.Background(),
+ mapvals(suite.emojis),
+ )
+}
+
+func (suite *CleanerTestSuite) TestEmojiFixCacheStatesDryRun() {
+ suite.testEmojiFixCacheStates(
+ gtscontext.SetDryRun(context.Background()),
+ mapvals(suite.emojis),
+ )
+}
+
+func (suite *CleanerTestSuite) testEmojiUncacheRemote(ctx context.Context, emojis []*gtsmodel.Emoji) {
+ var uncacheIDs []string
+
+ // Test state.
+ t := suite.T()
+
+ // Get max remote cache days to keep.
+ days := config.GetMediaRemoteCacheDays()
+ olderThan := time.Now().Add(-24 * time.Hour * time.Duration(days))
+
+ for _, emoji := range emojis {
+ // Check whether this emoji should be uncached.
+ ok, err := suite.shouldUncacheEmoji(ctx, emoji, olderThan)
+ if err != nil {
+ t.Fatalf("error checking whether emoji should be uncached: %v", err)
+ }
+
+ if ok {
+ // Mark this emoji ID as to be uncached.
+ uncacheIDs = append(uncacheIDs, emoji.ID)
+ }
+ }
+
+ // Attempt to uncache remote emojis.
+ found, err := suite.cleaner.Emoji().UncacheRemote(ctx, olderThan)
+ if err != nil {
+ t.Errorf("error uncaching remote emojis: %v", err)
+ return
+ }
+
+ // Check expected were uncached.
+ if found != len(uncacheIDs) {
+ t.Errorf("expected %d emojis to be uncached, %d were", len(uncacheIDs), found)
+ return
+ }
+
+ if gtscontext.DryRun(ctx) {
+ // nothing else to test.
+ return
+ }
+
+ for _, id := range uncacheIDs {
+ // Fetch the emoji by ID that should now be uncached.
+ emoji, err := suite.state.DB.GetEmojiByID(ctx, id)
+ if err != nil {
+ t.Fatalf("error fetching emoji from database: %v", err)
+ }
+
+ // Check cache state.
+ if *emoji.Cached {
+ t.Errorf("emoji %s@%s should have been uncached", emoji.Shortcode, emoji.Domain)
+ }
+
+ // Check that the emoji files in storage have been deleted.
+ if ok, err := suite.state.Storage.Has(ctx, emoji.ImagePath); err != nil {
+ t.Fatalf("error checking storage for emoji: %v", err)
+ } else if ok {
+ t.Errorf("emoji %s@%s image path should not exist", emoji.Shortcode, emoji.Domain)
+ } else if ok, err := suite.state.Storage.Has(ctx, emoji.ImageStaticPath); err != nil {
+ t.Fatalf("error checking storage for emoji: %v", err)
+ } else if ok {
+ t.Errorf("emoji %s@%s image static path should not exist", emoji.Shortcode, emoji.Domain)
+ }
+ }
+}
+
+func (suite *CleanerTestSuite) shouldUncacheEmoji(ctx context.Context, emoji *gtsmodel.Emoji, after time.Time) (bool, error) {
+ if emoji.ImageRemoteURL == "" {
+ // Local emojis are never uncached.
+ return false, nil
+ }
+
+ if emoji.Cached == nil || !*emoji.Cached {
+ // Emoji is already uncached.
+ return false, nil
+ }
+
+ // Get related accounts using this emoji (if any).
+ accounts, err := suite.state.DB.GetAccountsUsingEmoji(ctx, emoji.ID)
+ if err != nil {
+ return false, err
+ }
+
+ // Keep the emoji cached if any related account was fetched after the cutoff.
+ for _, account := range accounts {
+ if account.FetchedAt.After(after) {
+ return false, nil
+ }
+ }
+
+ // Get related statuses using this emoji (if any).
+ statuses, err := suite.state.DB.GetStatusesUsingEmoji(ctx, emoji.ID)
+ if err != nil {
+ return false, err
+ }
+
+ // Keep the emoji cached if any related status was fetched after the cutoff.
+ for _, status := range statuses {
+ if status.FetchedAt.After(after) {
+ return false, nil
+ }
+ }
+
+ return true, nil
+}
+
+func (suite *CleanerTestSuite) testEmojiFixBroken(ctx context.Context, emojis []*gtsmodel.Emoji) {
+ var fixIDs []string
+
+ // Test state.
+ t := suite.T()
+
+ for _, emoji := range emojis {
+ // Check whether this emoji should be fixed.
+ ok, err := suite.shouldFixBrokenEmoji(ctx, emoji)
+ if err != nil {
+ t.Fatalf("error checking whether emoji should be fixed: %v", err)
+ }
+
+ if ok {
+ // Mark this emoji ID as to be fixed.
+ fixIDs = append(fixIDs, emoji.ID)
+ }
+ }
+
+ // Attempt to fix broken emojis.
+ found, err := suite.cleaner.Emoji().FixBroken(ctx)
+ if err != nil {
+ t.Errorf("error fixing broken emojis: %v", err)
+ return
+ }
+
+ // Check expected were fixed.
+ if found != len(fixIDs) {
+ t.Errorf("expected %d emojis to be fixed, %d were", len(fixIDs), found)
+ return
+ }
+
+ if gtscontext.DryRun(ctx) {
+ // nothing else to test.
+ return
+ }
+
+ for _, id := range fixIDs {
+ // Fetch the emoji by ID that should now be fixed.
+ emoji, err := suite.state.DB.GetEmojiByID(ctx, id)
+ if err != nil {
+ t.Fatalf("error fetching emoji from database: %v", err)
+ }
+
+ // Ensure category was cleared.
+ if emoji.CategoryID != "" {
+ t.Errorf("emoji %s@%s should have empty category", emoji.Shortcode, emoji.Domain)
+ }
+ }
+}
+
+func (suite *CleanerTestSuite) shouldFixBrokenEmoji(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) {
+ if emoji.CategoryID == "" {
+ // no category issue.
+ return false, nil
+ }
+
+ // Get the related category; a missing entry means broken, any other error is returned.
+ category, err := suite.state.DB.GetEmojiCategory(ctx, emoji.CategoryID)
+ if err != nil && !errors.Is(err, db.ErrNoEntries) {
+ return false, err
+ }
+
+ return (category == nil), nil
+}
+
+func (suite *CleanerTestSuite) testEmojiPruneUnused(ctx context.Context, emojis []*gtsmodel.Emoji) {
+ var pruneIDs []string
+
+ // Test state.
+ t := suite.T()
+
+ for _, emoji := range emojis {
+ // Check whether this emoji should be pruned.
+ ok, err := suite.shouldPruneEmoji(ctx, emoji)
+ if err != nil {
+ t.Fatalf("error checking whether emoji should be pruned: %v", err)
+ }
+
+ if ok {
+ // Mark this emoji ID as to be pruned.
+ pruneIDs = append(pruneIDs, emoji.ID)
+ }
+ }
+
+ // Attempt to prune unused emojis.
+ found, err := suite.cleaner.Emoji().PruneUnused(ctx)
+ if err != nil {
+ t.Errorf("error pruning unused emojis: %v", err)
+ return
+ }
+
+ // Check expected were pruned.
+ if found != len(pruneIDs) {
+ t.Errorf("expected %d emojis to be pruned, %d were", len(pruneIDs), found)
+ return
+ }
+
+ if gtscontext.DryRun(ctx) {
+ // nothing else to test.
+ return
+ }
+
+ for _, id := range pruneIDs {
+ // Fetch the emoji by ID that should now be pruned.
+ emoji, err := suite.state.DB.GetEmojiByID(ctx, id)
+ if err != nil && !errors.Is(err, db.ErrNoEntries) {
+ t.Fatalf("error fetching emoji from database: %v", err)
+ }
+
+ // Ensure gone.
+ if emoji != nil {
+ t.Errorf("emoji %s@%s should have been pruned", emoji.Shortcode, emoji.Domain)
+ }
+ }
+}
+
+func (suite *CleanerTestSuite) shouldPruneEmoji(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) {
+ if emoji.ImageRemoteURL == "" {
+ // Local emojis are never pruned.
+ return false, nil
+ }
+
+ // Get related accounts using this emoji (if any).
+ accounts, err := suite.state.DB.GetAccountsUsingEmoji(ctx, emoji.ID)
+ if err != nil {
+ return false, err
+ } else if len(accounts) > 0 {
+ return false, nil
+ }
+
+ // Get related statuses using this emoji (if any).
+ statuses, err := suite.state.DB.GetStatusesUsingEmoji(ctx, emoji.ID)
+ if err != nil {
+ return false, err
+ } else if len(statuses) > 0 {
+ return false, nil
+ }
+
+ return true, nil
+}
+
+func (suite *CleanerTestSuite) testEmojiFixCacheStates(ctx context.Context, emojis []*gtsmodel.Emoji) {
+ var fixIDs []string
+
+ // Test state.
+ t := suite.T()
+
+ for _, emoji := range emojis {
+ // Check whether this emoji should be fixed.
+ ok, err := suite.shouldFixEmojiCacheState(ctx, emoji)
+ if err != nil {
+ t.Fatalf("error checking whether emoji should be fixed: %v", err)
+ }
+
+ if ok {
+ // Mark this emoji ID as to be fixed.
+ fixIDs = append(fixIDs, emoji.ID)
+ }
+ }
+
+ // Attempt to fix broken emoji cache states.
+ found, err := suite.cleaner.Emoji().FixCacheStates(ctx)
+ if err != nil {
+ t.Errorf("error fixing emoji cache states: %v", err)
+ return
+ }
+
+ // Check expected were fixed.
+ if found != len(fixIDs) {
+ t.Errorf("expected %d emojis to be fixed, %d were", len(fixIDs), found)
+ return
+ }
+
+ if gtscontext.DryRun(ctx) {
+ // nothing else to test.
+ return
+ }
+
+ for _, id := range fixIDs {
+ // Fetch the emoji by ID that should now be fixed.
+ emoji, err := suite.state.DB.GetEmojiByID(ctx, id)
+ if err != nil {
+ t.Fatalf("error fetching emoji from database: %v", err)
+ }
+
+ // Ensure emoji cache state has been fixed.
+ ok, err := suite.shouldFixEmojiCacheState(ctx, emoji)
+ if err != nil {
+ t.Fatalf("error checking whether emoji should be fixed: %v", err)
+ } else if ok {
+ t.Errorf("emoji %s@%s cache state should have been fixed", emoji.Shortcode, emoji.Domain)
+ }
+ }
+}
+
+func (suite *CleanerTestSuite) shouldFixEmojiCacheState(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) {
+ // Check whether emoji image path exists.
+ haveImage, err := suite.state.Storage.Has(ctx, emoji.ImagePath)
+ if err != nil {
+ return false, err
+ }
+
+ // Check whether emoji static path exists.
+ haveStatic, err := suite.state.Storage.Has(ctx, emoji.ImageStaticPath)
+ if err != nil {
+ return false, err
+ }
+
+ switch exists := (haveImage && haveStatic); {
+ case emoji.Cached != nil &&
+ *emoji.Cached && !exists:
+ // (cached can be nil in tests)
+ // Cached but missing files.
+ return true, nil
+
+ case emoji.Cached != nil &&
+ !*emoji.Cached && exists:
+ // (cached can be nil in tests)
+ // Uncached but unexpected files.
+ return true, nil
+
+ default:
+ // No cache state issue.
+ return false, nil
+ }
+}
diff --git a/internal/cleaner/media.go b/internal/cleaner/media.go
index 51a0aea6d..8b11a30bf 100644
--- a/internal/cleaner/media.go
+++ b/internal/cleaner/media.go
@@ -96,9 +96,9 @@ func (m *Media) PruneOrphaned(ctx context.Context) (int, error) {
// All media files in storage will have path fitting: {$account}/{$type}/{$size}/{$id}.{$ext}
if err := m.state.Storage.WalkKeys(ctx, func(ctx context.Context, path string) error {
+ // Check for our expected fileserver path format.
if !regexes.FilePath.MatchString(path) {
- // This is not our expected media
- // path format, skip this one.
+ log.Warn(ctx, "unexpected storage item: %s", path)
return nil
}
@@ -177,10 +177,10 @@ func (m *Media) UncacheRemote(ctx context.Context, olderThan time.Time) (int, er
mostRecent := olderThan
for {
- // Fetch the next batch of attachments older than last-set time.
- attachments, err := m.state.DB.GetRemoteOlderThan(ctx, olderThan, selectLimit)
+ // Fetch the next batch of cached attachments older than last-set time.
+ attachments, err := m.state.DB.GetCachedAttachmentsOlderThan(ctx, olderThan, selectLimit)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
- return total, gtserror.Newf("error getting remote media: %w", err)
+ return total, gtserror.Newf("error getting remote attachments: %w", err)
}
if len(attachments) == 0 {
@@ -220,9 +220,9 @@ func (m *Media) FixCacheStates(ctx context.Context) (int, error) {
for {
// Fetch the next batch of media attachments up to next max ID.
- attachments, err := m.state.DB.GetAttachments(ctx, maxID, selectLimit)
+ attachments, err := m.state.DB.GetRemoteAttachments(ctx, maxID, selectLimit)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
- return total, gtserror.Newf("error getting avatars / headers: %w", err)
+ return total, gtserror.Newf("error getting remote attachments: %w", err)
}
if len(attachments) == 0 {
@@ -323,7 +323,7 @@ func (m *Media) pruneUnused(ctx context.Context, media *gtsmodel.MediaAttachment
l := log.WithContext(ctx).
WithField("media", media.ID)
- // Check whether we have the required account for media.
+ // Check whether we have the required account for media.
account, missing, err := m.getRelatedAccount(ctx, media)
if err != nil {
return false, err
@@ -367,14 +367,6 @@ func (m *Media) pruneUnused(ctx context.Context, media *gtsmodel.MediaAttachment
}
func (m *Media) fixCacheState(ctx context.Context, media *gtsmodel.MediaAttachment) (bool, error) {
- if !*media.Cached {
- // We ignore uncached media, a
- // false negative is a much better
- // situation than a false positive,
- // re-cache will just overwrite it.
- return false, nil
- }
-
// Start a log entry for media.
l := log.WithContext(ctx).
WithField("media", media.ID)
@@ -397,15 +389,33 @@ func (m *Media) fixCacheState(ctx context.Context, media *gtsmodel.MediaAttachme
return false, nil
}
- // So we know this a valid cached media entry.
- // Check that we have the files on disk required....
- return m.checkFiles(ctx, func() error {
- l.Debug("uncaching due to missing media")
- return m.uncache(ctx, media)
- },
+ // Check whether files exist.
+ exist, err := m.haveFiles(ctx,
media.Thumbnail.Path,
media.File.Path,
)
+ if err != nil {
+ return false, err
+ }
+
+ switch {
+ case *media.Cached && !exist:
+ // Mark as uncached if expected files don't exist.
+ l.Debug("cached=true exists=false => uncaching")
+ return true, m.uncache(ctx, media)
+
+ case !*media.Cached && exist:
+ // Remove files if we don't expect them to exist.
+ l.Debug("cached=false exists=true => deleting")
+ _, err := m.removeFiles(ctx,
+ media.Thumbnail.Path,
+ media.File.Path,
+ )
+ return true, err
+
+ default:
+ return false, nil
+ }
}
func (m *Media) uncacheRemote(ctx context.Context, after time.Time, media *gtsmodel.MediaAttachment) (bool, error) {