diff options
| author | 2023-07-24 13:14:13 +0100 | |
|---|---|---|
| committer | 2023-07-24 13:14:13 +0100 | |
| commit | 9eff0d46e49b947dc2642207ee49ed657eb6b565 (patch) | |
| tree | 62994afff170737d83f1ed911e385504a0ad16cd /internal/cleaner | |
| parent | [chore]: Bump github.com/microcosm-cc/bluemonday from 1.0.24 to 1.0.25 (#2021) (diff) | |
| download | gotosocial-9eff0d46e49b947dc2642207ee49ed657eb6b565.tar.xz | |
[feature/performance] support uncaching remote emoji + scheduled cleanup functions (#1987)
Diffstat (limited to 'internal/cleaner')
| -rw-r--r-- | internal/cleaner/cleaner.go | 12 | ||||
| -rw-r--r-- | internal/cleaner/cleaner_test.go | 80 | ||||
| -rw-r--r-- | internal/cleaner/emoji.go | 317 | ||||
| -rw-r--r-- | internal/cleaner/emoji_test.go | 402 | ||||
| -rw-r--r-- | internal/cleaner/media.go | 54 | 
5 files changed, 802 insertions, 63 deletions
diff --git a/internal/cleaner/cleaner.go b/internal/cleaner/cleaner.go index ee1e4785f..70497c10e 100644 --- a/internal/cleaner/cleaner.go +++ b/internal/cleaner/cleaner.go @@ -61,19 +61,19 @@ func (c *Cleaner) Media() *Media {  	return &c.media  } -// checkFiles checks for each of the provided files, and calls onMissing() if any of them are missing. Returns true if missing. -func (c *Cleaner) checkFiles(ctx context.Context, onMissing func() error, files ...string) (bool, error) { +// haveFiles returns whether all of the provided files exist within current storage. +func (c *Cleaner) haveFiles(ctx context.Context, files ...string) (bool, error) {  	for _, file := range files {  		// Check whether each file exists in storage.  		have, err := c.state.Storage.Has(ctx, file)  		if err != nil {  			return false, gtserror.Newf("error checking storage for %s: %w", file, err)  		} else if !have { -			// Missing files, perform hook. -			return true, onMissing() +			// Missing file(s). +			return false, nil  		}  	} -	return false, nil +	return true, nil  }  // removeFiles removes the provided files, returning the number of them returned. @@ -129,7 +129,7 @@ func scheduleJobs(c *Cleaner) {  	c.state.Workers.Scheduler.Schedule(sched.NewJob(func(start time.Time) {  		log.Info(nil, "starting media clean")  		c.Media().All(doneCtx, config.GetMediaRemoteCacheDays()) -		c.Emoji().All(doneCtx) +		c.Emoji().All(doneCtx, config.GetMediaRemoteCacheDays())  		log.Infof(nil, "finished media clean after %s", time.Since(start))  	}).EveryAt(midnight, day))  } diff --git a/internal/cleaner/cleaner_test.go b/internal/cleaner/cleaner_test.go new file mode 100644 index 000000000..d23dac504 --- /dev/null +++ b/internal/cleaner/cleaner_test.go @@ -0,0 +1,80 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program.  If not, see <http://www.gnu.org/licenses/>. + +package cleaner_test + +import ( +	"testing" + +	"github.com/stretchr/testify/suite" +	"github.com/superseriousbusiness/gotosocial/internal/cleaner" +	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" +	"github.com/superseriousbusiness/gotosocial/internal/state" +	"github.com/superseriousbusiness/gotosocial/testrig" +) + +type CleanerTestSuite struct { +	state   state.State +	cleaner *cleaner.Cleaner +	emojis  map[string]*gtsmodel.Emoji +	suite.Suite +} + +func TestCleanerTestSuite(t *testing.T) { +	suite.Run(t, &CleanerTestSuite{}) +} + +func (suite *CleanerTestSuite) SetupSuite() { +	testrig.InitTestConfig() +	testrig.InitTestLog() +} + +func (suite *CleanerTestSuite) SetupTest() { +	// Initialize gts caches. +	suite.state.Caches.Init() + +	// Ensure scheduler started (even if unused). +	suite.state.Workers.Scheduler.Start(nil) + +	// Initialize test database. +	_ = testrig.NewTestDB(&suite.state) +	testrig.StandardDBSetup(suite.state.DB, nil) + +	// Initialize test storage (in-memory). +	suite.state.Storage = testrig.NewInMemoryStorage() + +	// Initialize test cleaner instance. +	suite.cleaner = cleaner.New(&suite.state) + +	// Allocate new test model emojis. +	suite.emojis = testrig.NewTestEmojis() +} + +func (suite *CleanerTestSuite) TearDownTest() { +	testrig.StandardDBTeardown(suite.state.DB) +} + +// mapvals extracts a slice of values from the values contained within the map. +func mapvals[Key comparable, Val any](m map[Key]Val) []Val { +	var i int +	vals := make([]Val, len(m)) +	for _, val := range m { +		vals[i] = val +		i++ +	} +	return vals +} diff --git a/internal/cleaner/emoji.go b/internal/cleaner/emoji.go index 35e579171..d2baec7e8 100644 --- a/internal/cleaner/emoji.go +++ b/internal/cleaner/emoji.go @@ -20,6 +20,7 @@ package cleaner  import (  	"context"  	"errors" +	"time"  	"github.com/superseriousbusiness/gotosocial/internal/db"  	"github.com/superseriousbusiness/gotosocial/internal/gtscontext" @@ -36,22 +37,26 @@ type Emoji struct {  // All will execute all cleaner.Emoji utilities synchronously, including output logging.  // Context will be checked for `gtscontext.DryRun()` in order to actually perform the action. -func (e *Emoji) All(ctx context.Context) { -	e.LogPruneMissing(ctx) +func (e *Emoji) All(ctx context.Context, maxRemoteDays int) { +	t := time.Now().Add(-24 * time.Hour * time.Duration(maxRemoteDays)) +	e.LogUncacheRemote(ctx, t)  	e.LogFixBroken(ctx) +	e.LogPruneUnused(ctx) +	e.LogFixCacheStates(ctx) +	_ = e.state.Storage.Storage.Clean(ctx)  } -// LogPruneMissing performs emoji.PruneMissing(...), logging the start and outcome. -func (e *Emoji) LogPruneMissing(ctx context.Context) { -	log.Info(ctx, "start") -	if n, err := e.PruneMissing(ctx); err != nil { +// LogUncacheRemote performs Emoji.UncacheRemote(...), logging the start and outcome. +func (e *Emoji) LogUncacheRemote(ctx context.Context, olderThan time.Time) { +	log.Infof(ctx, "start older than: %s", olderThan.Format(time.Stamp)) +	if n, err := e.UncacheRemote(ctx, olderThan); err != nil {  		log.Error(ctx, err)  	} else { -		log.Infof(ctx, "pruned: %d", n) +		log.Infof(ctx, "uncached: %d", n)  	}  } -// LogFixBroken performs emoji.FixBroken(...), logging the start and outcome. +// LogFixBroken performs Emoji.FixBroken(...), logging the start and outcome.  func (e *Emoji) LogFixBroken(ctx context.Context) {  	log.Info(ctx, "start")  	if n, err := e.FixBroken(ctx); err != nil { @@ -61,10 +66,78 @@ func (e *Emoji) LogFixBroken(ctx context.Context) {  	}  } -// PruneMissing will delete emoji with missing files from the database and storage driver. -// Context will be checked for `gtscontext.DryRun()` to perform the action. NOTE: this function -// should be updated to match media.FixCacheStat() if we ever support emoji uncaching. -func (e *Emoji) PruneMissing(ctx context.Context) (int, error) { +// LogPruneUnused performs Emoji.PruneUnused(...), logging the start and outcome. +func (e *Emoji) LogPruneUnused(ctx context.Context) { +	log.Info(ctx, "start") +	if n, err := e.PruneUnused(ctx); err != nil { +		log.Error(ctx, err) +	} else { +		log.Infof(ctx, "pruned: %d", n) +	} +} + +// LogFixCacheStates performs Emoji.FixCacheStates(...), logging the start and outcome. +func (e *Emoji) LogFixCacheStates(ctx context.Context) { +	log.Info(ctx, "start") +	if n, err := e.FixCacheStates(ctx); err != nil { +		log.Error(ctx, err) +	} else { +		log.Infof(ctx, "fixed: %d", n) +	} +} + +// UncacheRemote will uncache all remote emoji older than given input time. Context +// will be checked for `gtscontext.DryRun()` in order to actually perform the action. +func (e *Emoji) UncacheRemote(ctx context.Context, olderThan time.Time) (int, error) { +	var total int + +	// Drop time by a minute to improve search, +	// (i.e. make it olderThan inclusive search). +	olderThan = olderThan.Add(-time.Minute) + +	// Store recent time. +	mostRecent := olderThan + +	for { +		// Fetch the next batch of cached emojis older than last-set time. +		emojis, err := e.state.DB.GetCachedEmojisOlderThan(ctx, olderThan, selectLimit) +		if err != nil && !errors.Is(err, db.ErrNoEntries) { +			return total, gtserror.Newf("error getting remote emoji: %w", err) +		} + +		if len(emojis) == 0 { +			// reached end. +			break +		} + +		// Use last created-at as the next 'olderThan' value. +		olderThan = emojis[len(emojis)-1].CreatedAt + +		for _, emoji := range emojis { +			// Check / uncache each remote emoji. +			uncached, err := e.uncacheRemote(ctx, +				mostRecent, +				emoji, +			) +			if err != nil { +				return total, err +			} + +			if uncached { +				// Update +				// count. +				total++ +			} +		} +	} + +	return total, nil +} + +// FixBroken will check all emojis for valid related models (e.g. category). +// Broken media will be automatically updated to remove now-missing models. +// Context will be checked for `gtscontext.DryRun()` to perform the action. +func (e *Emoji) FixBroken(ctx context.Context) (int, error) {  	var (  		total int  		maxID string @@ -86,8 +159,8 @@ func (e *Emoji) PruneMissing(ctx context.Context) (int, error) {  		maxID = emojis[len(emojis)-1].ID  		for _, emoji := range emojis { -			// Check / fix missing emoji media. -			fixed, err := e.pruneMissing(ctx, emoji) +			// Check / fix missing broken emoji. +			fixed, err := e.fixBroken(ctx, emoji)  			if err != nil {  				return total, err  			} @@ -103,10 +176,10 @@ func (e *Emoji) PruneMissing(ctx context.Context) (int, error) {  	return total, nil  } -// FixBroken will check all emojis for valid related models (e.g. category). -// Broken media will be automatically updated to remove now-missing models. -// Context will be checked for `gtscontext.DryRun()` to perform the action. -func (e *Emoji) FixBroken(ctx context.Context) (int, error) { +// PruneUnused will delete all unused emoji media from the database and storage driver. +// Context will be checked for `gtscontext.DryRun()` to perform the action. NOTE: this function +// should be updated to match media.FixCacheStat() if we ever support emoji uncaching. +func (e *Emoji) PruneUnused(ctx context.Context) (int, error) {  	var (  		total int  		maxID string @@ -114,9 +187,9 @@ func (e *Emoji) FixBroken(ctx context.Context) (int, error) {  	for {  		// Fetch the next batch of emoji media up to next ID. -		emojis, err := e.state.DB.GetEmojis(ctx, maxID, selectLimit) +		emojis, err := e.state.DB.GetRemoteEmojis(ctx, maxID, selectLimit)  		if err != nil && !errors.Is(err, db.ErrNoEntries) { -			return total, gtserror.Newf("error getting emojis: %w", err) +			return total, gtserror.Newf("error getting remote emojis: %w", err)  		}  		if len(emojis) == 0 { @@ -128,8 +201,50 @@ func (e *Emoji) FixBroken(ctx context.Context) (int, error) {  		maxID = emojis[len(emojis)-1].ID  		for _, emoji := range emojis { -			// Check / fix missing broken emoji. -			fixed, err := e.fixBroken(ctx, emoji) +			// Check / prune unused emoji media. +			fixed, err := e.pruneUnused(ctx, emoji) +			if err != nil { +				return total, err +			} + +			if fixed { +				// Update +				// count. +				total++ +			} +		} +	} + +	return total, nil +} + +// FixCacheStatus will check all emoji for up-to-date cache status (i.e. in storage driver). +// Context will be checked for `gtscontext.DryRun()` to perform the action. NOTE: this function +// should be updated to match media.FixCacheStat() if we ever support emoji uncaching. +func (e *Emoji) FixCacheStates(ctx context.Context) (int, error) { +	var ( +		total int +		maxID string +	) + +	for { +		// Fetch the next batch of emoji media up to next ID. +		emojis, err := e.state.DB.GetRemoteEmojis(ctx, maxID, selectLimit) +		if err != nil && !errors.Is(err, db.ErrNoEntries) { +			return total, gtserror.Newf("error getting remote emojis: %w", err) +		} + +		if len(emojis) == 0 { +			// reached end. +			break +		} + +		// Use last as the next 'maxID' value. +		maxID = emojis[len(emojis)-1].ID + +		for _, emoji := range emojis { +			// Check / fix required emoji cache states. +			fixed, err := e.fixCacheState(ctx, emoji)  			if err != nil {  				return total, err  			} @@ -145,22 +260,113 @@ func (e *Emoji) FixBroken(ctx context.Context) (int, error) {  	return total, nil  } -func (e *Emoji) pruneMissing(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { -	return e.checkFiles(ctx, func() error { -		// Emoji missing files, delete it. -		// NOTE: if we ever support uncaching -		// of emojis, change to e.uncache(). -		// In that case we should also rename -		// this function to match the media -		// equivalent -> fixCacheState(). -		log.WithContext(ctx). -			WithField("emoji", emoji.ID). -			Debug("deleting due to missing emoji") -		return e.delete(ctx, emoji) -	}, +func (e *Emoji) pruneUnused(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { +	// Start a log entry for emoji. +	l := log.WithContext(ctx). +		WithField("emoji", emoji.ID) + +	// Load any related accounts using this emoji. +	accounts, err := e.getRelatedAccounts(ctx, emoji) +	if err != nil { +		return false, err +	} else if len(accounts) > 0 { +		l.Debug("skipping as account emoji in use") +		return false, nil +	} + +	// Load any related statuses using this emoji. +	statuses, err := e.getRelatedStatuses(ctx, emoji) +	if err != nil { +		return false, err +	} else if len(statuses) > 0 { +		l.Debug("skipping as status emoji in use") +		return false, nil +	} + +	// Check not recently created, give it some time to be "used" again. +	if time.Now().Add(-24 * time.Hour * 7).Before(emoji.CreatedAt) { +		l.Debug("skipping due to recently created") +		return false, nil +	} + +	// Emoji totally unused, delete it. +	l.Debug("deleting unused emoji") +	return true, e.delete(ctx, emoji) +} + +func (e *Emoji) fixCacheState(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { +	// Start a log entry for emoji. +	l := log.WithContext(ctx). +		WithField("emoji", emoji.ID) + +	// Check whether files exist. +	exist, err := e.haveFiles(ctx,  		emoji.ImageStaticPath,  		emoji.ImagePath,  	) +	if err != nil { +		return false, err +	} + +	switch { +	case *emoji.Cached && !exist: +		// Mark as uncached if expected files don't exist. +		l.Debug("cached=true exists=false => marking uncached") +		return true, e.uncache(ctx, emoji) + +	case !*emoji.Cached && exist: +		// Remove files if we don't expect them to exist. +		l.Debug("cached=false exists=true => removing files") +		_, err := e.removeFiles(ctx, +			emoji.ImageStaticPath, +			emoji.ImagePath, +		) +		return true, err + +	default: +		return false, nil +	} +} + +func (e *Emoji) uncacheRemote(ctx context.Context, after time.Time, emoji *gtsmodel.Emoji) (bool, error) { +	if !*emoji.Cached { +		// Already uncached. +		return false, nil +	} + +	// Start a log entry for emoji. +	l := log.WithContext(ctx). +		WithField("emoji", emoji.ID) + +	// Load any related accounts using this emoji. +	accounts, err := e.getRelatedAccounts(ctx, emoji) +	if err != nil { +		return false, err +	} + +	for _, account := range accounts { +		if account.FetchedAt.After(after) { +			l.Debug("skipping due to recently fetched account") +			return false, nil +		} +	} + +	// Load any related statuses using this emoji. +	statuses, err := e.getRelatedStatuses(ctx, emoji) +	if err != nil { +		return false, err +	} + +	for _, status := range statuses { +		if status.FetchedAt.After(after) { +			l.Debug("skipping due to recently fetched status") +			return false, nil +		} +	} + +	// This emoji is too old, uncache it. +	l.Debug("uncaching old remote emoji") +	return true, e.uncache(ctx, emoji)  }  func (e *Emoji) fixBroken(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { @@ -214,6 +420,47 @@ func (e *Emoji) getRelatedCategory(ctx context.Context, emoji *gtsmodel.Emoji) (  	return category, false, nil  } +func (e *Emoji) getRelatedAccounts(ctx context.Context, emoji *gtsmodel.Emoji) ([]*gtsmodel.Account, error) { +	accounts, err := e.state.DB.GetAccountsUsingEmoji(ctx, emoji.ID) +	if err != nil { +		return nil, gtserror.Newf("error fetching accounts using emoji %s: %w", emoji.ID, err) +	} +	return accounts, nil +} + +func (e *Emoji) getRelatedStatuses(ctx context.Context, emoji *gtsmodel.Emoji) ([]*gtsmodel.Status, error) { +	statuses, err := e.state.DB.GetStatusesUsingEmoji(ctx, emoji.ID) +	if err != nil { +		return nil, gtserror.Newf("error fetching statuses using emoji %s: %w", emoji.ID, err) +	} +	return statuses, nil +} + +func (e *Emoji) uncache(ctx context.Context, emoji *gtsmodel.Emoji) error { +	if gtscontext.DryRun(ctx) { +		// Dry run, do nothing. +		return nil +	} + +	// Remove emoji and static. +	_, err := e.removeFiles(ctx, +		emoji.ImagePath, +		emoji.ImageStaticPath, +	) +	if err != nil { +		return gtserror.Newf("error removing emoji files: %w", err) +	} + +	// Update emoji to reflect that we no longer have it cached. +	log.Debugf(ctx, "marking emoji as uncached: %s", emoji.ID) +	emoji.Cached = func() *bool { i := false; return &i }() +	if err := e.state.DB.UpdateEmoji(ctx, emoji, "cached"); err != nil { +		return gtserror.Newf("error updating emoji: %w", err) +	} + +	return nil +} +  func (e *Emoji) delete(ctx context.Context, emoji *gtsmodel.Emoji) error {  	if gtscontext.DryRun(ctx) {  		// Dry run, do nothing. diff --git a/internal/cleaner/emoji_test.go b/internal/cleaner/emoji_test.go new file mode 100644 index 000000000..81fde6e48 --- /dev/null +++ b/internal/cleaner/emoji_test.go @@ -0,0 +1,402 @@ +package cleaner_test + +import ( +	"context" +	"errors" +	"time" + +	"github.com/superseriousbusiness/gotosocial/internal/config" +	"github.com/superseriousbusiness/gotosocial/internal/db" +	"github.com/superseriousbusiness/gotosocial/internal/gtscontext" +	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" +) + +func (suite *CleanerTestSuite) TestEmojiUncacheRemote() { +	suite.testEmojiUncacheRemote( +		context.Background(), +		mapvals(suite.emojis), +	) +} + +func (suite *CleanerTestSuite) TestEmojiUncacheRemoteDryRun() { +	suite.testEmojiUncacheRemote( +		gtscontext.SetDryRun(context.Background()), +		mapvals(suite.emojis), +	) +} + +func (suite *CleanerTestSuite) TestEmojiFixBroken() { +	suite.testEmojiFixBroken( +		context.Background(), +		mapvals(suite.emojis), +	) +} + +func (suite *CleanerTestSuite) TestEmojiFixBrokenDryRun() { +	suite.testEmojiFixBroken( +		gtscontext.SetDryRun(context.Background()), +		mapvals(suite.emojis), +	) +} + +func (suite *CleanerTestSuite) TestEmojiPruneUnused() { +	suite.testEmojiPruneUnused( +		context.Background(), +		mapvals(suite.emojis), +	) +} + +func (suite *CleanerTestSuite) TestEmojiPruneUnusedDryRun() { +	suite.testEmojiPruneUnused( +		gtscontext.SetDryRun(context.Background()), +		mapvals(suite.emojis), +	) +} + +func (suite *CleanerTestSuite) TestEmojiFixCacheStates() { +	suite.testEmojiFixCacheStates( +		context.Background(), +		mapvals(suite.emojis), +	) +} + +func (suite *CleanerTestSuite) TestEmojiFixCacheStatesDryRun() { +	suite.testEmojiFixCacheStates( +		gtscontext.SetDryRun(context.Background()), +		mapvals(suite.emojis), +	) +} + +func (suite *CleanerTestSuite) testEmojiUncacheRemote(ctx context.Context, emojis []*gtsmodel.Emoji) { +	var uncacheIDs []string + +	// Test state. +	t := suite.T() + +	// Get max remote cache days to keep. +	days := config.GetMediaRemoteCacheDays() +	olderThan := time.Now().Add(-24 * time.Hour * time.Duration(days)) + +	for _, emoji := range emojis { +		// Check whether this emoji should be uncached. +		ok, err := suite.shouldUncacheEmoji(ctx, emoji, olderThan) +		if err != nil { +			t.Fatalf("error checking whether emoji should be uncached: %v", err) +		} + +		if ok { +			// Mark this emoji ID as to be uncached. +			uncacheIDs = append(uncacheIDs, emoji.ID) +		} +	} + +	// Attempt to uncache remote emojis. +	found, err := suite.cleaner.Emoji().UncacheRemote(ctx, olderThan) +	if err != nil { +		t.Errorf("error uncaching remote emojis: %v", err) +		return +	} + +	// Check expected were uncached. +	if found != len(uncacheIDs) { +		t.Errorf("expected %d emojis to be uncached, %d were", len(uncacheIDs), found) +		return +	} + +	if gtscontext.DryRun(ctx) { +		// nothing else to test. +		return +	} + +	for _, id := range uncacheIDs { +		// Fetch the emoji by ID that should now be uncached. +		emoji, err := suite.state.DB.GetEmojiByID(ctx, id) +		if err != nil { +			t.Fatalf("error fetching emoji from database: %v", err) +		} + +		// Check cache state. +		if *emoji.Cached { +			t.Errorf("emoji %s@%s should have been uncached", emoji.Shortcode, emoji.Domain) +		} + +		// Check that the emoji files in storage have been deleted. +		if ok, err := suite.state.Storage.Has(ctx, emoji.ImagePath); err != nil { +			t.Fatalf("error checking storage for emoji: %v", err) +		} else if ok { +			t.Errorf("emoji %s@%s image path should not exist", emoji.Shortcode, emoji.Domain) +		} else if ok, err := suite.state.Storage.Has(ctx, emoji.ImageStaticPath); err != nil { +			t.Fatalf("error checking storage for emoji: %v", err) +		} else if ok { +			t.Errorf("emoji %s@%s image static path should not exist", emoji.Shortcode, emoji.Domain) +		} +	} +} + +func (suite *CleanerTestSuite) shouldUncacheEmoji(ctx context.Context, emoji *gtsmodel.Emoji, after time.Time) (bool, error) { +	if emoji.ImageRemoteURL == "" { +		// Local emojis are never uncached. +		return false, nil +	} + +	if emoji.Cached == nil || !*emoji.Cached { +		// Emoji is already uncached. +		return false, nil +	} + +	// Get related accounts using this emoji (if any). +	accounts, err := suite.state.DB.GetAccountsUsingEmoji(ctx, emoji.ID) +	if err != nil { +		return false, err +	} + +	// Check if accounts are recently updated. +	for _, account := range accounts { +		if account.FetchedAt.After(after) { +			return false, nil +		} +	} + +	// Get related statuses using this emoji (if any). +	statuses, err := suite.state.DB.GetStatusesUsingEmoji(ctx, emoji.ID) +	if err != nil { +		return false, err +	} + +	// Check if statuses are recently updated. +	for _, status := range statuses { +		if status.FetchedAt.After(after) { +			return false, nil +		} +	} + +	return true, nil +} + +func (suite *CleanerTestSuite) testEmojiFixBroken(ctx context.Context, emojis []*gtsmodel.Emoji) { +	var fixIDs []string + +	// Test state. +	t := suite.T() + +	for _, emoji := range emojis { +		// Check whether this emoji should be fixed. +		ok, err := suite.shouldFixBrokenEmoji(ctx, emoji) +		if err != nil { +			t.Fatalf("error checking whether emoji should be fixed: %v", err) +		} + +		if ok { +			// Mark this emoji ID as to be fixed. +			fixIDs = append(fixIDs, emoji.ID) +		} +	} + +	// Attempt to fix broken emojis. +	found, err := suite.cleaner.Emoji().FixBroken(ctx) +	if err != nil { +		t.Errorf("error fixing broken emojis: %v", err) +		return +	} + +	// Check expected were fixed. +	if found != len(fixIDs) { +		t.Errorf("expected %d emojis to be fixed, %d were", len(fixIDs), found) +		return +	} + +	if gtscontext.DryRun(ctx) { +		// nothing else to test. +		return +	} + +	for _, id := range fixIDs { +		// Fetch the emoji by ID that should now be fixed. +		emoji, err := suite.state.DB.GetEmojiByID(ctx, id) +		if err != nil { +			t.Fatalf("error fetching emoji from database: %v", err) +		} + +		// Ensure category was cleared. +		if emoji.CategoryID != "" { +			t.Errorf("emoji %s@%s should have empty category", emoji.Shortcode, emoji.Domain) +		} +	} +} + +func (suite *CleanerTestSuite) shouldFixBrokenEmoji(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { +	if emoji.CategoryID == "" { +		// no category issue. +		return false, nil +	} + +	// Get the related category for this emoji. +	category, err := suite.state.DB.GetEmojiCategory(ctx, emoji.CategoryID) +	if err != nil && !errors.Is(err, db.ErrNoEntries) { +		return false, nil +	} + +	return (category == nil), nil +} + +func (suite *CleanerTestSuite) testEmojiPruneUnused(ctx context.Context, emojis []*gtsmodel.Emoji) { +	var pruneIDs []string + +	// Test state. +	t := suite.T() + +	for _, emoji := range emojis { +		// Check whether this emoji should be pruned. +		ok, err := suite.shouldPruneEmoji(ctx, emoji) +		if err != nil { +			t.Fatalf("error checking whether emoji should be pruned: %v", err) +		} + +		if ok { +			// Mark this emoji ID as to be pruned. +			pruneIDs = append(pruneIDs, emoji.ID) +		} +	} + +	// Attempt to prune emojis. +	found, err := suite.cleaner.Emoji().PruneUnused(ctx) +	if err != nil { +		t.Errorf("error fixing broken emojis: %v", err) +		return +	} + +	// Check expected were pruned. +	if found != len(pruneIDs) { +		t.Errorf("expected %d emojis to be pruned, %d were", len(pruneIDs), found) +		return +	} + +	if gtscontext.DryRun(ctx) { +		// nothing else to test. +		return +	} + +	for _, id := range pruneIDs { +		// Fetch the emoji by ID that should now be pruned. +		emoji, err := suite.state.DB.GetEmojiByID(ctx, id) +		if err != nil && !errors.Is(err, db.ErrNoEntries) { +			t.Fatalf("error fetching emoji from database: %v", err) +		} + +		// Ensure gone. +		if emoji != nil { +			t.Errorf("emoji %s@%s should have been pruned", emoji.Shortcode, emoji.Domain) +		} +	} +} + +func (suite *CleanerTestSuite) shouldPruneEmoji(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { +	if emoji.ImageRemoteURL == "" { +		// Local emojis are never pruned. +		return false, nil +	} + +	// Get related accounts using this emoji (if any). +	accounts, err := suite.state.DB.GetAccountsUsingEmoji(ctx, emoji.ID) +	if err != nil { +		return false, err +	} else if len(accounts) > 0 { +		return false, nil +	} + +	// Get related statuses using this emoji (if any). +	statuses, err := suite.state.DB.GetStatusesUsingEmoji(ctx, emoji.ID) +	if err != nil { +		return false, err +	} else if len(statuses) > 0 { +		return false, nil +	} + +	return true, nil +} + +func (suite *CleanerTestSuite) testEmojiFixCacheStates(ctx context.Context, emojis []*gtsmodel.Emoji) { +	var fixIDs []string + +	// Test state. +	t := suite.T() + +	for _, emoji := range emojis { +		// Check whether this emoji should be fixed. +		ok, err := suite.shouldFixEmojiCacheState(ctx, emoji) +		if err != nil { +			t.Fatalf("error checking whether emoji should be fixed: %v", err) +		} + +		if ok { +			// Mark this emoji ID as to be fixed. +			fixIDs = append(fixIDs, emoji.ID) +		} +	} + +	// Attempt to fix broken emoji cache states. +	found, err := suite.cleaner.Emoji().FixCacheStates(ctx) +	if err != nil { +		t.Errorf("error fixing broken emojis: %v", err) +		return +	} + +	// Check expected were fixed. +	if found != len(fixIDs) { +		t.Errorf("expected %d emojis to be fixed, %d were", len(fixIDs), found) +		return +	} + +	if gtscontext.DryRun(ctx) { +		// nothing else to test. +		return +	} + +	for _, id := range fixIDs { +		// Fetch the emoji by ID that should now be fixed. +		emoji, err := suite.state.DB.GetEmojiByID(ctx, id) +		if err != nil { +			t.Fatalf("error fetching emoji from database: %v", err) +		} + +		// Ensure emoji cache state has been fixed. +		ok, err := suite.shouldFixEmojiCacheState(ctx, emoji) +		if err != nil { +			t.Fatalf("error checking whether emoji should be fixed: %v", err) +		} else if ok { +			t.Errorf("emoji %s@%s cache state should have been fixed", emoji.Shortcode, emoji.Domain) +		} +	} +} + +func (suite *CleanerTestSuite) shouldFixEmojiCacheState(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { +	// Check whether emoji image path exists. +	haveImage, err := suite.state.Storage.Has(ctx, emoji.ImagePath) +	if err != nil { +		return false, err +	} + +	// Check whether emoji static path exists. +	haveStatic, err := suite.state.Storage.Has(ctx, emoji.ImageStaticPath) +	if err != nil { +		return false, err +	} + +	switch exists := (haveImage && haveStatic); { +	case emoji.Cached != nil && +		*emoji.Cached && !exists: +		// (cached can be nil in tests) +		// Cached but missing files. +		return true, nil + +	case emoji.Cached != nil && +		!*emoji.Cached && exists: +		// (cached can be nil in tests) +		// Uncached but unexpected files. +		return true, nil + +	default: +		// No cache state issue. +		return false, nil +	} +} diff --git a/internal/cleaner/media.go b/internal/cleaner/media.go index 51a0aea6d..8b11a30bf 100644 --- a/internal/cleaner/media.go +++ b/internal/cleaner/media.go @@ -96,9 +96,9 @@ func (m *Media) PruneOrphaned(ctx context.Context) (int, error) {  	// All media files in storage will have path fitting: {$account}/{$type}/{$size}/{$id}.{$ext}  	if err := m.state.Storage.WalkKeys(ctx, func(ctx context.Context, path string) error { +		// Check for our expected fileserver path format.  		if !regexes.FilePath.MatchString(path) { -			// This is not our expected media -			// path format, skip this one. +			log.Warn(ctx, "unexpected storage item: %s", path)  			return nil  		} @@ -177,10 +177,10 @@ func (m *Media) UncacheRemote(ctx context.Context, olderThan time.Time) (int, er  	mostRecent := olderThan  	for { -		// Fetch the next batch of attachments older than last-set time. -		attachments, err := m.state.DB.GetRemoteOlderThan(ctx, olderThan, selectLimit) +		// Fetch the next batch of cached attachments older than last-set time. +		attachments, err := m.state.DB.GetCachedAttachmentsOlderThan(ctx, olderThan, selectLimit)  		if err != nil && !errors.Is(err, db.ErrNoEntries) { -			return total, gtserror.Newf("error getting remote media: %w", err) +			return total, gtserror.Newf("error getting remote attachments: %w", err)  		}  		if len(attachments) == 0 { @@ -220,9 +220,9 @@ func (m *Media) FixCacheStates(ctx context.Context) (int, error) {  	for {  		// Fetch the next batch of media attachments up to next max ID. -		attachments, err := m.state.DB.GetAttachments(ctx, maxID, selectLimit) +		attachments, err := m.state.DB.GetRemoteAttachments(ctx, maxID, selectLimit)  		if err != nil && !errors.Is(err, db.ErrNoEntries) { -			return total, gtserror.Newf("error getting avatars / headers: %w", err) +			return total, gtserror.Newf("error getting remote attachments: %w", err)  		}  		if len(attachments) == 0 { @@ -323,7 +323,7 @@ func (m *Media) pruneUnused(ctx context.Context, media *gtsmodel.MediaAttachment  	l := log.WithContext(ctx).  		WithField("media", media.ID) -		// Check whether we have the required account for media. +	// Check whether we have the required account for media.  	account, missing, err := m.getRelatedAccount(ctx, media)  	if err != nil {  		return false, err @@ -367,14 +367,6 @@ func (m *Media) pruneUnused(ctx context.Context, media *gtsmodel.MediaAttachment  }  func (m *Media) fixCacheState(ctx context.Context, media *gtsmodel.MediaAttachment) (bool, error) { -	if !*media.Cached { -		// We ignore uncached media, a -		// false negative is a much better -		// situation than a false positive, -		// re-cache will just overwrite it. -		return false, nil -	} -  	// Start a log entry for media.  	l := log.WithContext(ctx).  		WithField("media", media.ID) @@ -397,15 +389,33 @@ func (m *Media) fixCacheState(ctx context.Context, media *gtsmodel.MediaAttachme  		return false, nil  	} -	// So we know this a valid cached media entry. -	// Check that we have the files on disk required.... -	return m.checkFiles(ctx, func() error { -		l.Debug("uncaching due to missing media") -		return m.uncache(ctx, media) -	}, +	// Check whether files exist. +	exist, err := m.haveFiles(ctx,  		media.Thumbnail.Path,  		media.File.Path,  	) +	if err != nil { +		return false, err +	} + +	switch { +	case *media.Cached && !exist: +		// Mark as uncached if expected files don't exist. +		l.Debug("cached=true exists=false => uncaching") +		return true, m.uncache(ctx, media) + +	case !*media.Cached && exist: +		// Remove files if we don't expect them to exist. +		l.Debug("cached=false exists=true => deleting") +		_, err := m.removeFiles(ctx, +			media.Thumbnail.Path, +			media.File.Path, +		) +		return true, err + +	default: +		return false, nil +	}  }  func (m *Media) uncacheRemote(ctx context.Context, after time.Time, media *gtsmodel.MediaAttachment) (bool, error) {  | 
