diff options
Diffstat (limited to 'internal')
29 files changed, 1238 insertions, 216 deletions
diff --git a/internal/cleaner/cleaner.go b/internal/cleaner/cleaner.go index ee1e4785f..70497c10e 100644 --- a/internal/cleaner/cleaner.go +++ b/internal/cleaner/cleaner.go @@ -61,19 +61,19 @@ func (c *Cleaner) Media() *Media {  	return &c.media  } -// checkFiles checks for each of the provided files, and calls onMissing() if any of them are missing. Returns true if missing. -func (c *Cleaner) checkFiles(ctx context.Context, onMissing func() error, files ...string) (bool, error) { +// haveFiles returns whether all of the provided files exist within current storage. +func (c *Cleaner) haveFiles(ctx context.Context, files ...string) (bool, error) {  	for _, file := range files {  		// Check whether each file exists in storage.  		have, err := c.state.Storage.Has(ctx, file)  		if err != nil {  			return false, gtserror.Newf("error checking storage for %s: %w", file, err)  		} else if !have { -			// Missing files, perform hook. -			return true, onMissing() +			// Missing file(s). +			return false, nil  		}  	} -	return false, nil +	return true, nil  }  // removeFiles removes the provided files, returning the number of them returned. 
@@ -129,7 +129,7 @@ func scheduleJobs(c *Cleaner) {  	c.state.Workers.Scheduler.Schedule(sched.NewJob(func(start time.Time) {  		log.Info(nil, "starting media clean")  		c.Media().All(doneCtx, config.GetMediaRemoteCacheDays()) -		c.Emoji().All(doneCtx) +		c.Emoji().All(doneCtx, config.GetMediaRemoteCacheDays())  		log.Infof(nil, "finished media clean after %s", time.Since(start))  	}).EveryAt(midnight, day))  } diff --git a/internal/cleaner/cleaner_test.go b/internal/cleaner/cleaner_test.go new file mode 100644 index 000000000..d23dac504 --- /dev/null +++ b/internal/cleaner/cleaner_test.go @@ -0,0 +1,80 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+ +package cleaner_test + +import ( +	"testing" + +	"github.com/stretchr/testify/suite" +	"github.com/superseriousbusiness/gotosocial/internal/cleaner" +	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" +	"github.com/superseriousbusiness/gotosocial/internal/state" +	"github.com/superseriousbusiness/gotosocial/testrig" +) + +type CleanerTestSuite struct { +	state   state.State +	cleaner *cleaner.Cleaner +	emojis  map[string]*gtsmodel.Emoji +	suite.Suite +} + +func TestCleanerTestSuite(t *testing.T) { +	suite.Run(t, &CleanerTestSuite{}) +} + +func (suite *CleanerTestSuite) SetupSuite() { +	testrig.InitTestConfig() +	testrig.InitTestLog() +} + +func (suite *CleanerTestSuite) SetupTest() { +	// Initialize gts caches. +	suite.state.Caches.Init() + +	// Ensure scheduler started (even if unused). +	suite.state.Workers.Scheduler.Start(nil) + +	// Initialize test database. +	_ = testrig.NewTestDB(&suite.state) +	testrig.StandardDBSetup(suite.state.DB, nil) + +	// Initialize test storage (in-memory). +	suite.state.Storage = testrig.NewInMemoryStorage() + +	// Initialize test cleaner instance. +	suite.cleaner = cleaner.New(&suite.state) + +	// Allocate new test model emojis. +	suite.emojis = testrig.NewTestEmojis() +} + +func (suite *CleanerTestSuite) TearDownTest() { +	testrig.StandardDBTeardown(suite.state.DB) +} + +// mapvals extracts a slice of values from the values contained within the map. 
+func mapvals[Key comparable, Val any](m map[Key]Val) []Val { +	var i int +	vals := make([]Val, len(m)) +	for _, val := range m { +		vals[i] = val +		i++ +	} +	return vals +} diff --git a/internal/cleaner/emoji.go b/internal/cleaner/emoji.go index 35e579171..d2baec7e8 100644 --- a/internal/cleaner/emoji.go +++ b/internal/cleaner/emoji.go @@ -20,6 +20,7 @@ package cleaner  import (  	"context"  	"errors" +	"time"  	"github.com/superseriousbusiness/gotosocial/internal/db"  	"github.com/superseriousbusiness/gotosocial/internal/gtscontext" @@ -36,22 +37,26 @@ type Emoji struct {  // All will execute all cleaner.Emoji utilities synchronously, including output logging.  // Context will be checked for `gtscontext.DryRun()` in order to actually perform the action. -func (e *Emoji) All(ctx context.Context) { -	e.LogPruneMissing(ctx) +func (e *Emoji) All(ctx context.Context, maxRemoteDays int) { +	t := time.Now().Add(-24 * time.Hour * time.Duration(maxRemoteDays)) +	e.LogUncacheRemote(ctx, t)  	e.LogFixBroken(ctx) +	e.LogPruneUnused(ctx) +	e.LogFixCacheStates(ctx) +	_ = e.state.Storage.Storage.Clean(ctx)  } -// LogPruneMissing performs emoji.PruneMissing(...), logging the start and outcome. -func (e *Emoji) LogPruneMissing(ctx context.Context) { -	log.Info(ctx, "start") -	if n, err := e.PruneMissing(ctx); err != nil { +// LogUncacheRemote performs Emoji.UncacheRemote(...), logging the start and outcome. +func (e *Emoji) LogUncacheRemote(ctx context.Context, olderThan time.Time) { +	log.Infof(ctx, "start older than: %s", olderThan.Format(time.Stamp)) +	if n, err := e.UncacheRemote(ctx, olderThan); err != nil {  		log.Error(ctx, err)  	} else { -		log.Infof(ctx, "pruned: %d", n) +		log.Infof(ctx, "uncached: %d", n)  	}  } -// LogFixBroken performs emoji.FixBroken(...), logging the start and outcome. +// LogFixBroken performs Emoji.FixBroken(...), logging the start and outcome.  
func (e *Emoji) LogFixBroken(ctx context.Context) {  	log.Info(ctx, "start")  	if n, err := e.FixBroken(ctx); err != nil { @@ -61,10 +66,78 @@ func (e *Emoji) LogFixBroken(ctx context.Context) {  	}  } -// PruneMissing will delete emoji with missing files from the database and storage driver. -// Context will be checked for `gtscontext.DryRun()` to perform the action. NOTE: this function -// should be updated to match media.FixCacheStat() if we ever support emoji uncaching. -func (e *Emoji) PruneMissing(ctx context.Context) (int, error) { +// LogPruneUnused performs Emoji.PruneUnused(...), logging the start and outcome. +func (e *Emoji) LogPruneUnused(ctx context.Context) { +	log.Info(ctx, "start") +	if n, err := e.PruneUnused(ctx); err != nil { +		log.Error(ctx, err) +	} else { +		log.Infof(ctx, "pruned: %d", n) +	} +} + +// LogFixCacheStates performs Emoji.FixCacheStates(...), logging the start and outcome. +func (e *Emoji) LogFixCacheStates(ctx context.Context) { +	log.Info(ctx, "start") +	if n, err := e.FixCacheStates(ctx); err != nil { +		log.Error(ctx, err) +	} else { +		log.Infof(ctx, "fixed: %d", n) +	} +} + +// UncacheRemote will uncache all remote emoji older than given input time. Context +// will be checked for `gtscontext.DryRun()` in order to actually perform the action. +func (e *Emoji) UncacheRemote(ctx context.Context, olderThan time.Time) (int, error) { +	var total int + +	// Drop time by a minute to improve search, +	// (i.e. make it olderThan inclusive search). +	olderThan = olderThan.Add(-time.Minute) + +	// Store recent time. +	mostRecent := olderThan + +	for { +		// Fetch the next batch of cached emojis older than last-set time. +		emojis, err := e.state.DB.GetCachedEmojisOlderThan(ctx, olderThan, selectLimit) +		if err != nil && !errors.Is(err, db.ErrNoEntries) { +			return total, gtserror.Newf("error getting remote emoji: %w", err) +		} + +		if len(emojis) == 0 { +			// reached end. 
+			break +		} + +		// Use last created-at as the next 'olderThan' value. +		olderThan = emojis[len(emojis)-1].CreatedAt + +		for _, emoji := range emojis { +			// Check / uncache each remote emoji. +			uncached, err := e.uncacheRemote(ctx, +				mostRecent, +				emoji, +			) +			if err != nil { +				return total, err +			} + +			if uncached { +				// Update +				// count. +				total++ +			} +		} +	} + +	return total, nil +} + +// FixBroken will check all emojis for valid related models (e.g. category). +// Broken media will be automatically updated to remove now-missing models. +// Context will be checked for `gtscontext.DryRun()` to perform the action. +func (e *Emoji) FixBroken(ctx context.Context) (int, error) {  	var (  		total int  		maxID string @@ -86,8 +159,8 @@ func (e *Emoji) PruneMissing(ctx context.Context) (int, error) {  		maxID = emojis[len(emojis)-1].ID  		for _, emoji := range emojis { -			// Check / fix missing emoji media. -			fixed, err := e.pruneMissing(ctx, emoji) +			// Check / fix missing broken emoji. +			fixed, err := e.fixBroken(ctx, emoji)  			if err != nil {  				return total, err  			} @@ -103,10 +176,10 @@ func (e *Emoji) PruneMissing(ctx context.Context) (int, error) {  	return total, nil  } -// FixBroken will check all emojis for valid related models (e.g. category). -// Broken media will be automatically updated to remove now-missing models. -// Context will be checked for `gtscontext.DryRun()` to perform the action. -func (e *Emoji) FixBroken(ctx context.Context) (int, error) { +// PruneUnused will delete all unused emoji media from the database and storage driver. +// Context will be checked for `gtscontext.DryRun()` to perform the action. NOTE: this function +// should be updated to match media.FixCacheStat() if we ever support emoji uncaching. 
+func (e *Emoji) PruneUnused(ctx context.Context) (int, error) {  	var (  		total int  		maxID string @@ -114,9 +187,9 @@ func (e *Emoji) FixBroken(ctx context.Context) (int, error) {  	for {  		// Fetch the next batch of emoji media up to next ID. -		emojis, err := e.state.DB.GetEmojis(ctx, maxID, selectLimit) +		emojis, err := e.state.DB.GetRemoteEmojis(ctx, maxID, selectLimit)  		if err != nil && !errors.Is(err, db.ErrNoEntries) { -			return total, gtserror.Newf("error getting emojis: %w", err) +			return total, gtserror.Newf("error getting remote emojis: %w", err)  		}  		if len(emojis) == 0 { @@ -128,8 +201,50 @@ func (e *Emoji) FixBroken(ctx context.Context) (int, error) {  		maxID = emojis[len(emojis)-1].ID  		for _, emoji := range emojis { -			// Check / fix missing broken emoji. -			fixed, err := e.fixBroken(ctx, emoji) +			// Check / prune unused emoji media. +			fixed, err := e.pruneUnused(ctx, emoji) +			if err != nil { +				return total, err +			} + +			if fixed { +				// Update +				// count. +				total++ +			} +		} +	} + +	return total, nil +} + +// FixCacheStatus will check all emoji for up-to-date cache status (i.e. in storage driver). +// Context will be checked for `gtscontext.DryRun()` to perform the action. NOTE: this function +// should be updated to match media.FixCacheStat() if we ever support emoji uncaching. +func (e *Emoji) FixCacheStates(ctx context.Context) (int, error) { +	var ( +		total int +		maxID string +	) + +	for { +		// Fetch the next batch of emoji media up to next ID. +		emojis, err := e.state.DB.GetRemoteEmojis(ctx, maxID, selectLimit) +		if err != nil && !errors.Is(err, db.ErrNoEntries) { +			return total, gtserror.Newf("error getting remote emojis: %w", err) +		} + +		if len(emojis) == 0 { +			// reached end. +			break +		} + +		// Use last as the next 'maxID' value. +		maxID = emojis[len(emojis)-1].ID + +		for _, emoji := range emojis { +			// Check / fix required emoji cache states. 
+			fixed, err := e.fixCacheState(ctx, emoji)  			if err != nil {  				return total, err  			} @@ -145,22 +260,113 @@ func (e *Emoji) FixBroken(ctx context.Context) (int, error) {  	return total, nil  } -func (e *Emoji) pruneMissing(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { -	return e.checkFiles(ctx, func() error { -		// Emoji missing files, delete it. -		// NOTE: if we ever support uncaching -		// of emojis, change to e.uncache(). -		// In that case we should also rename -		// this function to match the media -		// equivalent -> fixCacheState(). -		log.WithContext(ctx). -			WithField("emoji", emoji.ID). -			Debug("deleting due to missing emoji") -		return e.delete(ctx, emoji) -	}, +func (e *Emoji) pruneUnused(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { +	// Start a log entry for emoji. +	l := log.WithContext(ctx). +		WithField("emoji", emoji.ID) + +	// Load any related accounts using this emoji. +	accounts, err := e.getRelatedAccounts(ctx, emoji) +	if err != nil { +		return false, err +	} else if len(accounts) > 0 { +		l.Debug("skipping as account emoji in use") +		return false, nil +	} + +	// Load any related statuses using this emoji. +	statuses, err := e.getRelatedStatuses(ctx, emoji) +	if err != nil { +		return false, err +	} else if len(statuses) > 0 { +		l.Debug("skipping as status emoji in use") +		return false, nil +	} + +	// Check not recently created, give it some time to be "used" again. +	if time.Now().Add(-24 * time.Hour * 7).Before(emoji.CreatedAt) { +		l.Debug("skipping due to recently created") +		return false, nil +	} + +	// Emoji totally unused, delete it. +	l.Debug("deleting unused emoji") +	return true, e.delete(ctx, emoji) +} + +func (e *Emoji) fixCacheState(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { +	// Start a log entry for emoji. +	l := log.WithContext(ctx). +		WithField("emoji", emoji.ID) + +	// Check whether files exist. 
+	exist, err := e.haveFiles(ctx,  		emoji.ImageStaticPath,  		emoji.ImagePath,  	) +	if err != nil { +		return false, err +	} + +	switch { +	case *emoji.Cached && !exist: +		// Mark as uncached if expected files don't exist. +		l.Debug("cached=true exists=false => marking uncached") +		return true, e.uncache(ctx, emoji) + +	case !*emoji.Cached && exist: +		// Remove files if we don't expect them to exist. +		l.Debug("cached=false exists=true => removing files") +		_, err := e.removeFiles(ctx, +			emoji.ImageStaticPath, +			emoji.ImagePath, +		) +		return true, err + +	default: +		return false, nil +	} +} + +func (e *Emoji) uncacheRemote(ctx context.Context, after time.Time, emoji *gtsmodel.Emoji) (bool, error) { +	if !*emoji.Cached { +		// Already uncached. +		return false, nil +	} + +	// Start a log entry for emoji. +	l := log.WithContext(ctx). +		WithField("emoji", emoji.ID) + +	// Load any related accounts using this emoji. +	accounts, err := e.getRelatedAccounts(ctx, emoji) +	if err != nil { +		return false, err +	} + +	for _, account := range accounts { +		if account.FetchedAt.After(after) { +			l.Debug("skipping due to recently fetched account") +			return false, nil +		} +	} + +	// Load any related statuses using this emoji. +	statuses, err := e.getRelatedStatuses(ctx, emoji) +	if err != nil { +		return false, err +	} + +	for _, status := range statuses { +		if status.FetchedAt.After(after) { +			l.Debug("skipping due to recently fetched status") +			return false, nil +		} +	} + +	// This emoji is too old, uncache it. 
+	l.Debug("uncaching old remote emoji") +	return true, e.uncache(ctx, emoji)  }  func (e *Emoji) fixBroken(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { @@ -214,6 +420,47 @@ func (e *Emoji) getRelatedCategory(ctx context.Context, emoji *gtsmodel.Emoji) (  	return category, false, nil  } +func (e *Emoji) getRelatedAccounts(ctx context.Context, emoji *gtsmodel.Emoji) ([]*gtsmodel.Account, error) { +	accounts, err := e.state.DB.GetAccountsUsingEmoji(ctx, emoji.ID) +	if err != nil { +		return nil, gtserror.Newf("error fetching accounts using emoji %s: %w", emoji.ID, err) +	} +	return accounts, nil +} + +func (e *Emoji) getRelatedStatuses(ctx context.Context, emoji *gtsmodel.Emoji) ([]*gtsmodel.Status, error) { +	statuses, err := e.state.DB.GetStatusesUsingEmoji(ctx, emoji.ID) +	if err != nil { +		return nil, gtserror.Newf("error fetching statuses using emoji %s: %w", emoji.ID, err) +	} +	return statuses, nil +} + +func (e *Emoji) uncache(ctx context.Context, emoji *gtsmodel.Emoji) error { +	if gtscontext.DryRun(ctx) { +		// Dry run, do nothing. +		return nil +	} + +	// Remove emoji and static. +	_, err := e.removeFiles(ctx, +		emoji.ImagePath, +		emoji.ImageStaticPath, +	) +	if err != nil { +		return gtserror.Newf("error removing emoji files: %w", err) +	} + +	// Update emoji to reflect that we no longer have it cached. +	log.Debugf(ctx, "marking emoji as uncached: %s", emoji.ID) +	emoji.Cached = func() *bool { i := false; return &i }() +	if err := e.state.DB.UpdateEmoji(ctx, emoji, "cached"); err != nil { +		return gtserror.Newf("error updating emoji: %w", err) +	} + +	return nil +} +  func (e *Emoji) delete(ctx context.Context, emoji *gtsmodel.Emoji) error {  	if gtscontext.DryRun(ctx) {  		// Dry run, do nothing. 
diff --git a/internal/cleaner/emoji_test.go b/internal/cleaner/emoji_test.go new file mode 100644 index 000000000..81fde6e48 --- /dev/null +++ b/internal/cleaner/emoji_test.go @@ -0,0 +1,402 @@ +package cleaner_test + +import ( +	"context" +	"errors" +	"time" + +	"github.com/superseriousbusiness/gotosocial/internal/config" +	"github.com/superseriousbusiness/gotosocial/internal/db" +	"github.com/superseriousbusiness/gotosocial/internal/gtscontext" +	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" +) + +func (suite *CleanerTestSuite) TestEmojiUncacheRemote() { +	suite.testEmojiUncacheRemote( +		context.Background(), +		mapvals(suite.emojis), +	) +} + +func (suite *CleanerTestSuite) TestEmojiUncacheRemoteDryRun() { +	suite.testEmojiUncacheRemote( +		gtscontext.SetDryRun(context.Background()), +		mapvals(suite.emojis), +	) +} + +func (suite *CleanerTestSuite) TestEmojiFixBroken() { +	suite.testEmojiFixBroken( +		context.Background(), +		mapvals(suite.emojis), +	) +} + +func (suite *CleanerTestSuite) TestEmojiFixBrokenDryRun() { +	suite.testEmojiFixBroken( +		gtscontext.SetDryRun(context.Background()), +		mapvals(suite.emojis), +	) +} + +func (suite *CleanerTestSuite) TestEmojiPruneUnused() { +	suite.testEmojiPruneUnused( +		context.Background(), +		mapvals(suite.emojis), +	) +} + +func (suite *CleanerTestSuite) TestEmojiPruneUnusedDryRun() { +	suite.testEmojiPruneUnused( +		gtscontext.SetDryRun(context.Background()), +		mapvals(suite.emojis), +	) +} + +func (suite *CleanerTestSuite) TestEmojiFixCacheStates() { +	suite.testEmojiFixCacheStates( +		context.Background(), +		mapvals(suite.emojis), +	) +} + +func (suite *CleanerTestSuite) TestEmojiFixCacheStatesDryRun() { +	suite.testEmojiFixCacheStates( +		gtscontext.SetDryRun(context.Background()), +		mapvals(suite.emojis), +	) +} + +func (suite *CleanerTestSuite) testEmojiUncacheRemote(ctx context.Context, emojis []*gtsmodel.Emoji) { +	var uncacheIDs []string + +	// Test state. 
+	t := suite.T() + +	// Get max remote cache days to keep. +	days := config.GetMediaRemoteCacheDays() +	olderThan := time.Now().Add(-24 * time.Hour * time.Duration(days)) + +	for _, emoji := range emojis { +		// Check whether this emoji should be uncached. +		ok, err := suite.shouldUncacheEmoji(ctx, emoji, olderThan) +		if err != nil { +			t.Fatalf("error checking whether emoji should be uncached: %v", err) +		} + +		if ok { +			// Mark this emoji ID as to be uncached. +			uncacheIDs = append(uncacheIDs, emoji.ID) +		} +	} + +	// Attempt to uncache remote emojis. +	found, err := suite.cleaner.Emoji().UncacheRemote(ctx, olderThan) +	if err != nil { +		t.Errorf("error uncaching remote emojis: %v", err) +		return +	} + +	// Check expected were uncached. +	if found != len(uncacheIDs) { +		t.Errorf("expected %d emojis to be uncached, %d were", len(uncacheIDs), found) +		return +	} + +	if gtscontext.DryRun(ctx) { +		// nothing else to test. +		return +	} + +	for _, id := range uncacheIDs { +		// Fetch the emoji by ID that should now be uncached. +		emoji, err := suite.state.DB.GetEmojiByID(ctx, id) +		if err != nil { +			t.Fatalf("error fetching emoji from database: %v", err) +		} + +		// Check cache state. +		if *emoji.Cached { +			t.Errorf("emoji %s@%s should have been uncached", emoji.Shortcode, emoji.Domain) +		} + +		// Check that the emoji files in storage have been deleted. 
+		if ok, err := suite.state.Storage.Has(ctx, emoji.ImagePath); err != nil { +			t.Fatalf("error checking storage for emoji: %v", err) +		} else if ok { +			t.Errorf("emoji %s@%s image path should not exist", emoji.Shortcode, emoji.Domain) +		} else if ok, err := suite.state.Storage.Has(ctx, emoji.ImageStaticPath); err != nil { +			t.Fatalf("error checking storage for emoji: %v", err) +		} else if ok { +			t.Errorf("emoji %s@%s image static path should not exist", emoji.Shortcode, emoji.Domain) +		} +	} +} + +func (suite *CleanerTestSuite) shouldUncacheEmoji(ctx context.Context, emoji *gtsmodel.Emoji, after time.Time) (bool, error) { +	if emoji.ImageRemoteURL == "" { +		// Local emojis are never uncached. +		return false, nil +	} + +	if emoji.Cached == nil || !*emoji.Cached { +		// Emoji is already uncached. +		return false, nil +	} + +	// Get related accounts using this emoji (if any). +	accounts, err := suite.state.DB.GetAccountsUsingEmoji(ctx, emoji.ID) +	if err != nil { +		return false, err +	} + +	// Check if accounts are recently updated. +	for _, account := range accounts { +		if account.FetchedAt.After(after) { +			return false, nil +		} +	} + +	// Get related statuses using this emoji (if any). +	statuses, err := suite.state.DB.GetStatusesUsingEmoji(ctx, emoji.ID) +	if err != nil { +		return false, err +	} + +	// Check if statuses are recently updated. +	for _, status := range statuses { +		if status.FetchedAt.After(after) { +			return false, nil +		} +	} + +	return true, nil +} + +func (suite *CleanerTestSuite) testEmojiFixBroken(ctx context.Context, emojis []*gtsmodel.Emoji) { +	var fixIDs []string + +	// Test state. +	t := suite.T() + +	for _, emoji := range emojis { +		// Check whether this emoji should be fixed. +		ok, err := suite.shouldFixBrokenEmoji(ctx, emoji) +		if err != nil { +			t.Fatalf("error checking whether emoji should be fixed: %v", err) +		} + +		if ok { +			// Mark this emoji ID as to be fixed. 
+			fixIDs = append(fixIDs, emoji.ID) +		} +	} + +	// Attempt to fix broken emojis. +	found, err := suite.cleaner.Emoji().FixBroken(ctx) +	if err != nil { +		t.Errorf("error fixing broken emojis: %v", err) +		return +	} + +	// Check expected were fixed. +	if found != len(fixIDs) { +		t.Errorf("expected %d emojis to be fixed, %d were", len(fixIDs), found) +		return +	} + +	if gtscontext.DryRun(ctx) { +		// nothing else to test. +		return +	} + +	for _, id := range fixIDs { +		// Fetch the emoji by ID that should now be fixed. +		emoji, err := suite.state.DB.GetEmojiByID(ctx, id) +		if err != nil { +			t.Fatalf("error fetching emoji from database: %v", err) +		} + +		// Ensure category was cleared. +		if emoji.CategoryID != "" { +			t.Errorf("emoji %s@%s should have empty category", emoji.Shortcode, emoji.Domain) +		} +	} +} + +func (suite *CleanerTestSuite) shouldFixBrokenEmoji(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { +	if emoji.CategoryID == "" { +		// no category issue. +		return false, nil +	} + +	// Get the related category for this emoji. +	category, err := suite.state.DB.GetEmojiCategory(ctx, emoji.CategoryID) +	if err != nil && !errors.Is(err, db.ErrNoEntries) { +		return false, nil +	} + +	return (category == nil), nil +} + +func (suite *CleanerTestSuite) testEmojiPruneUnused(ctx context.Context, emojis []*gtsmodel.Emoji) { +	var pruneIDs []string + +	// Test state. +	t := suite.T() + +	for _, emoji := range emojis { +		// Check whether this emoji should be pruned. +		ok, err := suite.shouldPruneEmoji(ctx, emoji) +		if err != nil { +			t.Fatalf("error checking whether emoji should be pruned: %v", err) +		} + +		if ok { +			// Mark this emoji ID as to be pruned. +			pruneIDs = append(pruneIDs, emoji.ID) +		} +	} + +	// Attempt to prune emojis. +	found, err := suite.cleaner.Emoji().PruneUnused(ctx) +	if err != nil { +		t.Errorf("error fixing broken emojis: %v", err) +		return +	} + +	// Check expected were pruned. 
+	if found != len(pruneIDs) { +		t.Errorf("expected %d emojis to be pruned, %d were", len(pruneIDs), found) +		return +	} + +	if gtscontext.DryRun(ctx) { +		// nothing else to test. +		return +	} + +	for _, id := range pruneIDs { +		// Fetch the emoji by ID that should now be pruned. +		emoji, err := suite.state.DB.GetEmojiByID(ctx, id) +		if err != nil && !errors.Is(err, db.ErrNoEntries) { +			t.Fatalf("error fetching emoji from database: %v", err) +		} + +		// Ensure gone. +		if emoji != nil { +			t.Errorf("emoji %s@%s should have been pruned", emoji.Shortcode, emoji.Domain) +		} +	} +} + +func (suite *CleanerTestSuite) shouldPruneEmoji(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { +	if emoji.ImageRemoteURL == "" { +		// Local emojis are never pruned. +		return false, nil +	} + +	// Get related accounts using this emoji (if any). +	accounts, err := suite.state.DB.GetAccountsUsingEmoji(ctx, emoji.ID) +	if err != nil { +		return false, err +	} else if len(accounts) > 0 { +		return false, nil +	} + +	// Get related statuses using this emoji (if any). +	statuses, err := suite.state.DB.GetStatusesUsingEmoji(ctx, emoji.ID) +	if err != nil { +		return false, err +	} else if len(statuses) > 0 { +		return false, nil +	} + +	return true, nil +} + +func (suite *CleanerTestSuite) testEmojiFixCacheStates(ctx context.Context, emojis []*gtsmodel.Emoji) { +	var fixIDs []string + +	// Test state. +	t := suite.T() + +	for _, emoji := range emojis { +		// Check whether this emoji should be fixed. +		ok, err := suite.shouldFixEmojiCacheState(ctx, emoji) +		if err != nil { +			t.Fatalf("error checking whether emoji should be fixed: %v", err) +		} + +		if ok { +			// Mark this emoji ID as to be fixed. +			fixIDs = append(fixIDs, emoji.ID) +		} +	} + +	// Attempt to fix broken emoji cache states. +	found, err := suite.cleaner.Emoji().FixCacheStates(ctx) +	if err != nil { +		t.Errorf("error fixing broken emojis: %v", err) +		return +	} + +	// Check expected were fixed. 
+	if found != len(fixIDs) { +		t.Errorf("expected %d emojis to be fixed, %d were", len(fixIDs), found) +		return +	} + +	if gtscontext.DryRun(ctx) { +		// nothing else to test. +		return +	} + +	for _, id := range fixIDs { +		// Fetch the emoji by ID that should now be fixed. +		emoji, err := suite.state.DB.GetEmojiByID(ctx, id) +		if err != nil { +			t.Fatalf("error fetching emoji from database: %v", err) +		} + +		// Ensure emoji cache state has been fixed. +		ok, err := suite.shouldFixEmojiCacheState(ctx, emoji) +		if err != nil { +			t.Fatalf("error checking whether emoji should be fixed: %v", err) +		} else if ok { +			t.Errorf("emoji %s@%s cache state should have been fixed", emoji.Shortcode, emoji.Domain) +		} +	} +} + +func (suite *CleanerTestSuite) shouldFixEmojiCacheState(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { +	// Check whether emoji image path exists. +	haveImage, err := suite.state.Storage.Has(ctx, emoji.ImagePath) +	if err != nil { +		return false, err +	} + +	// Check whether emoji static path exists. +	haveStatic, err := suite.state.Storage.Has(ctx, emoji.ImageStaticPath) +	if err != nil { +		return false, err +	} + +	switch exists := (haveImage && haveStatic); { +	case emoji.Cached != nil && +		*emoji.Cached && !exists: +		// (cached can be nil in tests) +		// Cached but missing files. +		return true, nil + +	case emoji.Cached != nil && +		!*emoji.Cached && exists: +		// (cached can be nil in tests) +		// Uncached but unexpected files. +		return true, nil + +	default: +		// No cache state issue. 
+		return false, nil +	} +} diff --git a/internal/cleaner/media.go b/internal/cleaner/media.go index 51a0aea6d..8b11a30bf 100644 --- a/internal/cleaner/media.go +++ b/internal/cleaner/media.go @@ -96,9 +96,9 @@ func (m *Media) PruneOrphaned(ctx context.Context) (int, error) {  	// All media files in storage will have path fitting: {$account}/{$type}/{$size}/{$id}.{$ext}  	if err := m.state.Storage.WalkKeys(ctx, func(ctx context.Context, path string) error { +		// Check for our expected fileserver path format.  		if !regexes.FilePath.MatchString(path) { -			// This is not our expected media -			// path format, skip this one. +			log.Warn(ctx, "unexpected storage item: %s", path)  			return nil  		} @@ -177,10 +177,10 @@ func (m *Media) UncacheRemote(ctx context.Context, olderThan time.Time) (int, er  	mostRecent := olderThan  	for { -		// Fetch the next batch of attachments older than last-set time. -		attachments, err := m.state.DB.GetRemoteOlderThan(ctx, olderThan, selectLimit) +		// Fetch the next batch of cached attachments older than last-set time. +		attachments, err := m.state.DB.GetCachedAttachmentsOlderThan(ctx, olderThan, selectLimit)  		if err != nil && !errors.Is(err, db.ErrNoEntries) { -			return total, gtserror.Newf("error getting remote media: %w", err) +			return total, gtserror.Newf("error getting remote attachments: %w", err)  		}  		if len(attachments) == 0 { @@ -220,9 +220,9 @@ func (m *Media) FixCacheStates(ctx context.Context) (int, error) {  	for {  		// Fetch the next batch of media attachments up to next max ID. 
-		attachments, err := m.state.DB.GetAttachments(ctx, maxID, selectLimit) +		attachments, err := m.state.DB.GetRemoteAttachments(ctx, maxID, selectLimit)  		if err != nil && !errors.Is(err, db.ErrNoEntries) { -			return total, gtserror.Newf("error getting avatars / headers: %w", err) +			return total, gtserror.Newf("error getting remote attachments: %w", err)  		}  		if len(attachments) == 0 { @@ -323,7 +323,7 @@ func (m *Media) pruneUnused(ctx context.Context, media *gtsmodel.MediaAttachment  	l := log.WithContext(ctx).  		WithField("media", media.ID) -		// Check whether we have the required account for media. +	// Check whether we have the required account for media.  	account, missing, err := m.getRelatedAccount(ctx, media)  	if err != nil {  		return false, err @@ -367,14 +367,6 @@ func (m *Media) pruneUnused(ctx context.Context, media *gtsmodel.MediaAttachment  }  func (m *Media) fixCacheState(ctx context.Context, media *gtsmodel.MediaAttachment) (bool, error) { -	if !*media.Cached { -		// We ignore uncached media, a -		// false negative is a much better -		// situation than a false positive, -		// re-cache will just overwrite it. -		return false, nil -	} -  	// Start a log entry for media.  	l := log.WithContext(ctx).  		WithField("media", media.ID) @@ -397,15 +389,33 @@ func (m *Media) fixCacheState(ctx context.Context, media *gtsmodel.MediaAttachme  		return false, nil  	} -	// So we know this a valid cached media entry. -	// Check that we have the files on disk required.... -	return m.checkFiles(ctx, func() error { -		l.Debug("uncaching due to missing media") -		return m.uncache(ctx, media) -	}, +	// Check whether files exist. +	exist, err := m.haveFiles(ctx,  		media.Thumbnail.Path,  		media.File.Path,  	) +	if err != nil { +		return false, err +	} + +	switch { +	case *media.Cached && !exist: +		// Mark as uncached if expected files don't exist. 
+		l.Debug("cached=true exists=false => uncaching") +		return true, m.uncache(ctx, media) + +	case !*media.Cached && exist: +		// Remove files if we don't expect them to exist. +		l.Debug("cached=false exists=true => deleting") +		_, err := m.removeFiles(ctx, +			media.Thumbnail.Path, +			media.File.Path, +		) +		return true, err + +	default: +		return false, nil +	}  }  func (m *Media) uncacheRemote(ctx context.Context, after time.Time, media *gtsmodel.MediaAttachment) (bool, error) { diff --git a/internal/db/account.go b/internal/db/account.go index 4a08918b0..2e113c35e 100644 --- a/internal/db/account.go +++ b/internal/db/account.go @@ -73,6 +73,9 @@ type Account interface {  	// GetAccountFaves fetches faves/likes created by the target accountID.  	GetAccountFaves(ctx context.Context, accountID string) ([]*gtsmodel.StatusFave, Error) +	// GetAccountsUsingEmoji fetches all account models using emoji with given ID stored in their 'emojis' column. +	GetAccountsUsingEmoji(ctx context.Context, emojiID string) ([]*gtsmodel.Account, error) +  	// GetAccountStatusesCount is a shortcut for the common action of counting statuses produced by accountID.  	CountAccountStatuses(ctx context.Context, accountID string) (int, Error) diff --git a/internal/db/bundb/account.go b/internal/db/bundb/account.go index 17339732e..179db6bb3 100644 --- a/internal/db/bundb/account.go +++ b/internal/db/bundb/account.go @@ -56,6 +56,27 @@ func (a *accountDB) GetAccountByID(ctx context.Context, id string) (*gtsmodel.Ac  	)  } +func (a *accountDB) GetAccountsByIDs(ctx context.Context, ids []string) ([]*gtsmodel.Account, error) { +	accounts := make([]*gtsmodel.Account, 0, len(ids)) + +	for _, id := range ids { +		// Attempt to fetch account from DB. +		account, err := a.GetAccountByID( +			gtscontext.SetBarebones(ctx), +			id, +		) +		if err != nil { +			log.Errorf(ctx, "error getting account %q: %v", id, err) +			continue +		} + +		// Append account to return slice. 
+		accounts = append(accounts, account) +	} + +	return accounts, nil +} +  func (a *accountDB) GetAccountByURI(ctx context.Context, uri string) (*gtsmodel.Account, db.Error) {  	return a.getAccount(  		ctx, @@ -444,6 +465,34 @@ func (a *accountDB) GetAccountCustomCSSByUsername(ctx context.Context, username  	return account.CustomCSS, nil  } +func (a *accountDB) GetAccountsUsingEmoji(ctx context.Context, emojiID string) ([]*gtsmodel.Account, error) { +	var accountIDs []string + +	// Create SELECT account query. +	q := a.conn.NewSelect(). +		Table("accounts"). +		Column("id") + +	// Append a WHERE LIKE clause to the query +	// that checks the `emojis` column for any +	// text containing this specific emoji ID. +	// +	// The reason we do this instead of doing a +	// `WHERE ? IN (emojis)` is that the latter +	// ends up being much MUCH slower, and the +	// database stores this ID-array-column as +	// text anyways, allowing a simple LIKE query. +	q = whereLike(q, "emojis", emojiID) + +	// Execute the query, scanning destination into accountIDs. +	if _, err := q.Exec(ctx, &accountIDs); err != nil { +		return nil, a.conn.ProcessError(err) +	} + +	// Convert account IDs into account objects. +	return a.GetAccountsByIDs(ctx, accountIDs) +} +  func (a *accountDB) GetAccountFaves(ctx context.Context, accountID string) ([]*gtsmodel.StatusFave, db.Error) {  	faves := new([]*gtsmodel.StatusFave) diff --git a/internal/db/bundb/emoji.go b/internal/db/bundb/emoji.go index 60c140264..321b5c0e7 100644 --- a/internal/db/bundb/emoji.go +++ b/internal/db/bundb/emoji.go @@ -126,12 +126,20 @@ func (e *emojiDB) DeleteEmojiByID(ctx context.Context, id string) db.Error {  			return err  		} -		// Select all accounts using this emoji. -		if _, err := tx.NewSelect(). +		// Prepare SELECT accounts query. +		aq := tx.NewSelect().  			Table("accounts"). -			Column("id"). -			Where("? IN (emojis)", id). 
-			Exec(ctx, &accountIDs); err != nil { +			Column("id") + +		// Append a WHERE LIKE clause to the query +		// that checks the `emojis` column for any +		// text containing this specific emoji ID. +		// +		// (see GetStatusesUsingEmoji() for details.) +		aq = whereLike(aq, "emojis", id) + +		// Select all accounts using this emoji into accountIDs. +		if _, err := aq.Exec(ctx, &accountIDs); err != nil {  			return err  		} @@ -162,12 +170,20 @@ func (e *emojiDB) DeleteEmojiByID(ctx context.Context, id string) db.Error {  			}  		} -		// Select all statuses using this emoji. -		if _, err := tx.NewSelect(). +		// Prepare SELECT statuses query. +		sq := tx.NewSelect().  			Table("statuses"). -			Column("id"). -			Where("? IN (emojis)", id). -			Exec(ctx, &statusIDs); err != nil { +			Column("id") + +		// Append a WHERE LIKE clause to the query +		// that checks the `emojis` column for any +		// text containing this specific emoji ID. +		// +		// (see GetStatusesUsingEmoji() for details.) +		sq = whereLike(sq, "emojis", id) + +		// Select all statuses using this emoji into statusIDs. +		if _, err := sq.Exec(ctx, &statusIDs); err != nil {  			return err  		} @@ -328,7 +344,7 @@ func (e *emojiDB) GetEmojisBy(ctx context.Context, domain string, includeDisable  }  func (e *emojiDB) GetEmojis(ctx context.Context, maxID string, limit int) ([]*gtsmodel.Emoji, error) { -	emojiIDs := []string{} +	var emojiIDs []string  	q := e.conn.NewSelect().  		Table("emojis"). @@ -336,9 +352,55 @@ func (e *emojiDB) GetEmojis(ctx context.Context, maxID string, limit int) ([]*gt  		Order("id DESC")  	if maxID != "" { -		q = q.Where("? 
< ?", bun.Ident("id"), maxID) +		q = q.Where("id < ?", maxID) +	} + +	if limit != 0 { +		q = q.Limit(limit)  	} +	if err := q.Scan(ctx, &emojiIDs); err != nil { +		return nil, e.conn.ProcessError(err) +	} + +	return e.GetEmojisByIDs(ctx, emojiIDs) +} + +func (e *emojiDB) GetRemoteEmojis(ctx context.Context, maxID string, limit int) ([]*gtsmodel.Emoji, error) { +	var emojiIDs []string + +	q := e.conn.NewSelect(). +		Table("emojis"). +		Column("id"). +		Where("domain IS NOT NULL"). +		Order("id DESC") + +	if maxID != "" { +		q = q.Where("id < ?", maxID) +	} + +	if limit != 0 { +		q = q.Limit(limit) +	} + +	if err := q.Scan(ctx, &emojiIDs); err != nil { +		return nil, e.conn.ProcessError(err) +	} + +	return e.GetEmojisByIDs(ctx, emojiIDs) +} + +func (e *emojiDB) GetCachedEmojisOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.Emoji, error) { +	var emojiIDs []string + +	q := e.conn.NewSelect(). +		Table("emojis"). +		Column("id"). +		Where("cached = true"). +		Where("domain IS NOT NULL"). +		Where("created_at < ?", olderThan). +		Order("created_at DESC") +  	if limit != 0 {  		q = q.Limit(limit)  	} diff --git a/internal/db/bundb/media.go b/internal/db/bundb/media.go index 80a4f8bbe..c190df44a 100644 --- a/internal/db/bundb/media.go +++ b/internal/db/bundb/media.go @@ -232,29 +232,6 @@ func (m *mediaDB) DeleteAttachment(ctx context.Context, id string) error {  	return m.conn.ProcessError(err)  } -func (m *mediaDB) GetRemoteOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.MediaAttachment, db.Error) { -	attachmentIDs := []string{} - -	q := m.conn. -		NewSelect(). -		TableExpr("? AS ?", bun.Ident("media_attachments"), bun.Ident("media_attachment")). -		Column("media_attachment.id"). -		Where("? = ?", bun.Ident("media_attachment.cached"), true). -		Where("? < ?", bun.Ident("media_attachment.created_at"), olderThan). -		Where("? IS NOT NULL", bun.Ident("media_attachment.remote_url")). 
-		Order("media_attachment.created_at DESC") - -	if limit != 0 { -		q = q.Limit(limit) -	} - -	if err := q.Scan(ctx, &attachmentIDs); err != nil { -		return nil, m.conn.ProcessError(err) -	} - -	return m.GetAttachmentsByIDs(ctx, attachmentIDs) -} -  func (m *mediaDB) CountRemoteOlderThan(ctx context.Context, olderThan time.Time) (int, db.Error) {  	q := m.conn.  		NewSelect(). @@ -273,15 +250,39 @@ func (m *mediaDB) CountRemoteOlderThan(ctx context.Context, olderThan time.Time)  }  func (m *mediaDB) GetAttachments(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, error) { -	attachmentIDs := []string{} +	attachmentIDs := make([]string, 0, limit) + +	q := m.conn.NewSelect(). +		Table("media_attachments"). +		Column("id"). +		Order("id DESC") + +	if maxID != "" { +		q = q.Where("id < ?", maxID) +	} + +	if limit != 0 { +		q = q.Limit(limit) +	} + +	if err := q.Scan(ctx, &attachmentIDs); err != nil { +		return nil, m.conn.ProcessError(err) +	} + +	return m.GetAttachmentsByIDs(ctx, attachmentIDs) +} + +func (m *mediaDB) GetRemoteAttachments(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, error) { +	attachmentIDs := make([]string, 0, limit)  	q := m.conn.NewSelect().  		Table("media_attachments").  		Column("id"). +		Where("remote_url IS NOT NULL").  		Order("id DESC")  	if maxID != "" { -		q = q.Where("? < ?", bun.Ident("id"), maxID) +		q = q.Where("id < ?", maxID)  	}  	if limit != 0 { @@ -295,8 +296,31 @@ func (m *mediaDB) GetAttachments(ctx context.Context, maxID string, limit int) (  	return m.GetAttachmentsByIDs(ctx, attachmentIDs)  } +func (m *mediaDB) GetCachedAttachmentsOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.MediaAttachment, db.Error) { +	attachmentIDs := make([]string, 0, limit) + +	q := m.conn. +		NewSelect(). +		Table("media_attachments"). +		Column("id"). +		Where("cached = true"). +		Where("remote_url IS NOT NULL"). +		Where("created_at < ?", olderThan). 
+		Order("created_at DESC") + +	if limit != 0 { +		q = q.Limit(limit) +	} + +	if err := q.Scan(ctx, &attachmentIDs); err != nil { +		return nil, m.conn.ProcessError(err) +	} + +	return m.GetAttachmentsByIDs(ctx, attachmentIDs) +} +  func (m *mediaDB) GetAvatarsAndHeaders(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, db.Error) { -	attachmentIDs := []string{} +	attachmentIDs := make([]string, 0, limit)  	q := m.conn.NewSelect().  		TableExpr("? AS ?", bun.Ident("media_attachments"), bun.Ident("media_attachment")). @@ -324,7 +348,7 @@ func (m *mediaDB) GetAvatarsAndHeaders(ctx context.Context, maxID string, limit  }  func (m *mediaDB) GetLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.MediaAttachment, db.Error) { -	attachmentIDs := []string{} +	attachmentIDs := make([]string, 0, limit)  	q := m.conn.  		NewSelect(). diff --git a/internal/db/bundb/media_test.go b/internal/db/bundb/media_test.go index 6b419022a..59b927119 100644 --- a/internal/db/bundb/media_test.go +++ b/internal/db/bundb/media_test.go @@ -38,7 +38,7 @@ func (suite *MediaTestSuite) TestGetAttachmentByID() {  }  func (suite *MediaTestSuite) TestGetOlder() { -	attachments, err := suite.db.GetRemoteOlderThan(context.Background(), time.Now(), 20) +	attachments, err := suite.db.GetCachedAttachmentsOlderThan(context.Background(), time.Now(), 20)  	suite.NoError(err)  	suite.Len(attachments, 2)  } diff --git a/internal/db/bundb/migrations/20230724100000_emoji_cleanup.go b/internal/db/bundb/migrations/20230724100000_emoji_cleanup.go new file mode 100644 index 000000000..0ee501395 --- /dev/null +++ b/internal/db/bundb/migrations/20230724100000_emoji_cleanup.go @@ -0,0 +1,55 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as 
published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program.  If not, see <http://www.gnu.org/licenses/>. + +package migrations + +import ( +	"context" +	"strings" + +	"github.com/uptrace/bun" +) + +func init() { +	up := func(ctx context.Context, db *bun.DB) error { +		_, err := db.ExecContext(ctx, "ALTER TABLE emojis ADD COLUMN cached BOOLEAN DEFAULT false") + +		if err != nil && !(strings.Contains(err.Error(), "already exists") || strings.Contains(err.Error(), "duplicate column name") || strings.Contains(err.Error(), "SQLSTATE 42701")) { +			return err +		} + +		if _, err := db.NewUpdate(). +			Table("emojis"). +			Where("disabled = false"). +			Set("cached = true"). 
+			Exec(ctx); err != nil { +			return err +		} + +		return nil +	} + +	down := func(ctx context.Context, db *bun.DB) error { +		return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { +			return nil +		}) +	} + +	if err := Migrations.Register(up, down); err != nil { +		panic(err) +	} +} diff --git a/internal/db/bundb/report.go b/internal/db/bundb/report.go index e017a8906..ee8aa1cb3 100644 --- a/internal/db/bundb/report.go +++ b/internal/db/bundb/report.go @@ -149,7 +149,7 @@ func (r *reportDB) getReport(ctx context.Context, lookup string, dbQuery func(*g  	if len(report.StatusIDs) > 0 {  		// Fetch reported statuses -		report.Statuses, err = r.state.DB.GetStatuses(ctx, report.StatusIDs) +		report.Statuses, err = r.state.DB.GetStatusesByIDs(ctx, report.StatusIDs)  		if err != nil {  			return nil, fmt.Errorf("error getting status mentions: %w", err)  		} diff --git a/internal/db/bundb/search.go b/internal/db/bundb/search.go index c05ebb8b1..1d7eefd48 100644 --- a/internal/db/bundb/search.go +++ b/internal/db/bundb/search.go @@ -19,7 +19,6 @@ package bundb  import (  	"context" -	"strings"  	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"  	"github.com/superseriousbusiness/gotosocial/internal/id" @@ -61,40 +60,6 @@ type searchDB struct {  	state *state.State  } -// replacer is a thread-safe string replacer which escapes -// common SQLite + Postgres `LIKE` wildcard chars using the -// escape character `\`. Initialized as a var in this package -// so it can be reused. -var replacer = strings.NewReplacer( -	`\`, `\\`, // Escape char. -	`%`, `\%`, // Zero or more char. -	`_`, `\_`, // Exactly one char. -) - -// whereSubqueryLike appends a WHERE clause to the -// given SelectQuery q, which searches for matches -// of searchQuery in the given subQuery using LIKE. 
-func whereSubqueryLike( -	q *bun.SelectQuery, -	subQuery *bun.SelectQuery, -	searchQuery string, -) *bun.SelectQuery { -	// Escape existing wildcard + escape -	// chars in the search query string. -	searchQuery = replacer.Replace(searchQuery) - -	// Add our own wildcards back in; search -	// zero or more chars around the query. -	searchQuery = `%` + searchQuery + `%` - -	// Append resulting WHERE -	// clause to the main query. -	return q.Where( -		"(?) LIKE ? ESCAPE ?", -		subQuery, searchQuery, `\`, -	) -} -  // Query example (SQLite):  //  //	SELECT "account"."id" FROM "accounts" AS "account" @@ -167,7 +132,7 @@ func (s *searchDB) SearchForAccounts(  	// Search using LIKE for matches of query  	// string within accountText subquery. -	q = whereSubqueryLike(q, accountTextSubq, query) +	q = whereLike(q, accountTextSubq, query)  	if limit > 0 {  		// Limit amount of accounts returned. @@ -345,7 +310,7 @@ func (s *searchDB) SearchForStatuses(  	// Search using LIKE for matches of query  	// string within statusText subquery. -	q = whereSubqueryLike(q, statusTextSubq, query) +	q = whereLike(q, statusTextSubq, query)  	if limit > 0 {  		// Limit amount of statuses returned. diff --git a/internal/db/bundb/status.go b/internal/db/bundb/status.go index 0dffbabcc..ccfc9fd4b 100644 --- a/internal/db/bundb/status.go +++ b/internal/db/bundb/status.go @@ -58,18 +58,18 @@ func (s *statusDB) GetStatusByID(ctx context.Context, id string) (*gtsmodel.Stat  	)  } -func (s *statusDB) GetStatuses(ctx context.Context, ids []string) ([]*gtsmodel.Status, db.Error) { +func (s *statusDB) GetStatusesByIDs(ctx context.Context, ids []string) ([]*gtsmodel.Status, error) {  	statuses := make([]*gtsmodel.Status, 0, len(ids))  	for _, id := range ids { -		// Attempt fetch from DB +		// Attempt to fetch status from DB.  		
status, err := s.GetStatusByID(ctx, id)  		if err != nil {  			log.Errorf(ctx, "error getting status %q: %v", id, err)  			continue  		} -		// Append status +		// Append status to return slice.  		statuses = append(statuses, status)  	} @@ -429,6 +429,34 @@ func (s *statusDB) DeleteStatusByID(ctx context.Context, id string) db.Error {  	})  } +func (s *statusDB) GetStatusesUsingEmoji(ctx context.Context, emojiID string) ([]*gtsmodel.Status, error) { +	var statusIDs []string + +	// Create SELECT status query. +	q := s.conn.NewSelect(). +		Table("statuses"). +		Column("id") + +	// Append a WHERE LIKE clause to the query +	// that checks the `emojis` column for any +	// text containing this specific emoji ID. +	// +	// The reason we do this instead of doing a +	// `WHERE ? IN (emojis)` is that the latter +	// ends up being much MUCH slower, and the +	// database stores this ID-array-column as +	// text anyways, allowing a simple LIKE query. +	q = whereLike(q, "emojis", emojiID) + +	// Execute the query, scanning destination into statusIDs. +	if _, err := q.Exec(ctx, &statusIDs); err != nil { +		return nil, s.conn.ProcessError(err) +	} + +	// Convert status IDs into status objects. 
+	return s.GetStatusesByIDs(ctx, statusIDs) +} +  func (s *statusDB) GetStatusParents(ctx context.Context, status *gtsmodel.Status, onlyDirect bool) ([]*gtsmodel.Status, db.Error) {  	if onlyDirect {  		// Only want the direct parent, no further than first level diff --git a/internal/db/bundb/status_test.go b/internal/db/bundb/status_test.go index cab7501f9..a69608796 100644 --- a/internal/db/bundb/status_test.go +++ b/internal/db/bundb/status_test.go @@ -50,13 +50,13 @@ func (suite *StatusTestSuite) TestGetStatusByID() {  	suite.True(*status.Likeable)  } -func (suite *StatusTestSuite) TestGetStatusesByID() { +func (suite *StatusTestSuite) TestGetStatusesByIDs() {  	ids := []string{  		suite.testStatuses["local_account_1_status_1"].ID,  		suite.testStatuses["local_account_2_status_3"].ID,  	} -	statuses, err := suite.db.GetStatuses(context.Background(), ids) +	statuses, err := suite.db.GetStatusesByIDs(context.Background(), ids)  	if err != nil {  		suite.FailNow(err.Error())  	} diff --git a/internal/db/bundb/util.go b/internal/db/bundb/util.go index 06bb289d3..bdd45d1e7 100644 --- a/internal/db/bundb/util.go +++ b/internal/db/bundb/util.go @@ -18,10 +18,46 @@  package bundb  import ( +	"strings" +  	"github.com/superseriousbusiness/gotosocial/internal/db"  	"github.com/uptrace/bun"  ) +// likeEscaper is a thread-safe string replacer which escapes +// common SQLite + Postgres `LIKE` wildcard chars using the +// escape character `\`. Initialized as a var in this package +// so it can be reused. +var likeEscaper = strings.NewReplacer( +	`\`, `\\`, // Escape char. +	`%`, `\%`, // Zero or more char. +	`_`, `\_`, // Exactly one char. +) + +// whereLike appends a WHERE clause to the +// given SelectQuery, which searches for matches +// of `search` in the given subject using LIKE. +func whereLike( +	query *bun.SelectQuery, +	subject interface{}, +	search string, +) *bun.SelectQuery { +	// Escape existing wildcard + escape +	// chars in the search query string. 
+	search = likeEscaper.Replace(search) + +	// Add our own wildcards back in; search +	// zero or more chars around the query. +	search = `%` + search + `%` + +	// Append resulting WHERE +	// clause to the main query. +	return query.Where( +		"(?) LIKE ? ESCAPE ?", +		subject, search, `\`, +	) +} +  // updateWhere parses []db.Where and adds it to the given update query.  func updateWhere(q *bun.UpdateQuery, where []db.Where) {  	for _, w := range where { diff --git a/internal/db/emoji.go b/internal/db/emoji.go index 5dcad9ece..67d7f7232 100644 --- a/internal/db/emoji.go +++ b/internal/db/emoji.go @@ -19,6 +19,7 @@ package db  import (  	"context" +	"time"  	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"  ) @@ -40,8 +41,16 @@ type Emoji interface {  	GetEmojisByIDs(ctx context.Context, ids []string) ([]*gtsmodel.Emoji, Error)  	// GetUseableEmojis gets all emojis which are useable by accounts on this instance.  	GetUseableEmojis(ctx context.Context) ([]*gtsmodel.Emoji, Error) -	// GetEmojis ... + +	// GetEmojis fetches all emojis with IDs less than 'maxID', up to a maximum of 'limit' emojis.  	GetEmojis(ctx context.Context, maxID string, limit int) ([]*gtsmodel.Emoji, error) + +	// GetRemoteEmojis fetches all remote emojis with IDs less than 'maxID', up to a maximum of 'limit' emojis. +	GetRemoteEmojis(ctx context.Context, maxID string, limit int) ([]*gtsmodel.Emoji, error) + +	// GetCachedEmojisOlderThan fetches all cached remote emojis with 'created_at' older than 'olderThan', up to a maximum of 'limit' emojis. +	GetCachedEmojisOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.Emoji, error) +  	// GetEmojisBy gets emojis based on given parameters. Useful for admin actions.  	
GetEmojisBy(ctx context.Context, domain string, includeDisabled bool, includeEnabled bool, shortcode string, maxShortcodeDomain string, minShortcodeDomain string, limit int) ([]*gtsmodel.Emoji, error)  	// GetEmojiByID gets a specific emoji by its database ID. diff --git a/internal/db/media.go b/internal/db/media.go index 01bca1748..5fb18a8fe 100644 --- a/internal/db/media.go +++ b/internal/db/media.go @@ -44,12 +44,12 @@ type Media interface {  	// GetAttachments ...  	GetAttachments(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, error) -	// GetRemoteOlderThan gets limit n remote media attachments (including avatars and headers) older than the given -	// olderThan time. These will be returned in order of attachment.created_at descending (newest to oldest in other words). -	// -	// The selected media attachments will be those with both a URL and a RemoteURL filled in. -	// In other words, media attachments that originated remotely, and that we currently have cached locally. -	GetRemoteOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.MediaAttachment, Error) +	// GetRemoteAttachments ... +	GetRemoteAttachments(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, error) + +	// GetCachedAttachmentsOlderThan gets limit n remote attachments (including avatars and headers) older than +	// the given time. These will be returned in order of attachment.created_at descending (i.e. newest to oldest). 
+	GetCachedAttachmentsOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.MediaAttachment, Error)  	// CountRemoteOlderThan is like GetRemoteOlderThan, except instead of getting limit n attachments,  	// it just counts how many remote attachments in the database (including avatars and headers) meet diff --git a/internal/db/status.go b/internal/db/status.go index fdce19094..c0e330260 100644 --- a/internal/db/status.go +++ b/internal/db/status.go @@ -28,9 +28,6 @@ type Status interface {  	// GetStatusByID returns one status from the database, with no rel fields populated, only their linking ID / URIs  	GetStatusByID(ctx context.Context, id string) (*gtsmodel.Status, Error) -	// GetStatuses gets a slice of statuses corresponding to the given status IDs. -	GetStatuses(ctx context.Context, ids []string) ([]*gtsmodel.Status, Error) -  	// GetStatusByURI returns one status from the database, with no rel fields populated, only their linking ID / URIs  	GetStatusByURI(ctx context.Context, uri string) (*gtsmodel.Status, Error) @@ -58,6 +55,12 @@ type Status interface {  	// CountStatusFaves returns the amount of faves/likes recorded for a status, or an error if something goes wrong  	CountStatusFaves(ctx context.Context, status *gtsmodel.Status) (int, Error) +	// GetStatusesByIDs gets a slice of statuses corresponding to the given status IDs. +	GetStatusesByIDs(ctx context.Context, ids []string) ([]*gtsmodel.Status, error) + +	// GetStatusesUsingEmoji fetches all status models using emoji with given ID stored in their 'emojis' column. +	GetStatusesUsingEmoji(ctx context.Context, emojiID string) ([]*gtsmodel.Status, error) +  	// GetStatusParents gets the parent statuses of a given status.  	//  	// If onlyDirect is true, only the immediate parent will be returned. 
diff --git a/internal/gtsmodel/emoji.go b/internal/gtsmodel/emoji.go index 1e21c7d1e..0fcc3247b 100644 --- a/internal/gtsmodel/emoji.go +++ b/internal/gtsmodel/emoji.go @@ -42,4 +42,5 @@ type Emoji struct {  	VisibleInPicker        *bool          `validate:"-" bun:",nullzero,notnull,default:true"`                                                      // Is this emoji visible in the admin emoji picker?  	Category               *EmojiCategory `validate:"-" bun:"rel:belongs-to"`                                                                      // In which emoji category is this emoji visible?  	CategoryID             string         `validate:"omitempty,ulid" bun:"type:CHAR(26),nullzero"`                                                 // ID of the category this emoji belongs to. +	Cached                 *bool          `validate:"-" bun:",nullzero,notnull,default:false"`  } diff --git a/internal/media/manager.go b/internal/media/manager.go index 1d673128a..afe686cb9 100644 --- a/internal/media/manager.go +++ b/internal/media/manager.go @@ -51,12 +51,7 @@ type Manager struct {  	state *state.State  } -// NewManager returns a media manager with the given db and underlying storage. -// -// A worker pool will also be initialized for the manager, to ensure that only -// a limited number of media will be processed in parallel. The numbers of workers -// is determined from the $GOMAXPROCS environment variable (usually no. CPU cores). -// See internal/concurrency.NewWorkerPool() documentation for further information. +// NewManager returns a media manager with given state.  func NewManager(state *state.State) *Manager {  	m := &Manager{state: state}  	return m @@ -159,7 +154,7 @@ func (m *Manager) PreProcessMedia(ctx context.Context, data DataFunc, accountID  	return processingMedia, nil  } -// PreProcessMediaRecache refetches, reprocesses, and recaches an existing attachment that has been uncached via pruneRemote. 
+// PreProcessMediaRecache refetches, reprocesses, and recaches an existing attachment that has been uncached via cleaner pruning.  //  // Note: unlike ProcessMedia, this will NOT queue the media to be asychronously processed.  func (m *Manager) PreProcessMediaRecache(ctx context.Context, data DataFunc, attachmentID string) (*ProcessingMedia, error) { @@ -209,17 +204,18 @@ func (m *Manager) ProcessMedia(ctx context.Context, data DataFunc, accountID str  //  // Note: unlike ProcessEmoji, this will NOT queue the emoji to be asynchronously processed.  func (m *Manager) PreProcessEmoji(ctx context.Context, data DataFunc, shortcode string, emojiID string, uri string, ai *AdditionalEmojiInfo, refresh bool) (*ProcessingEmoji, error) { -	instanceAccount, err := m.state.DB.GetInstanceAccount(ctx, "") -	if err != nil { -		return nil, gtserror.Newf("error fetching this instance account from the db: %s", err) -	} -  	var (  		newPathID string  		emoji     *gtsmodel.Emoji  		now       = time.Now()  	) +	// Fetch the local instance account for emoji path generation. +	instanceAcc, err := m.state.DB.GetInstanceAccount(ctx, "") +	if err != nil { +		return nil, gtserror.Newf("error fetching instance account: %w", err) +	} +  	if refresh {  		// Look for existing emoji by given ID.  		
emoji, err = m.state.DB.GetEmojiByID(ctx, emojiID) @@ -261,8 +257,8 @@ func (m *Manager) PreProcessEmoji(ctx context.Context, data DataFunc, shortcode  		}  		// store + serve static image at new path ID -		emoji.ImageStaticURL = uris.GenerateURIForAttachment(instanceAccount.ID, string(TypeEmoji), string(SizeStatic), newPathID, mimePng) -		emoji.ImageStaticPath = fmt.Sprintf("%s/%s/%s/%s.%s", instanceAccount.ID, TypeEmoji, SizeStatic, newPathID, mimePng) +		emoji.ImageStaticURL = uris.GenerateURIForAttachment(instanceAcc.ID, string(TypeEmoji), string(SizeStatic), newPathID, mimePng) +		emoji.ImageStaticPath = fmt.Sprintf("%s/%s/%s/%s.%s", instanceAcc.ID, TypeEmoji, SizeStatic, newPathID, mimePng)  		emoji.Shortcode = shortcode  		emoji.URI = uri @@ -278,12 +274,12 @@ func (m *Manager) PreProcessEmoji(ctx context.Context, data DataFunc, shortcode  			Domain:                 "", // assume our own domain unless told otherwise  			ImageRemoteURL:         "",  			ImageStaticRemoteURL:   "", -			ImageURL:               "",                                                                                                         // we don't know yet -			ImageStaticURL:         uris.GenerateURIForAttachment(instanceAccount.ID, string(TypeEmoji), string(SizeStatic), emojiID, mimePng), // all static emojis are encoded as png -			ImagePath:              "",                                                                                                         // we don't know yet -			ImageStaticPath:        fmt.Sprintf("%s/%s/%s/%s.%s", instanceAccount.ID, TypeEmoji, SizeStatic, emojiID, mimePng),                 // all static emojis are encoded as png -			ImageContentType:       "",                                                                                                         // we don't know yet -			ImageStaticContentType: mimeImagePng,                                                                                               // all static emojis are encoded as png 
+			ImageURL:               "",                                                                                                     // we don't know yet +			ImageStaticURL:         uris.GenerateURIForAttachment(instanceAcc.ID, string(TypeEmoji), string(SizeStatic), emojiID, mimePng), // all static emojis are encoded as png +			ImagePath:              "",                                                                                                     // we don't know yet +			ImageStaticPath:        fmt.Sprintf("%s/%s/%s/%s.%s", instanceAcc.ID, TypeEmoji, SizeStatic, emojiID, mimePng),                 // all static emojis are encoded as png +			ImageContentType:       "",                                                                                                     // we don't know yet +			ImageStaticContentType: mimeImagePng,                                                                                           // all static emojis are encoded as png  			ImageFileSize:          0,  			ImageStaticFileSize:    0,  			Disabled:               &disabled, @@ -329,9 +325,8 @@ func (m *Manager) PreProcessEmoji(ctx context.Context, data DataFunc, shortcode  	}  	processingEmoji := &ProcessingEmoji{ -		instAccID: instanceAccount.ID,  		emoji:     emoji, -		refresh:   refresh, +		existing:  refresh,  		newPathID: newPathID,  		dataFn:    data,  		mgr:       m, @@ -340,6 +335,26 @@ func (m *Manager) PreProcessEmoji(ctx context.Context, data DataFunc, shortcode  	return processingEmoji, nil  } +// PreProcessEmojiRecache refetches, reprocesses, and recaches an existing emoji that has been uncached via cleaner pruning. +// +// Note: unlike ProcessEmoji, this will NOT queue the emoji to be asychronously processed. +func (m *Manager) PreProcessEmojiRecache(ctx context.Context, data DataFunc, emojiID string) (*ProcessingEmoji, error) { +	// get the existing emoji from the database. 
+	emoji, err := m.state.DB.GetEmojiByID(ctx, emojiID) +	if err != nil { +		return nil, err +	} + +	processingEmoji := &ProcessingEmoji{ +		emoji:    emoji, +		dataFn:   data, +		existing: true, // indicate recache +		mgr:      m, +	} + +	return processingEmoji, nil +} +  // ProcessEmoji will call PreProcessEmoji, followed by queuing the emoji to be processing in the emoji worker queue.  func (m *Manager) ProcessEmoji(ctx context.Context, data DataFunc, shortcode string, id string, uri string, ai *AdditionalEmojiInfo, refresh bool) (*ProcessingEmoji, error) {  	// Create a new processing emoji object for this emoji request. diff --git a/internal/media/processingemoji.go b/internal/media/processingemoji.go index d3a1edbf8..1c7e60144 100644 --- a/internal/media/processingemoji.go +++ b/internal/media/processingemoji.go @@ -31,16 +31,16 @@ import (  	"github.com/superseriousbusiness/gotosocial/internal/gtserror"  	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"  	"github.com/superseriousbusiness/gotosocial/internal/log" +	"github.com/superseriousbusiness/gotosocial/internal/regexes"  	"github.com/superseriousbusiness/gotosocial/internal/uris"  )  // ProcessingEmoji represents an emoji currently processing. It exposes  // various functions for retrieving data from the process.  
type ProcessingEmoji struct { -	instAccID string            // instance account ID  	emoji     *gtsmodel.Emoji   // processing emoji details -	refresh   bool              // whether this is an existing emoji being refreshed -	newPathID string            // new emoji path ID to use if refreshed +	existing  bool              // indicates whether this is an existing emoji ID being refreshed / recached +	newPathID string            // new emoji path ID to use when being refreshed  	dataFn    DataFunc          // load-data function, returns media stream  	done      bool              // done is set when process finishes with non ctx canceled type error  	proc      runners.Processor // proc helps synchronize only a singular running processing instance @@ -121,24 +121,9 @@ func (p *ProcessingEmoji) load(ctx context.Context) (*gtsmodel.Emoji, bool, erro  			return err  		} -		if p.refresh { -			columns := []string{ -				"image_remote_url", -				"image_static_remote_url", -				"image_url", -				"image_static_url", -				"image_path", -				"image_static_path", -				"image_content_type", -				"image_file_size", -				"image_static_file_size", -				"image_updated_at", -				"shortcode", -				"uri", -			} - -			// Existing emoji we're refreshing, so only need to update. -			err = p.mgr.state.DB.UpdateEmoji(ctx, p.emoji, columns...) +		if p.existing { +			// Existing emoji we're updating, so only update. +			err = p.mgr.state.DB.UpdateEmoji(ctx, p.emoji)  			return err  		} @@ -217,7 +202,7 @@ func (p *ProcessingEmoji) store(ctx context.Context) error {  	var pathID string -	if p.refresh { +	if p.newPathID != "" {  		// This is a refreshed emoji with a new  		// path ID that this will be stored under.  		pathID = p.newPathID @@ -226,10 +211,13 @@ func (p *ProcessingEmoji) store(ctx context.Context) error {  		pathID = p.emoji.ID  	} +	// Determine instance account ID from already generated image static path. 
+	instanceAccID := regexes.FilePath.FindStringSubmatch(p.emoji.ImageStaticPath)[1] +  	// Calculate emoji file path.  	p.emoji.ImagePath = fmt.Sprintf(  		"%s/%s/%s/%s.%s", -		p.instAccID, +		instanceAccID,  		TypeEmoji,  		SizeOriginal,  		pathID, @@ -258,12 +246,13 @@ func (p *ProcessingEmoji) store(ctx context.Context) error {  		if err := p.mgr.state.Storage.Delete(ctx, p.emoji.ImagePath); err != nil {  			log.Errorf(ctx, "error removing too-large-emoji from storage: %v", err)  		} +  		return gtserror.Newf("calculated emoji size %s greater than max allowed %s", size, maxSize)  	}  	// Fill in remaining attachment data now it's stored.  	p.emoji.ImageURL = uris.GenerateURIForAttachment( -		p.instAccID, +		instanceAccID,  		string(TypeEmoji),  		string(SizeOriginal),  		pathID, @@ -271,6 +260,10 @@ func (p *ProcessingEmoji) store(ctx context.Context) error {  	)  	p.emoji.ImageContentType = info.MIME.Value  	p.emoji.ImageFileSize = int(sz) +	p.emoji.Cached = func() *bool { +		ok := true +		return &ok +	}()  	return nil  } @@ -297,6 +290,7 @@ func (p *ProcessingEmoji) finish(ctx context.Context) error {  	// This shouldn't already exist, but we do a check as it's worth logging.  	
if have, _ := p.mgr.state.Storage.Has(ctx, p.emoji.ImageStaticPath); have {  		log.Warnf(ctx, "static emoji already exists at storage path: %s", p.emoji.ImagePath) +  		// Attempt to remove static existing emoji at storage path (might be broken / out-of-date)  		if err := p.mgr.state.Storage.Delete(ctx, p.emoji.ImageStaticPath); err != nil {  			return gtserror.Newf("error removing static emoji from storage: %v", err) diff --git a/internal/processing/admin/media.go b/internal/processing/admin/media.go index a457487b8..13dcb7d28 100644 --- a/internal/processing/admin/media.go +++ b/internal/processing/admin/media.go @@ -58,7 +58,7 @@ func (p *Processor) MediaPrune(ctx context.Context, mediaRemoteCacheDays int) gt  	go func() {  		ctx := context.Background()  		p.cleaner.Media().All(ctx, mediaRemoteCacheDays) -		p.cleaner.Emoji().All(ctx) +		p.cleaner.Emoji().All(ctx, mediaRemoteCacheDays)  	}()  	return nil diff --git a/internal/processing/fromclientapi.go b/internal/processing/fromclientapi.go index 159f09d1b..412403c44 100644 --- a/internal/processing/fromclientapi.go +++ b/internal/processing/fromclientapi.go @@ -982,7 +982,7 @@ func (p *Processor) federateReport(ctx context.Context, report *gtsmodel.Report)  	}  	if len(report.StatusIDs) > 0 && len(report.Statuses) == 0 { -		statuses, err := p.state.DB.GetStatuses(ctx, report.StatusIDs) +		statuses, err := p.state.DB.GetStatusesByIDs(ctx, report.StatusIDs)  		if err != nil {  			return fmt.Errorf("federateReport: error getting report statuses from database: %w", err)  		} diff --git a/internal/processing/media/getfile.go b/internal/processing/media/getfile.go index 27b08600e..386c3a9a2 100644 --- a/internal/processing/media/getfile.go +++ b/internal/processing/media/getfile.go @@ -118,7 +118,7 @@ func (p *Processor) getAttachmentContent(ctx context.Context, requestingAccount  	// retrieve attachment from the database and do basic checks on it  	a, err := p.state.DB.GetAttachmentByID(ctx, wantedMediaID)  	if err 
!= nil { -		return nil, gtserror.NewErrorNotFound(fmt.Errorf("attachment %s could not be taken from the db: %s", wantedMediaID, err)) +		return nil, gtserror.NewErrorNotFound(fmt.Errorf("attachment %s could not be taken from the db: %w", wantedMediaID, err))  	}  	if a.AccountID != owningAccountID { @@ -131,7 +131,7 @@ func (p *Processor) getAttachmentContent(ctx context.Context, requestingAccount  		// 2. we need to fetch it again using a transport and the media manager  		remoteMediaIRI, err := url.Parse(a.RemoteURL)  		if err != nil { -			return nil, gtserror.NewErrorNotFound(fmt.Errorf("error parsing remote media iri %s: %s", a.RemoteURL, err)) +			return nil, gtserror.NewErrorNotFound(fmt.Errorf("error parsing remote media iri %s: %w", a.RemoteURL, err))  		}  		// use an empty string as requestingUsername to use the instance account, unless the request for this @@ -151,24 +151,24 @@ func (p *Processor) getAttachmentContent(ctx context.Context, requestingAccount  		//   recache operation -> holding open a media worker.  		// ] -		dataFn := func(innerCtx context.Context) (io.ReadCloser, int64, error) { -			t, err := p.transportController.NewTransportForUsername(innerCtx, requestingUsername) +		dataFn := func(ctx context.Context) (io.ReadCloser, int64, error) { +			t, err := p.transportController.NewTransportForUsername(ctx, requestingUsername)  			if err != nil {  				return nil, 0, err  			} -			return t.DereferenceMedia(gtscontext.SetFastFail(innerCtx), remoteMediaIRI) +			return t.DereferenceMedia(gtscontext.SetFastFail(ctx), remoteMediaIRI)  		}  		// Start recaching this media with the prepared data function.  		
processingMedia, err := p.mediaManager.PreProcessMediaRecache(ctx, dataFn, wantedMediaID)  		if err != nil { -			return nil, gtserror.NewErrorNotFound(fmt.Errorf("error recaching media: %s", err)) +			return nil, gtserror.NewErrorNotFound(fmt.Errorf("error recaching media: %w", err))  		}  		// Load attachment and block until complete  		a, err = processingMedia.LoadAttachment(ctx)  		if err != nil { -			return nil, gtserror.NewErrorNotFound(fmt.Errorf("error loading recached attachment: %s", err)) +			return nil, gtserror.NewErrorNotFound(fmt.Errorf("error loading recached attachment: %w", err))  		}  	} @@ -205,17 +205,53 @@ func (p *Processor) getEmojiContent(ctx context.Context, fileName string, owning  	// for using the static URL rather than full size url  	// is that static emojis are always encoded as png,  	// so this is more reliable than using full size url -	imageStaticURL := uris.GenerateURIForAttachment(owningAccountID, string(media.TypeEmoji), string(media.SizeStatic), fileName, "png") +	imageStaticURL := uris.GenerateURIForAttachment( +		owningAccountID, +		string(media.TypeEmoji), +		string(media.SizeStatic), +		fileName, +		"png", +	)  	e, err := p.state.DB.GetEmojiByStaticURL(ctx, imageStaticURL)  	if err != nil { -		return nil, gtserror.NewErrorNotFound(fmt.Errorf("emoji %s could not be taken from the db: %s", fileName, err)) +		return nil, gtserror.NewErrorNotFound(fmt.Errorf("emoji %s could not be taken from the db: %w", fileName, err))  	}  	if *e.Disabled {  		return nil, gtserror.NewErrorNotFound(fmt.Errorf("emoji %s has been disabled", fileName))  	} +	if !*e.Cached { +		// if we don't have it cached, then we can assume two things: +		// 1. this is remote emoji, since local emoji should never be uncached +		// 2. 
we need to fetch it again using a transport and the media manager +		remoteURL, err := url.Parse(e.ImageRemoteURL) +		if err != nil { +			return nil, gtserror.NewErrorNotFound(fmt.Errorf("error parsing remote emoji iri %s: %w", e.ImageRemoteURL, err)) +		} + +		dataFn := func(ctx context.Context) (io.ReadCloser, int64, error) { +			t, err := p.transportController.NewTransportForUsername(ctx, "") +			if err != nil { +				return nil, 0, err +			} +			return t.DereferenceMedia(gtscontext.SetFastFail(ctx), remoteURL) +		} + +		// Start recaching this emoji with the prepared data function. +		processingEmoji, err := p.mediaManager.PreProcessEmojiRecache(ctx, dataFn, e.ID) +		if err != nil { +			return nil, gtserror.NewErrorNotFound(fmt.Errorf("error recaching emoji: %w", err)) +		} + +		// Load attachment and block until complete +		e, err = processingEmoji.LoadEmoji(ctx) +		if err != nil { +			return nil, gtserror.NewErrorNotFound(fmt.Errorf("error loading recached emoji: %w", err)) +		} +	} +  	switch emojiSize {  	case media.SizeOriginal:  		emojiContent.ContentType = e.ImageContentType diff --git a/internal/processing/report/create.go b/internal/processing/report/create.go index 9faffd2d4..a6cce8e80 100644 --- a/internal/processing/report/create.go +++ b/internal/processing/report/create.go @@ -51,7 +51,7 @@ func (p *Processor) Create(ctx context.Context, account *gtsmodel.Account, form  	}  	// fetch statuses by IDs given in the report form (noop if no statuses given) -	statuses, err := p.state.DB.GetStatuses(ctx, form.StatusIDs) +	statuses, err := p.state.DB.GetStatusesByIDs(ctx, form.StatusIDs)  	if err != nil {  		err = fmt.Errorf("db error fetching report target statuses: %w", err)  		return nil, gtserror.NewErrorInternalError(err) diff --git a/internal/regexes/regexes.go b/internal/regexes/regexes.go index 88c832508..347a8a98b 100644 --- a/internal/regexes/regexes.go +++ b/internal/regexes/regexes.go @@ -70,7 +70,7 @@ const (  	statusesPath   = userPathPrefix 
+ `/` + statuses + `/(` + ulid + `)$`  	blockPath      = userPathPrefix + `/` + blocks + `/(` + ulid + `)$`  	reportPath     = `^/?` + reports + `/(` + ulid + `)$` -	filePath       = `^/?(` + ulid + `)/([a-z]+)/([a-z]+)/(` + ulid + `)\.([a-z]+)$` +	filePath       = `^/?(` + ulid + `)/([a-z]+)/([a-z]+)/(` + ulid + `)\.([a-z0-9]+)$`  )  var ( diff --git a/internal/storage/storage.go b/internal/storage/storage.go index ea8184881..588c586d8 100644 --- a/internal/storage/storage.go +++ b/internal/storage/storage.go @@ -97,6 +97,9 @@ func (d *Driver) Has(ctx context.Context, key string) (bool, error) {  func (d *Driver) WalkKeys(ctx context.Context, walk func(context.Context, string) error) error {  	return d.Storage.WalkKeys(ctx, storage.WalkKeysOptions{  		WalkFn: func(ctx context.Context, entry storage.Entry) error { +			if entry.Key == "store.lock" { +				return nil // skip this. +			}  			return walk(ctx, entry.Key)  		},  	}) diff --git a/internal/typeutils/internaltofrontend.go b/internal/typeutils/internaltofrontend.go index 17b8047e9..03d0bfcab 100644 --- a/internal/typeutils/internaltofrontend.go +++ b/internal/typeutils/internaltofrontend.go @@ -1122,7 +1122,7 @@ func (c *converter) ReportToAdminAPIReport(ctx context.Context, r *gtsmodel.Repo  	statuses := make([]*apimodel.Status, 0, len(r.StatusIDs))  	if len(r.StatusIDs) != 0 && len(r.Statuses) == 0 { -		r.Statuses, err = c.db.GetStatuses(ctx, r.StatusIDs) +		r.Statuses, err = c.db.GetStatusesByIDs(ctx, r.StatusIDs)  		if err != nil {  			return nil, fmt.Errorf("ReportToAdminAPIReport: error getting statuses from the db: %w", err)  		}  | 
