diff options
author | 2023-06-22 20:46:36 +0100 | |
---|---|---|
committer | 2023-06-22 20:46:36 +0100 | |
commit | 9a22102fa8b1ce47571d5bba71e8f36895d21bf0 (patch) | |
tree | 3c2af6db0a3905d31243cd840d1dd50bea59dbb0 /internal/cleaner | |
parent | [docs] Clarify email requirement for OIDC (#1918) (diff) | |
download | gotosocial-9a22102fa8b1ce47571d5bba71e8f36895d21bf0.tar.xz |
[bugfix/chore] oauth entropy fix + media cleanup tasks rewrite (#1853)
Diffstat (limited to 'internal/cleaner')
-rw-r--r-- | internal/cleaner/cleaner.go | 135 | ||||
-rw-r--r-- | internal/cleaner/emoji.go | 238 | ||||
-rw-r--r-- | internal/cleaner/media.go | 547 | ||||
-rw-r--r-- | internal/cleaner/media_test.go | 427 |
4 files changed, 1347 insertions, 0 deletions
diff --git a/internal/cleaner/cleaner.go b/internal/cleaner/cleaner.go new file mode 100644 index 000000000..ee1e4785f --- /dev/null +++ b/internal/cleaner/cleaner.go @@ -0,0 +1,135 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +package cleaner + +import ( + "context" + "errors" + "time" + + "codeberg.org/gruf/go-runners" + "codeberg.org/gruf/go-sched" + "codeberg.org/gruf/go-store/v2/storage" + "github.com/superseriousbusiness/gotosocial/internal/config" + "github.com/superseriousbusiness/gotosocial/internal/gtscontext" + "github.com/superseriousbusiness/gotosocial/internal/gtserror" + "github.com/superseriousbusiness/gotosocial/internal/log" + "github.com/superseriousbusiness/gotosocial/internal/state" +) + +const ( + selectLimit = 50 +) + +type Cleaner struct { + state *state.State + emoji Emoji + media Media +} + +func New(state *state.State) *Cleaner { + c := new(Cleaner) + c.state = state + c.emoji.Cleaner = c + c.media.Cleaner = c + scheduleJobs(c) + return c +} + +// Emoji returns the emoji set of cleaner utilities. +func (c *Cleaner) Emoji() *Emoji { + return &c.emoji +} + +// Media returns the media set of cleaner utilities. 
+func (c *Cleaner) Media() *Media { + return &c.media +} + +// checkFiles checks for each of the provided files, and calls onMissing() if any of them are missing. Returns true if missing. +func (c *Cleaner) checkFiles(ctx context.Context, onMissing func() error, files ...string) (bool, error) { + for _, file := range files { + // Check whether each file exists in storage. + have, err := c.state.Storage.Has(ctx, file) + if err != nil { + return false, gtserror.Newf("error checking storage for %s: %w", file, err) + } else if !have { + // Missing files, perform hook. + return true, onMissing() + } + } + return false, nil +} + +// removeFiles removes the provided files, returning the number of them returned. +func (c *Cleaner) removeFiles(ctx context.Context, files ...string) (int, error) { + if gtscontext.DryRun(ctx) { + // Dry run, do nothing. + return len(files), nil + } + + var errs gtserror.MultiError + + for _, path := range files { + // Remove each provided storage path. + log.Debugf(ctx, "removing file: %s", path) + err := c.state.Storage.Delete(ctx, path) + if err != nil && !errors.Is(err, storage.ErrNotFound) { + errs.Appendf("error removing %s: %v", path, err) + } + } + + // Calculate no. files removed. + diff := len(files) - len(errs) + + // Wrap the combined error slice. + if err := errs.Combine(); err != nil { + return diff, gtserror.Newf("error(s) removing files: %w", err) + } + + return diff, nil +} + +func scheduleJobs(c *Cleaner) { + const day = time.Hour * 24 + + // Calculate closest midnight. + now := time.Now() + midnight := now.Round(day) + + if midnight.Before(now) { + // since <= 11:59am rounds down. + midnight = midnight.Add(day) + } + + // Get ctx associated with scheduler run state. + done := c.state.Workers.Scheduler.Done() + doneCtx := runners.CancelCtx(done) + + // TODO: we'll need to do some thinking to make these + // jobs restartable if we want to implement reloads in + // the future that make call to Workers.Stop() -> Workers.Start(). 
+ + // Schedule the cleaning tasks to execute every day at midnight. + c.state.Workers.Scheduler.Schedule(sched.NewJob(func(start time.Time) { + log.Info(nil, "starting media clean") + c.Media().All(doneCtx, config.GetMediaRemoteCacheDays()) + c.Emoji().All(doneCtx) + log.Infof(nil, "finished media clean after %s", time.Since(start)) + }).EveryAt(midnight, day)) +} diff --git a/internal/cleaner/emoji.go b/internal/cleaner/emoji.go new file mode 100644 index 000000000..35e579171 --- /dev/null +++ b/internal/cleaner/emoji.go @@ -0,0 +1,238 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +package cleaner + +import ( + "context" + "errors" + + "github.com/superseriousbusiness/gotosocial/internal/db" + "github.com/superseriousbusiness/gotosocial/internal/gtscontext" + "github.com/superseriousbusiness/gotosocial/internal/gtserror" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/log" +) + +// Emoji encompasses a set of +// emoji cleanup / admin utils. +type Emoji struct { + *Cleaner +} + +// All will execute all cleaner.Emoji utilities synchronously, including output logging. 
+// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action. +func (e *Emoji) All(ctx context.Context) { + e.LogPruneMissing(ctx) + e.LogFixBroken(ctx) +} + +// LogPruneMissing performs emoji.PruneMissing(...), logging the start and outcome. +func (e *Emoji) LogPruneMissing(ctx context.Context) { + log.Info(ctx, "start") + if n, err := e.PruneMissing(ctx); err != nil { + log.Error(ctx, err) + } else { + log.Infof(ctx, "pruned: %d", n) + } +} + +// LogFixBroken performs emoji.FixBroken(...), logging the start and outcome. +func (e *Emoji) LogFixBroken(ctx context.Context) { + log.Info(ctx, "start") + if n, err := e.FixBroken(ctx); err != nil { + log.Error(ctx, err) + } else { + log.Infof(ctx, "fixed: %d", n) + } +} + +// PruneMissing will delete emoji with missing files from the database and storage driver. +// Context will be checked for `gtscontext.DryRun()` to perform the action. NOTE: this function +// should be updated to match media.FixCacheStat() if we ever support emoji uncaching. +func (e *Emoji) PruneMissing(ctx context.Context) (int, error) { + var ( + total int + maxID string + ) + + for { + // Fetch the next batch of emoji media up to next ID. + emojis, err := e.state.DB.GetEmojis(ctx, maxID, selectLimit) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return total, gtserror.Newf("error getting emojis: %w", err) + } + + if len(emojis) == 0 { + // reached end. + break + } + + // Use last as the next 'maxID' value. + maxID = emojis[len(emojis)-1].ID + + for _, emoji := range emojis { + // Check / fix missing emoji media. + fixed, err := e.pruneMissing(ctx, emoji) + if err != nil { + return total, err + } + + if fixed { + // Update + // count. + total++ + } + } + } + + return total, nil +} + +// FixBroken will check all emojis for valid related models (e.g. category). +// Broken media will be automatically updated to remove now-missing models. 
+// Context will be checked for `gtscontext.DryRun()` to perform the action. +func (e *Emoji) FixBroken(ctx context.Context) (int, error) { + var ( + total int + maxID string + ) + + for { + // Fetch the next batch of emoji media up to next ID. + emojis, err := e.state.DB.GetEmojis(ctx, maxID, selectLimit) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return total, gtserror.Newf("error getting emojis: %w", err) + } + + if len(emojis) == 0 { + // reached end. + break + } + + // Use last as the next 'maxID' value. + maxID = emojis[len(emojis)-1].ID + + for _, emoji := range emojis { + // Check / fix missing broken emoji. + fixed, err := e.fixBroken(ctx, emoji) + if err != nil { + return total, err + } + + if fixed { + // Update + // count. + total++ + } + } + } + + return total, nil +} + +func (e *Emoji) pruneMissing(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { + return e.checkFiles(ctx, func() error { + // Emoji missing files, delete it. + // NOTE: if we ever support uncaching + // of emojis, change to e.uncache(). + // In that case we should also rename + // this function to match the media + // equivalent -> fixCacheState(). + log.WithContext(ctx). + WithField("emoji", emoji.ID). + Debug("deleting due to missing emoji") + return e.delete(ctx, emoji) + }, + emoji.ImageStaticPath, + emoji.ImagePath, + ) +} + +func (e *Emoji) fixBroken(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { + // Check we have the required category for emoji. + _, missing, err := e.getRelatedCategory(ctx, emoji) + if err != nil { + return false, err + } + + if missing { + if !gtscontext.DryRun(ctx) { + // Dry run, do nothing. + return true, nil + } + + // Remove related category. + emoji.CategoryID = "" + + // Update emoji model in the database to remove category ID. 
+ log.Debugf(ctx, "fixing missing emoji category: %s", emoji.ID) + if err := e.state.DB.UpdateEmoji(ctx, emoji, "category_id"); err != nil { + return true, gtserror.Newf("error updating emoji: %w", err) + } + + return true, nil + } + + return false, nil +} + +func (e *Emoji) getRelatedCategory(ctx context.Context, emoji *gtsmodel.Emoji) (*gtsmodel.EmojiCategory, bool, error) { + if emoji.CategoryID == "" { + // no related category. + return nil, false, nil + } + + // Load the category related to this emoji. + category, err := e.state.DB.GetEmojiCategory( + gtscontext.SetBarebones(ctx), + emoji.CategoryID, + ) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return nil, false, gtserror.Newf("error fetching category by id %s: %w", emoji.CategoryID, err) + } + + if category == nil { + // Category is missing. + return nil, true, nil + } + + return category, false, nil +} + +func (e *Emoji) delete(ctx context.Context, emoji *gtsmodel.Emoji) error { + if gtscontext.DryRun(ctx) { + // Dry run, do nothing. + return nil + } + + // Remove emoji and static files. + _, err := e.removeFiles(ctx, + emoji.ImageStaticPath, + emoji.ImagePath, + ) + if err != nil { + return gtserror.Newf("error removing emoji files: %w", err) + } + + // Delete emoji entirely from the database by its ID. 
+ if err := e.state.DB.DeleteEmojiByID(ctx, emoji.ID); err != nil { + return gtserror.Newf("error deleting emoji: %w", err) + } + + return nil +} diff --git a/internal/cleaner/media.go b/internal/cleaner/media.go new file mode 100644 index 000000000..51a0aea6d --- /dev/null +++ b/internal/cleaner/media.go @@ -0,0 +1,547 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +package cleaner + +import ( + "context" + "errors" + "time" + + "github.com/superseriousbusiness/gotosocial/internal/db" + "github.com/superseriousbusiness/gotosocial/internal/gtscontext" + "github.com/superseriousbusiness/gotosocial/internal/gtserror" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/log" + "github.com/superseriousbusiness/gotosocial/internal/media" + "github.com/superseriousbusiness/gotosocial/internal/regexes" + "github.com/superseriousbusiness/gotosocial/internal/uris" +) + +// Media encompasses a set of +// media cleanup / admin utils. +type Media struct { + *Cleaner +} + +// All will execute all cleaner.Media utilities synchronously, including output logging. +// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action. 
+func (m *Media) All(ctx context.Context, maxRemoteDays int) { + t := time.Now().Add(-24 * time.Hour * time.Duration(maxRemoteDays)) + m.LogUncacheRemote(ctx, t) + m.LogPruneOrphaned(ctx) + m.LogPruneUnused(ctx) + m.LogFixCacheStates(ctx) + _ = m.state.Storage.Storage.Clean(ctx) +} + +// LogUncacheRemote performs Media.UncacheRemote(...), logging the start and outcome. +func (m *Media) LogUncacheRemote(ctx context.Context, olderThan time.Time) { + log.Infof(ctx, "start older than: %s", olderThan.Format(time.Stamp)) + if n, err := m.UncacheRemote(ctx, olderThan); err != nil { + log.Error(ctx, err) + } else { + log.Infof(ctx, "uncached: %d", n) + } +} + +// LogPruneOrphaned performs Media.PruneOrphaned(...), logging the start and outcome. +func (m *Media) LogPruneOrphaned(ctx context.Context) { + log.Info(ctx, "start") + if n, err := m.PruneOrphaned(ctx); err != nil { + log.Error(ctx, err) + } else { + log.Infof(ctx, "pruned: %d", n) + } +} + +// LogPruneUnused performs Media.PruneUnused(...), logging the start and outcome. +func (m *Media) LogPruneUnused(ctx context.Context) { + log.Info(ctx, "start") + if n, err := m.PruneUnused(ctx); err != nil { + log.Error(ctx, err) + } else { + log.Infof(ctx, "pruned: %d", n) + } +} + +// LogFixCacheStates performs Media.FixCacheStates(...), logging the start and outcome. +func (m *Media) LogFixCacheStates(ctx context.Context) { + log.Info(ctx, "start") + if n, err := m.FixCacheStates(ctx); err != nil { + log.Error(ctx, err) + } else { + log.Infof(ctx, "fixed: %d", n) + } +} + +// PruneOrphaned will delete orphaned files from storage (i.e. media missing a database entry). +// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action. 
+func (m *Media) PruneOrphaned(ctx context.Context) (int, error) { + var files []string + + // All media files in storage will have path fitting: {$account}/{$type}/{$size}/{$id}.{$ext} + if err := m.state.Storage.WalkKeys(ctx, func(ctx context.Context, path string) error { + if !regexes.FilePath.MatchString(path) { + // This is not our expected media + // path format, skip this one. + return nil + } + + // Check whether this entry is orphaned. + orphaned, err := m.isOrphaned(ctx, path) + if err != nil { + return gtserror.Newf("error checking orphaned status: %w", err) + } + + if orphaned { + // Add this orphaned entry. + files = append(files, path) + } + + return nil + }); err != nil { + return 0, gtserror.Newf("error walking storage: %w", err) + } + + // Delete all orphaned files from storage. + return m.removeFiles(ctx, files...) +} + +// PruneUnused will delete all unused media attachments from the database and storage driver. +// Media is marked as unused if not attached to any status, account or account is suspended. +// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action. +func (m *Media) PruneUnused(ctx context.Context) (int, error) { + var ( + total int + maxID string + ) + + for { + // Fetch the next batch of media attachments up to next max ID. + attachments, err := m.state.DB.GetAttachments(ctx, maxID, selectLimit) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return total, gtserror.Newf("error getting attachments: %w", err) + } + + if len(attachments) == 0 { + // reached end. + break + } + + // Use last ID as the next 'maxID' value. + maxID = attachments[len(attachments)-1].ID + + for _, media := range attachments { + // Check / prune unused media attachment. + fixed, err := m.pruneUnused(ctx, media) + if err != nil { + return total, err + } + + if fixed { + // Update + // count. 
+ total++ + } + } + } + + return total, nil +} + +// UncacheRemote will uncache all remote media attachments older than given input time. +// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action. +func (m *Media) UncacheRemote(ctx context.Context, olderThan time.Time) (int, error) { + var total int + + // Drop time by a minute to improve search, + // (i.e. make it olderThan inclusive search). + olderThan = olderThan.Add(-time.Minute) + + // Store recent time. + mostRecent := olderThan + + for { + // Fetch the next batch of attachments older than last-set time. + attachments, err := m.state.DB.GetRemoteOlderThan(ctx, olderThan, selectLimit) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return total, gtserror.Newf("error getting remote media: %w", err) + } + + if len(attachments) == 0 { + // reached end. + break + } + + // Use last created-at as the next 'olderThan' value. + olderThan = attachments[len(attachments)-1].CreatedAt + + for _, media := range attachments { + // Check / uncache each remote media attachment. + uncached, err := m.uncacheRemote(ctx, mostRecent, media) + if err != nil { + return total, err + } + + if uncached { + // Update + // count. + total++ + } + } + } + + return total, nil +} + +// FixCacheStatus will check all media for up-to-date cache status (i.e. in storage driver). +// Media marked as cached, with any required files missing, will be automatically uncached. +// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action. +func (m *Media) FixCacheStates(ctx context.Context) (int, error) { + var ( + total int + maxID string + ) + + for { + // Fetch the next batch of media attachments up to next max ID. + attachments, err := m.state.DB.GetAttachments(ctx, maxID, selectLimit) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return total, gtserror.Newf("error getting avatars / headers: %w", err) + } + + if len(attachments) == 0 { + // reached end. 
+ break + } + + // Use last ID as the next 'maxID' value. + maxID = attachments[len(attachments)-1].ID + + for _, media := range attachments { + // Check / fix required media cache states. + fixed, err := m.fixCacheState(ctx, media) + if err != nil { + return total, err + } + + if fixed { + // Update + // count. + total++ + } + } + } + + return total, nil +} + +func (m *Media) isOrphaned(ctx context.Context, path string) (bool, error) { + pathParts := regexes.FilePath.FindStringSubmatch(path) + if len(pathParts) != 6 { + // This doesn't match our expectations so + // it wasn't created by gts; ignore it. + return false, nil + } + + var ( + // 0th -> whole match + // 1st -> account ID + mediaType = pathParts[2] + // 3rd -> media sub-type (e.g. small, static) + mediaID = pathParts[4] + // 5th -> file extension + ) + + // Start a log entry for media. + l := log.WithContext(ctx). + WithField("media", mediaID) + + switch media.Type(mediaType) { + case media.TypeAttachment: + // Look for media in database stored by ID. + media, err := m.state.DB.GetAttachmentByID( + gtscontext.SetBarebones(ctx), + mediaID, + ) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return false, gtserror.Newf("error fetching media by id %s: %w", mediaID, err) + } + + if media == nil { + l.Debug("missing db entry for media") + return true, nil + } + + case media.TypeEmoji: + // Generate static URL for this emoji to lookup. + staticURL := uris.GenerateURIForAttachment( + pathParts[1], // instance account ID + string(media.TypeEmoji), + string(media.SizeStatic), + mediaID, + "png", + ) + + // Look for emoji in database stored by static URL. + // The media ID part of the storage key for emojis can + // change for refreshed items, so search by generated URL. 
+ emoji, err := m.state.DB.GetEmojiByStaticURL( + gtscontext.SetBarebones(ctx), + staticURL, + ) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return false, gtserror.Newf("error fetching emoji by url %s: %w", staticURL, err) + } + + if emoji == nil { + l.Debug("missing db entry for emoji") + return true, nil + } + } + + return false, nil +} + +func (m *Media) pruneUnused(ctx context.Context, media *gtsmodel.MediaAttachment) (bool, error) { + // Start a log entry for media. + l := log.WithContext(ctx). + WithField("media", media.ID) + + // Check whether we have the required account for media. + account, missing, err := m.getRelatedAccount(ctx, media) + if err != nil { + return false, err + } else if missing { + l.Debug("deleting due to missing account") + return true, m.delete(ctx, media) + } + + if account != nil { + // Related account exists for this media, check whether it is being used. + headerInUse := (*media.Header && media.ID == account.HeaderMediaAttachmentID) + avatarInUse := (*media.Avatar && media.ID == account.AvatarMediaAttachmentID) + if (headerInUse || avatarInUse) && account.SuspendedAt.IsZero() { + l.Debug("skipping as account media in use") + return false, nil + } + } + + // Check whether we have the required status for media. + status, missing, err := m.getRelatedStatus(ctx, media) + if err != nil { + return false, err + } else if missing { + l.Debug("deleting due to missing status") + return true, m.delete(ctx, media) + } + + if status != nil { + // Check whether still attached to status. + for _, id := range status.AttachmentIDs { + if id == media.ID { + l.Debug("skippping as attached to status") + return false, nil + } + } + } + + // Media totally unused, delete it. 
+ l.Debug("deleting unused media") + return true, m.delete(ctx, media) +} + +func (m *Media) fixCacheState(ctx context.Context, media *gtsmodel.MediaAttachment) (bool, error) { + if !*media.Cached { + // We ignore uncached media, a + // false negative is a much better + // situation than a false positive, + // re-cache will just overwrite it. + return false, nil + } + + // Start a log entry for media. + l := log.WithContext(ctx). + WithField("media", media.ID) + + // Check whether we have the required account for media. + _, missingAccount, err := m.getRelatedAccount(ctx, media) + if err != nil { + return false, err + } else if missingAccount { + l.Debug("skipping due to missing account") + return false, nil + } + + // Check whether we have the required status for media. + _, missingStatus, err := m.getRelatedStatus(ctx, media) + if err != nil { + return false, err + } else if missingStatus { + l.Debug("skipping due to missing status") + return false, nil + } + + // So we know this a valid cached media entry. + // Check that we have the files on disk required.... + return m.checkFiles(ctx, func() error { + l.Debug("uncaching due to missing media") + return m.uncache(ctx, media) + }, + media.Thumbnail.Path, + media.File.Path, + ) +} + +func (m *Media) uncacheRemote(ctx context.Context, after time.Time, media *gtsmodel.MediaAttachment) (bool, error) { + if !*media.Cached { + // Already uncached. + return false, nil + } + + // Start a log entry for media. + l := log.WithContext(ctx). + WithField("media", media.ID) + + // Check whether we have the required account for media. + account, missing, err := m.getRelatedAccount(ctx, media) + if err != nil { + return false, err + } else if missing { + l.Debug("skipping due to missing account") + return false, nil + } + + if account != nil && account.FetchedAt.After(after) { + l.Debug("skipping due to recently fetched account") + return false, nil + } + + // Check whether we have the required status for media. 
+ status, missing, err := m.getRelatedStatus(ctx, media) + if err != nil { + return false, err + } else if missing { + l.Debug("skipping due to missing status") + return false, nil + } + + if status != nil && status.FetchedAt.After(after) { + l.Debug("skipping due to recently fetched status") + return false, nil + } + + // This media is too old, uncache it. + l.Debug("uncaching old remote media") + return true, m.uncache(ctx, media) +} + +func (m *Media) getRelatedAccount(ctx context.Context, media *gtsmodel.MediaAttachment) (*gtsmodel.Account, bool, error) { + if media.AccountID == "" { + // no related account. + return nil, false, nil + } + + // Load the account related to this media. + account, err := m.state.DB.GetAccountByID( + gtscontext.SetBarebones(ctx), + media.AccountID, + ) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return nil, false, gtserror.Newf("error fetching account by id %s: %w", media.AccountID, err) + } + + if account == nil { + // account is missing. + return nil, true, nil + } + + return account, false, nil +} + +func (m *Media) getRelatedStatus(ctx context.Context, media *gtsmodel.MediaAttachment) (*gtsmodel.Status, bool, error) { + if media.StatusID == "" { + // no related status. + return nil, false, nil + } + + // Load the status related to this media. + status, err := m.state.DB.GetStatusByID( + gtscontext.SetBarebones(ctx), + media.StatusID, + ) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return nil, false, gtserror.Newf("error fetching status by id %s: %w", media.StatusID, err) + } + + if status == nil { + // status is missing. + return nil, true, nil + } + + return status, false, nil +} + +func (m *Media) uncache(ctx context.Context, media *gtsmodel.MediaAttachment) error { + if gtscontext.DryRun(ctx) { + // Dry run, do nothing. + return nil + } + + // Remove media and thumbnail. 
+ _, err := m.removeFiles(ctx, + media.File.Path, + media.Thumbnail.Path, + ) + if err != nil { + return gtserror.Newf("error removing media files: %w", err) + } + + // Update attachment to reflect that we no longer have it cached. + log.Debugf(ctx, "marking media attachment as uncached: %s", media.ID) + media.Cached = func() *bool { i := false; return &i }() + if err := m.state.DB.UpdateAttachment(ctx, media, "cached"); err != nil { + return gtserror.Newf("error updating media: %w", err) + } + + return nil +} + +func (m *Media) delete(ctx context.Context, media *gtsmodel.MediaAttachment) error { + if gtscontext.DryRun(ctx) { + // Dry run, do nothing. + return nil + } + + // Remove media and thumbnail. + _, err := m.removeFiles(ctx, + media.File.Path, + media.Thumbnail.Path, + ) + if err != nil { + return gtserror.Newf("error removing media files: %w", err) + } + + // Delete media attachment entirely from the database. + log.Debugf(ctx, "deleting media attachment: %s", media.ID) + if err := m.state.DB.DeleteAttachment(ctx, media.ID); err != nil { + return gtserror.Newf("error deleting media: %w", err) + } + + return nil +} diff --git a/internal/cleaner/media_test.go b/internal/cleaner/media_test.go new file mode 100644 index 000000000..824df2ca5 --- /dev/null +++ b/internal/cleaner/media_test.go @@ -0,0 +1,427 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. 
+// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +package cleaner_test + +import ( + "bytes" + "context" + "io" + "os" + "testing" + "time" + + "github.com/stretchr/testify/suite" + "github.com/superseriousbusiness/gotosocial/internal/cleaner" + "github.com/superseriousbusiness/gotosocial/internal/db" + "github.com/superseriousbusiness/gotosocial/internal/gtscontext" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/media" + "github.com/superseriousbusiness/gotosocial/internal/state" + "github.com/superseriousbusiness/gotosocial/internal/storage" + "github.com/superseriousbusiness/gotosocial/internal/transport" + "github.com/superseriousbusiness/gotosocial/internal/visibility" + "github.com/superseriousbusiness/gotosocial/testrig" +) + +type MediaTestSuite struct { + suite.Suite + + db db.DB + storage *storage.Driver + state state.State + manager *media.Manager + cleaner *cleaner.Cleaner + transportController transport.Controller + testAttachments map[string]*gtsmodel.MediaAttachment + testAccounts map[string]*gtsmodel.Account + testEmojis map[string]*gtsmodel.Emoji +} + +func TestMediaTestSuite(t *testing.T) { + suite.Run(t, &MediaTestSuite{}) +} + +func (suite *MediaTestSuite) SetupTest() { + testrig.InitTestConfig() + testrig.InitTestLog() + + suite.state.Caches.Init() + testrig.StartWorkers(&suite.state) + + suite.db = testrig.NewTestDB(&suite.state) + suite.storage = testrig.NewInMemoryStorage() + suite.state.DB = suite.db + suite.state.Storage = suite.storage + + testrig.StandardStorageSetup(suite.storage, "../../testrig/media") + testrig.StandardDBSetup(suite.db, nil) + + testrig.StartTimelines( + &suite.state, + visibility.NewFilter(&suite.state), + testrig.NewTestTypeConverter(suite.db), + ) + + suite.testAttachments = testrig.NewTestAttachments() + suite.testAccounts = 
testrig.NewTestAccounts() + suite.testEmojis = testrig.NewTestEmojis() + suite.manager = testrig.NewTestMediaManager(&suite.state) + suite.cleaner = cleaner.New(&suite.state) + suite.transportController = testrig.NewTestTransportController(&suite.state, testrig.NewMockHTTPClient(nil, "../../testrig/media")) +} + +func (suite *MediaTestSuite) TearDownTest() { + testrig.StandardDBTeardown(suite.db) + testrig.StandardStorageTeardown(suite.storage) + testrig.StopWorkers(&suite.state) +} + +// func (suite *MediaTestSuite) TestPruneOrphanedDry() { +// // add a big orphan panda to store +// b, err := os.ReadFile("../media/test/big-panda.gif") +// if err != nil { +// suite.FailNow(err.Error()) +// } + +// pandaPath := "01GJQJ1YD9QCHCE12GG0EYHVNW/attachment/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif" +// if _, err := suite.storage.Put(context.Background(), pandaPath, b); err != nil { +// suite.FailNow(err.Error()) +// } + +// ctx := context.Background() + +// // dry run should show up 1 orphaned panda +// totalPruned, err := suite.cleaner.Media().PruneOrphaned(gtscontext.SetDryRun(ctx)) +// suite.NoError(err) +// suite.Equal(1, totalPruned) + +// // panda should still be in storage +// hasKey, err := suite.storage.Has(ctx, pandaPath) +// suite.NoError(err) +// suite.True(hasKey) +// } + +// func (suite *MediaTestSuite) TestPruneOrphanedMoist() { +// // i am not complicit in the moistness of this codebase :| + +// // add a big orphan panda to store +// b, err := os.ReadFile("../media/test/big-panda.gif") +// if err != nil { +// suite.FailNow(err.Error()) +// } + +// pandaPath := "01GJQJ1YD9QCHCE12GG0EYHVNW/attachment/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif" +// if _, err := suite.storage.Put(context.Background(), pandaPath, b); err != nil { +// suite.FailNow(err.Error()) +// } + +// ctx := context.Background() + +// // should show up 1 orphaned panda +// totalPruned, err := suite.cleaner.Media().PruneOrphaned(ctx) +// suite.NoError(err) +// suite.Equal(1, totalPruned) + +// // 
panda should no longer be in storage +// hasKey, err := suite.storage.Has(ctx, pandaPath) +// suite.NoError(err) +// suite.False(hasKey) +// } + +// func (suite *MediaTestSuite) TestPruneUnusedLocal() { +// testAttachment := suite.testAttachments["local_account_1_unattached_1"] +// suite.True(*testAttachment.Cached) + +// totalPruned, err := suite.manager.PruneUnusedLocal(context.Background(), false) +// suite.NoError(err) +// suite.Equal(1, totalPruned) + +// _, err = suite.db.GetAttachmentByID(context.Background(), testAttachment.ID) +// suite.ErrorIs(err, db.ErrNoEntries) +// } + +// func (suite *MediaTestSuite) TestPruneUnusedLocalDry() { +// testAttachment := suite.testAttachments["local_account_1_unattached_1"] +// suite.True(*testAttachment.Cached) + +// totalPruned, err := suite.manager.PruneUnusedLocal(context.Background(), true) +// suite.NoError(err) +// suite.Equal(1, totalPruned) + +// _, err = suite.db.GetAttachmentByID(context.Background(), testAttachment.ID) +// suite.NoError(err) +// } + +// func (suite *MediaTestSuite) TestPruneRemoteTwice() { +// totalPruned, err := suite.manager.PruneUnusedLocal(context.Background(), false) +// suite.NoError(err) +// suite.Equal(1, totalPruned) + +// // final prune should prune nothing, since the first prune already happened +// totalPrunedAgain, err := suite.manager.PruneUnusedLocal(context.Background(), false) +// suite.NoError(err) +// suite.Equal(0, totalPrunedAgain) +// } + +// func (suite *MediaTestSuite) TestPruneOneNonExistent() { +// ctx := context.Background() +// testAttachment := suite.testAttachments["local_account_1_unattached_1"] + +// // Delete this attachment cached on disk +// media, err := suite.db.GetAttachmentByID(ctx, testAttachment.ID) +// suite.NoError(err) +// suite.True(*media.Cached) +// err = suite.storage.Delete(ctx, media.File.Path) +// suite.NoError(err) + +// // Now attempt to prune for item with db entry no file +// totalPruned, err := suite.manager.PruneUnusedLocal(ctx, false) 
+// suite.NoError(err) +// suite.Equal(1, totalPruned) +// } + +// func (suite *MediaTestSuite) TestPruneUnusedRemote() { +// ctx := context.Background() + +// // start by clearing zork's avatar + header +// zorkOldAvatar := suite.testAttachments["local_account_1_avatar"] +// zorkOldHeader := suite.testAttachments["local_account_1_avatar"] +// zork := suite.testAccounts["local_account_1"] +// zork.AvatarMediaAttachmentID = "" +// zork.HeaderMediaAttachmentID = "" +// if err := suite.db.UpdateByID(ctx, zork, zork.ID, "avatar_media_attachment_id", "header_media_attachment_id"); err != nil { +// panic(err) +// } + +// totalPruned, err := suite.manager.PruneUnusedRemote(ctx, false) +// suite.NoError(err) +// suite.Equal(2, totalPruned) + +// // media should no longer be stored +// _, err = suite.storage.Get(ctx, zorkOldAvatar.File.Path) +// suite.ErrorIs(err, storage.ErrNotFound) +// _, err = suite.storage.Get(ctx, zorkOldAvatar.Thumbnail.Path) +// suite.ErrorIs(err, storage.ErrNotFound) +// _, err = suite.storage.Get(ctx, zorkOldHeader.File.Path) +// suite.ErrorIs(err, storage.ErrNotFound) +// _, err = suite.storage.Get(ctx, zorkOldHeader.Thumbnail.Path) +// suite.ErrorIs(err, storage.ErrNotFound) + +// // attachments should no longer be in the db +// _, err = suite.db.GetAttachmentByID(ctx, zorkOldAvatar.ID) +// suite.ErrorIs(err, db.ErrNoEntries) +// _, err = suite.db.GetAttachmentByID(ctx, zorkOldHeader.ID) +// suite.ErrorIs(err, db.ErrNoEntries) +// } + +// func (suite *MediaTestSuite) TestPruneUnusedRemoteTwice() { +// ctx := context.Background() + +// // start by clearing zork's avatar + header +// zork := suite.testAccounts["local_account_1"] +// zork.AvatarMediaAttachmentID = "" +// zork.HeaderMediaAttachmentID = "" +// if err := suite.db.UpdateByID(ctx, zork, zork.ID, "avatar_media_attachment_id", "header_media_attachment_id"); err != nil { +// panic(err) +// } + +// totalPruned, err := suite.manager.PruneUnusedRemote(ctx, false) +// suite.NoError(err) +// 
//	suite.Equal(2, totalPruned)

//	// final prune should prune nothing, since the first prune already happened
//	totalPruned, err = suite.manager.PruneUnusedRemote(ctx, false)
//	suite.NoError(err)
//	suite.Equal(0, totalPruned)
// }

// NOTE(review): in the commented-out test below, zorkOldHeader is fetched with
// the "local_account_1_avatar" key (same as zorkOldAvatar) and turtle is fetched
// with "local_account_1" (same as zork) — both look like copy-paste slips to
// confirm if this test is ever revived.
//
// func (suite *MediaTestSuite) TestPruneUnusedRemoteMultipleAccounts() {
//	ctx := context.Background()

//	// start by clearing zork's avatar + header
//	zorkOldAvatar := suite.testAttachments["local_account_1_avatar"]
//	zorkOldHeader := suite.testAttachments["local_account_1_avatar"]
//	zork := suite.testAccounts["local_account_1"]
//	zork.AvatarMediaAttachmentID = ""
//	zork.HeaderMediaAttachmentID = ""
//	if err := suite.db.UpdateByID(ctx, zork, zork.ID, "avatar_media_attachment_id", "header_media_attachment_id"); err != nil {
//		panic(err)
//	}

//	// set zork's unused header as belonging to turtle
//	turtle := suite.testAccounts["local_account_1"]
//	zorkOldHeader.AccountID = turtle.ID
//	if err := suite.db.UpdateByID(ctx, zorkOldHeader, zorkOldHeader.ID, "account_id"); err != nil {
//		panic(err)
//	}

//	totalPruned, err := suite.manager.PruneUnusedRemote(ctx, false)
//	suite.NoError(err)
//	suite.Equal(2, totalPruned)

//	// media should no longer be stored
//	_, err = suite.storage.Get(ctx, zorkOldAvatar.File.Path)
//	suite.ErrorIs(err, storage.ErrNotFound)
//	_, err = suite.storage.Get(ctx, zorkOldAvatar.Thumbnail.Path)
//	suite.ErrorIs(err, storage.ErrNotFound)
//	_, err = suite.storage.Get(ctx, zorkOldHeader.File.Path)
//	suite.ErrorIs(err, storage.ErrNotFound)
//	_, err = suite.storage.Get(ctx, zorkOldHeader.Thumbnail.Path)
//	suite.ErrorIs(err, storage.ErrNotFound)

//	// attachments should no longer be in the db
//	_, err = suite.db.GetAttachmentByID(ctx, zorkOldAvatar.ID)
//	suite.ErrorIs(err, db.ErrNoEntries)
//	_, err = suite.db.GetAttachmentByID(ctx, zorkOldHeader.ID)
//	suite.ErrorIs(err, db.ErrNoEntries)
// }

// TestUncacheRemote checks that UncacheRemote, given a threshold of 24h ago,
// reports both remote test attachments (a status attachment and a header) as
// uncached, and that their db entries have Cached flipped to false afterwards.
func (suite *MediaTestSuite) TestUncacheRemote() {
	ctx := context.Background()

	// Both remote fixtures start out cached.
	testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"]
	suite.True(*testStatusAttachment.Cached)

	testHeader := suite.testAttachments["remote_account_3_header"]
	suite.True(*testHeader.Cached)

	after := time.Now().Add(-24 * time.Hour)
	totalUncached, err := suite.cleaner.Media().UncacheRemote(ctx, after)
	suite.NoError(err)
	suite.Equal(2, totalUncached)

	// Both attachments should now be marked uncached in the db.
	uncachedAttachment, err := suite.db.GetAttachmentByID(ctx, testStatusAttachment.ID)
	suite.NoError(err)
	suite.False(*uncachedAttachment.Cached)

	uncachedAttachment, err = suite.db.GetAttachmentByID(ctx, testHeader.ID)
	suite.NoError(err)
	suite.False(*uncachedAttachment.Cached)
}

// TestUncacheRemoteDry checks that with the dry-run flag set on the context,
// UncacheRemote reports the same total (2) but leaves the attachments' Cached
// flags untouched in the db.
func (suite *MediaTestSuite) TestUncacheRemoteDry() {
	ctx := context.Background()

	// Both remote fixtures start out cached.
	testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"]
	suite.True(*testStatusAttachment.Cached)

	testHeader := suite.testAttachments["remote_account_3_header"]
	suite.True(*testHeader.Cached)

	// Dry run: counts are reported but nothing is modified.
	after := time.Now().Add(-24 * time.Hour)
	totalUncached, err := suite.cleaner.Media().UncacheRemote(gtscontext.SetDryRun(ctx), after)
	suite.NoError(err)
	suite.Equal(2, totalUncached)

	// Attachments must still be marked cached.
	uncachedAttachment, err := suite.db.GetAttachmentByID(ctx, testStatusAttachment.ID)
	suite.NoError(err)
	suite.True(*uncachedAttachment.Cached)

	uncachedAttachment, err = suite.db.GetAttachmentByID(ctx, testHeader.ID)
	suite.NoError(err)
	suite.True(*uncachedAttachment.Cached)
}

// TestUncacheRemoteTwice checks that UncacheRemote is effectively idempotent:
// a second run over the same time window uncaches nothing.
func (suite *MediaTestSuite) TestUncacheRemoteTwice() {
	ctx := context.Background()
	after := time.Now().Add(-24 * time.Hour)

	totalUncached, err := suite.cleaner.Media().UncacheRemote(ctx, after)
	suite.NoError(err)
	suite.Equal(2, totalUncached)

	// final uncache should uncache nothing, since the first uncache already happened
	totalUncachedAgain, err := suite.cleaner.Media().UncacheRemote(ctx, after)
	suite.NoError(err)
	suite.Equal(0, totalUncachedAgain)
}

// TestUncacheAndRecache checks the full round trip: UncacheRemote removes both
// attachments' files (and thumbnails) from storage, then PreProcessMediaRecache +
// LoadAttachment restores each one with identical ID, file/thumbnail paths, and
// filemeta, and puts the files back in storage.
func (suite *MediaTestSuite) TestUncacheAndRecache() {
	ctx := context.Background()
	testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"]
	testHeader := suite.testAttachments["remote_account_3_header"]

	after := time.Now().Add(-24 * time.Hour)
	totalUncached, err := suite.cleaner.Media().UncacheRemote(ctx, after)
	suite.NoError(err)
	suite.Equal(2, totalUncached)

	// media should no longer be stored
	_, err = suite.storage.Get(ctx, testStatusAttachment.File.Path)
	suite.ErrorIs(err, storage.ErrNotFound)
	_, err = suite.storage.Get(ctx, testStatusAttachment.Thumbnail.Path)
	suite.ErrorIs(err, storage.ErrNotFound)
	_, err = suite.storage.Get(ctx, testHeader.File.Path)
	suite.ErrorIs(err, storage.ErrNotFound)
	_, err = suite.storage.Get(ctx, testHeader.Thumbnail.Path)
	suite.ErrorIs(err, storage.ErrNotFound)

	// now recache the image....
	// data supplies fresh replacement bytes for each recache call; it panics on
	// read failure since a missing fixture is a test-environment bug.
	data := func(_ context.Context) (io.ReadCloser, int64, error) {
		// load bytes from a test image
		b, err := os.ReadFile("../../testrig/media/thoughtsofdog-original.jpg")
		if err != nil {
			panic(err)
		}
		return io.NopCloser(bytes.NewBuffer(b)), int64(len(b)), nil
	}

	for _, original := range []*gtsmodel.MediaAttachment{
		testStatusAttachment,
		testHeader,
	} {
		processingRecache, err := suite.manager.PreProcessMediaRecache(ctx, data, original.ID)
		suite.NoError(err)

		// synchronously load the recached attachment
		recachedAttachment, err := processingRecache.LoadAttachment(ctx)
		suite.NoError(err)
		suite.NotNil(recachedAttachment)

		// recachedAttachment should be basically the same as the old attachment
		suite.True(*recachedAttachment.Cached)
		suite.Equal(original.ID, recachedAttachment.ID)
		suite.Equal(original.File.Path, recachedAttachment.File.Path)           // file should be stored in the same place
		suite.Equal(original.Thumbnail.Path, recachedAttachment.Thumbnail.Path) // as should the thumbnail
		suite.EqualValues(original.FileMeta, recachedAttachment.FileMeta)       // and the filemeta should be the same

		// recached files should be back in storage
		_, err = suite.storage.Get(ctx, recachedAttachment.File.Path)
		suite.NoError(err)
		_, err = suite.storage.Get(ctx, recachedAttachment.Thumbnail.Path)
		suite.NoError(err)
	}
}

// TestUncacheOneNonExistent checks that UncacheRemote still counts an attachment
// whose db entry says Cached but whose file has already been deleted from
// storage: the total remains 2 despite the missing file.
func (suite *MediaTestSuite) TestUncacheOneNonExistent() {
	ctx := context.Background()
	testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"]

	// Delete this attachment cached on disk
	media, err := suite.db.GetAttachmentByID(ctx, testStatusAttachment.ID)
	suite.NoError(err)
	suite.True(*media.Cached)
	err = suite.storage.Delete(ctx, media.File.Path)
	suite.NoError(err)

	// Now attempt to uncache remote for item with db entry no file
	after := time.Now().Add(-24 * time.Hour)
	totalUncached, err := suite.cleaner.Media().UncacheRemote(ctx, after)
	suite.NoError(err)
	suite.Equal(2, totalUncached)
}