diff options
Diffstat (limited to 'internal/cleaner/media.go')
-rw-r--r-- | internal/cleaner/media.go | 547 |
1 files changed, 547 insertions, 0 deletions
diff --git a/internal/cleaner/media.go b/internal/cleaner/media.go new file mode 100644 index 000000000..51a0aea6d --- /dev/null +++ b/internal/cleaner/media.go @@ -0,0 +1,547 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +package cleaner + +import ( + "context" + "errors" + "time" + + "github.com/superseriousbusiness/gotosocial/internal/db" + "github.com/superseriousbusiness/gotosocial/internal/gtscontext" + "github.com/superseriousbusiness/gotosocial/internal/gtserror" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/log" + "github.com/superseriousbusiness/gotosocial/internal/media" + "github.com/superseriousbusiness/gotosocial/internal/regexes" + "github.com/superseriousbusiness/gotosocial/internal/uris" +) + +// Media encompasses a set of +// media cleanup / admin utils. +type Media struct { + *Cleaner +} + +// All will execute all cleaner.Media utilities synchronously, including output logging. +// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action. +func (m *Media) All(ctx context.Context, maxRemoteDays int) { + t := time.Now().Add(-24 * time.Hour * time.Duration(maxRemoteDays)) + m.LogUncacheRemote(ctx, t) + m.LogPruneOrphaned(ctx) + m.LogPruneUnused(ctx) + m.LogFixCacheStates(ctx) + _ = m.state.Storage.Storage.Clean(ctx) +} + +// LogUncacheRemote performs Media.UncacheRemote(...), logging the start and outcome. +func (m *Media) LogUncacheRemote(ctx context.Context, olderThan time.Time) { + log.Infof(ctx, "start older than: %s", olderThan.Format(time.Stamp)) + if n, err := m.UncacheRemote(ctx, olderThan); err != nil { + log.Error(ctx, err) + } else { + log.Infof(ctx, "uncached: %d", n) + } +} + +// LogPruneOrphaned performs Media.PruneOrphaned(...), logging the start and outcome. +func (m *Media) LogPruneOrphaned(ctx context.Context) { + log.Info(ctx, "start") + if n, err := m.PruneOrphaned(ctx); err != nil { + log.Error(ctx, err) + } else { + log.Infof(ctx, "pruned: %d", n) + } +} + +// LogPruneUnused performs Media.PruneUnused(...), logging the start and outcome. +func (m *Media) LogPruneUnused(ctx context.Context) { + log.Info(ctx, "start") + if n, err := m.PruneUnused(ctx); err != nil { + log.Error(ctx, err) + } else { + log.Infof(ctx, "pruned: %d", n) + } +} + +// LogFixCacheStates performs Media.FixCacheStates(...), logging the start and outcome. +func (m *Media) LogFixCacheStates(ctx context.Context) { + log.Info(ctx, "start") + if n, err := m.FixCacheStates(ctx); err != nil { + log.Error(ctx, err) + } else { + log.Infof(ctx, "fixed: %d", n) + } +} + +// PruneOrphaned will delete orphaned files from storage (i.e. media missing a database entry). +// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action. +func (m *Media) PruneOrphaned(ctx context.Context) (int, error) { + var files []string + + // All media files in storage will have path fitting: {$account}/{$type}/{$size}/{$id}.{$ext} + if err := m.state.Storage.WalkKeys(ctx, func(ctx context.Context, path string) error { + if !regexes.FilePath.MatchString(path) { + // This is not our expected media + // path format, skip this one. + return nil + } + + // Check whether this entry is orphaned. + orphaned, err := m.isOrphaned(ctx, path) + if err != nil { + return gtserror.Newf("error checking orphaned status: %w", err) + } + + if orphaned { + // Add this orphaned entry. + files = append(files, path) + } + + return nil + }); err != nil { + return 0, gtserror.Newf("error walking storage: %w", err) + } + + // Delete all orphaned files from storage. + return m.removeFiles(ctx, files...) +} + +// PruneUnused will delete all unused media attachments from the database and storage driver. +// Media is marked as unused if not attached to any status, account or account is suspended. +// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action. +func (m *Media) PruneUnused(ctx context.Context) (int, error) { + var ( + total int + maxID string + ) + + for { + // Fetch the next batch of media attachments up to next max ID. + attachments, err := m.state.DB.GetAttachments(ctx, maxID, selectLimit) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return total, gtserror.Newf("error getting attachments: %w", err) + } + + if len(attachments) == 0 { + // reached end. + break + } + + // Use last ID as the next 'maxID' value. + maxID = attachments[len(attachments)-1].ID + + for _, media := range attachments { + // Check / prune unused media attachment. + fixed, err := m.pruneUnused(ctx, media) + if err != nil { + return total, err + } + + if fixed { + // Update + // count. + total++ + } + } + } + + return total, nil +} + +// UncacheRemote will uncache all remote media attachments older than given input time. +// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action. +func (m *Media) UncacheRemote(ctx context.Context, olderThan time.Time) (int, error) { + var total int + + // Drop time by a minute to improve search, + // (i.e. make it olderThan inclusive search). + olderThan = olderThan.Add(-time.Minute) + + // Store recent time. + mostRecent := olderThan + + for { + // Fetch the next batch of attachments older than last-set time. + attachments, err := m.state.DB.GetRemoteOlderThan(ctx, olderThan, selectLimit) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return total, gtserror.Newf("error getting remote media: %w", err) + } + + if len(attachments) == 0 { + // reached end. + break + } + + // Use last created-at as the next 'olderThan' value. + olderThan = attachments[len(attachments)-1].CreatedAt + + for _, media := range attachments { + // Check / uncache each remote media attachment. + uncached, err := m.uncacheRemote(ctx, mostRecent, media) + if err != nil { + return total, err + } + + if uncached { + // Update + // count. + total++ + } + } + } + + return total, nil +} + +// FixCacheStatus will check all media for up-to-date cache status (i.e. in storage driver). +// Media marked as cached, with any required files missing, will be automatically uncached. +// Context will be checked for `gtscontext.DryRun()` in order to actually perform the action. +func (m *Media) FixCacheStates(ctx context.Context) (int, error) { + var ( + total int + maxID string + ) + + for { + // Fetch the next batch of media attachments up to next max ID. + attachments, err := m.state.DB.GetAttachments(ctx, maxID, selectLimit) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return total, gtserror.Newf("error getting avatars / headers: %w", err) + } + + if len(attachments) == 0 { + // reached end. + break + } + + // Use last ID as the next 'maxID' value. + maxID = attachments[len(attachments)-1].ID + + for _, media := range attachments { + // Check / fix required media cache states. + fixed, err := m.fixCacheState(ctx, media) + if err != nil { + return total, err + } + + if fixed { + // Update + // count. + total++ + } + } + } + + return total, nil +} + +func (m *Media) isOrphaned(ctx context.Context, path string) (bool, error) { + pathParts := regexes.FilePath.FindStringSubmatch(path) + if len(pathParts) != 6 { + // This doesn't match our expectations so + // it wasn't created by gts; ignore it. + return false, nil + } + + var ( + // 0th -> whole match + // 1st -> account ID + mediaType = pathParts[2] + // 3rd -> media sub-type (e.g. small, static) + mediaID = pathParts[4] + // 5th -> file extension + ) + + // Start a log entry for media. + l := log.WithContext(ctx). + WithField("media", mediaID) + + switch media.Type(mediaType) { + case media.TypeAttachment: + // Look for media in database stored by ID. + media, err := m.state.DB.GetAttachmentByID( + gtscontext.SetBarebones(ctx), + mediaID, + ) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return false, gtserror.Newf("error fetching media by id %s: %w", mediaID, err) + } + + if media == nil { + l.Debug("missing db entry for media") + return true, nil + } + + case media.TypeEmoji: + // Generate static URL for this emoji to lookup. + staticURL := uris.GenerateURIForAttachment( + pathParts[1], // instance account ID + string(media.TypeEmoji), + string(media.SizeStatic), + mediaID, + "png", + ) + + // Look for emoji in database stored by static URL. + // The media ID part of the storage key for emojis can + // change for refreshed items, so search by generated URL. + emoji, err := m.state.DB.GetEmojiByStaticURL( + gtscontext.SetBarebones(ctx), + staticURL, + ) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return false, gtserror.Newf("error fetching emoji by url %s: %w", staticURL, err) + } + + if emoji == nil { + l.Debug("missing db entry for emoji") + return true, nil + } + } + + return false, nil +} + +func (m *Media) pruneUnused(ctx context.Context, media *gtsmodel.MediaAttachment) (bool, error) { + // Start a log entry for media. + l := log.WithContext(ctx). + WithField("media", media.ID) + + // Check whether we have the required account for media. + account, missing, err := m.getRelatedAccount(ctx, media) + if err != nil { + return false, err + } else if missing { + l.Debug("deleting due to missing account") + return true, m.delete(ctx, media) + } + + if account != nil { + // Related account exists for this media, check whether it is being used. + headerInUse := (*media.Header && media.ID == account.HeaderMediaAttachmentID) + avatarInUse := (*media.Avatar && media.ID == account.AvatarMediaAttachmentID) + if (headerInUse || avatarInUse) && account.SuspendedAt.IsZero() { + l.Debug("skipping as account media in use") + return false, nil + } + } + + // Check whether we have the required status for media. + status, missing, err := m.getRelatedStatus(ctx, media) + if err != nil { + return false, err + } else if missing { + l.Debug("deleting due to missing status") + return true, m.delete(ctx, media) + } + + if status != nil { + // Check whether still attached to status. + for _, id := range status.AttachmentIDs { + if id == media.ID { + l.Debug("skippping as attached to status") + return false, nil + } + } + } + + // Media totally unused, delete it. + l.Debug("deleting unused media") + return true, m.delete(ctx, media) +} + +func (m *Media) fixCacheState(ctx context.Context, media *gtsmodel.MediaAttachment) (bool, error) { + if !*media.Cached { + // We ignore uncached media, a + // false negative is a much better + // situation than a false positive, + // re-cache will just overwrite it. + return false, nil + } + + // Start a log entry for media. + l := log.WithContext(ctx). + WithField("media", media.ID) + + // Check whether we have the required account for media. + _, missingAccount, err := m.getRelatedAccount(ctx, media) + if err != nil { + return false, err + } else if missingAccount { + l.Debug("skipping due to missing account") + return false, nil + } + + // Check whether we have the required status for media. + _, missingStatus, err := m.getRelatedStatus(ctx, media) + if err != nil { + return false, err + } else if missingStatus { + l.Debug("skipping due to missing status") + return false, nil + } + + // So we know this a valid cached media entry. + // Check that we have the files on disk required.... + return m.checkFiles(ctx, func() error { + l.Debug("uncaching due to missing media") + return m.uncache(ctx, media) + }, + media.Thumbnail.Path, + media.File.Path, + ) +} + +func (m *Media) uncacheRemote(ctx context.Context, after time.Time, media *gtsmodel.MediaAttachment) (bool, error) { + if !*media.Cached { + // Already uncached. + return false, nil + } + + // Start a log entry for media. + l := log.WithContext(ctx). + WithField("media", media.ID) + + // Check whether we have the required account for media. + account, missing, err := m.getRelatedAccount(ctx, media) + if err != nil { + return false, err + } else if missing { + l.Debug("skipping due to missing account") + return false, nil + } + + if account != nil && account.FetchedAt.After(after) { + l.Debug("skipping due to recently fetched account") + return false, nil + } + + // Check whether we have the required status for media. + status, missing, err := m.getRelatedStatus(ctx, media) + if err != nil { + return false, err + } else if missing { + l.Debug("skipping due to missing status") + return false, nil + } + + if status != nil && status.FetchedAt.After(after) { + l.Debug("skipping due to recently fetched status") + return false, nil + } + + // This media is too old, uncache it. + l.Debug("uncaching old remote media") + return true, m.uncache(ctx, media) +} + +func (m *Media) getRelatedAccount(ctx context.Context, media *gtsmodel.MediaAttachment) (*gtsmodel.Account, bool, error) { + if media.AccountID == "" { + // no related account. + return nil, false, nil + } + + // Load the account related to this media. + account, err := m.state.DB.GetAccountByID( + gtscontext.SetBarebones(ctx), + media.AccountID, + ) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return nil, false, gtserror.Newf("error fetching account by id %s: %w", media.AccountID, err) + } + + if account == nil { + // account is missing. + return nil, true, nil + } + + return account, false, nil +} + +func (m *Media) getRelatedStatus(ctx context.Context, media *gtsmodel.MediaAttachment) (*gtsmodel.Status, bool, error) { + if media.StatusID == "" { + // no related status. + return nil, false, nil + } + + // Load the status related to this media. + status, err := m.state.DB.GetStatusByID( + gtscontext.SetBarebones(ctx), + media.StatusID, + ) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return nil, false, gtserror.Newf("error fetching status by id %s: %w", media.StatusID, err) + } + + if status == nil { + // status is missing. + return nil, true, nil + } + + return status, false, nil +} + +func (m *Media) uncache(ctx context.Context, media *gtsmodel.MediaAttachment) error { + if gtscontext.DryRun(ctx) { + // Dry run, do nothing. + return nil + } + + // Remove media and thumbnail. + _, err := m.removeFiles(ctx, + media.File.Path, + media.Thumbnail.Path, + ) + if err != nil { + return gtserror.Newf("error removing media files: %w", err) + } + + // Update attachment to reflect that we no longer have it cached. + log.Debugf(ctx, "marking media attachment as uncached: %s", media.ID) + media.Cached = func() *bool { i := false; return &i }() + if err := m.state.DB.UpdateAttachment(ctx, media, "cached"); err != nil { + return gtserror.Newf("error updating media: %w", err) + } + + return nil +} + +func (m *Media) delete(ctx context.Context, media *gtsmodel.MediaAttachment) error { + if gtscontext.DryRun(ctx) { + // Dry run, do nothing. + return nil + } + + // Remove media and thumbnail. + _, err := m.removeFiles(ctx, + media.File.Path, + media.Thumbnail.Path, + ) + if err != nil { + return gtserror.Newf("error removing media files: %w", err) + } + + // Delete media attachment entirely from the database. + log.Debugf(ctx, "deleting media attachment: %s", media.ID) + if err := m.state.DB.DeleteAttachment(ctx, media.ID); err != nil { + return gtserror.Newf("error deleting media: %w", err) + } + + return nil +} |