diff options
Diffstat (limited to 'internal/media/prune.go')
-rw-r--r-- | internal/media/prune.go | 373 |
1 files changed, 0 insertions, 373 deletions
diff --git a/internal/media/prune.go b/internal/media/prune.go deleted file mode 100644 index 71c8e00ce..000000000 --- a/internal/media/prune.go +++ /dev/null @@ -1,373 +0,0 @@ -// GoToSocial -// Copyright (C) GoToSocial Authors admin@gotosocial.org -// SPDX-License-Identifier: AGPL-3.0-or-later -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see <http://www.gnu.org/licenses/>. - -package media - -import ( - "context" - "errors" - "fmt" - "time" - - "codeberg.org/gruf/go-store/v2/storage" - "github.com/superseriousbusiness/gotosocial/internal/db" - "github.com/superseriousbusiness/gotosocial/internal/gtserror" - "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" - "github.com/superseriousbusiness/gotosocial/internal/log" - "github.com/superseriousbusiness/gotosocial/internal/regexes" - "github.com/superseriousbusiness/gotosocial/internal/uris" -) - -const ( - selectPruneLimit = 50 // Amount of media entries to select at a time from the db when pruning. - unusedLocalAttachmentDays = 3 // Number of days to keep local media in storage if not attached to a status. -) - -// PruneAll runs all of the below pruning/uncacheing functions, and then cleans up any resulting -// empty directories from the storage driver. It can be called as a shortcut for calling the below -// pruning functions one by one. -// -// If blocking is true, then any errors encountered during the prune will be combined + returned to -// the caller. If blocking is false, the prune is run in the background and errors are just logged -// instead. -func (m *Manager) PruneAll(ctx context.Context, mediaCacheRemoteDays int, blocking bool) error { - const dry = false - - f := func(innerCtx context.Context) error { - errs := gtserror.MultiError{} - - pruned, err := m.PruneUnusedLocal(innerCtx, dry) - if err != nil { - errs = append(errs, fmt.Sprintf("error pruning unused local media (%s)", err)) - } else { - log.Infof(ctx, "pruned %d unused local media", pruned) - } - - pruned, err = m.PruneUnusedRemote(innerCtx, dry) - if err != nil { - errs = append(errs, fmt.Sprintf("error pruning unused remote media: (%s)", err)) - } else { - log.Infof(ctx, "pruned %d unused remote media", pruned) - } - - pruned, err = m.UncacheRemote(innerCtx, mediaCacheRemoteDays, dry) - if err != nil { - errs = append(errs, fmt.Sprintf("error uncacheing remote media older than %d day(s): (%s)", mediaCacheRemoteDays, err)) - } else { - log.Infof(ctx, "uncached %d remote media older than %d day(s)", pruned, mediaCacheRemoteDays) - } - - pruned, err = m.PruneOrphaned(innerCtx, dry) - if err != nil { - errs = append(errs, fmt.Sprintf("error pruning orphaned media: (%s)", err)) - } else { - log.Infof(ctx, "pruned %d orphaned media", pruned) - } - - if err := m.state.Storage.Storage.Clean(innerCtx); err != nil { - errs = append(errs, fmt.Sprintf("error cleaning storage: (%s)", err)) - } else { - log.Info(ctx, "cleaned storage") - } - - return errs.Combine() - } - - if blocking { - return f(ctx) - } - - go func() { - if err := f(context.Background()); err != nil { - log.Error(ctx, err) - } - }() - - return nil -} - -// PruneUnusedRemote prunes unused/out of date headers and avatars cached on this instance. -// -// The returned int is the amount of media that was pruned by this function. -func (m *Manager) PruneUnusedRemote(ctx context.Context, dry bool) (int, error) { - var ( - totalPruned int - maxID string - attachments []*gtsmodel.MediaAttachment - err error - ) - - // We don't know in advance how many remote attachments will meet - // our criteria for being 'unused'. So a dry run in this case just - // means we iterate through as normal, but do nothing with each entry - // instead of removing it. Define this here so we don't do the 'if dry' - // check inside the loop a million times. - var f func(ctx context.Context, attachment *gtsmodel.MediaAttachment) error - if !dry { - f = m.deleteAttachment - } else { - f = func(_ context.Context, _ *gtsmodel.MediaAttachment) error { - return nil // noop - } - } - - for attachments, err = m.state.DB.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.state.DB.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit) { - maxID = attachments[len(attachments)-1].ID // use the id of the last attachment in the slice as the next 'maxID' value - - for _, attachment := range attachments { - // Retrieve owning account if possible. - var account *gtsmodel.Account - if accountID := attachment.AccountID; accountID != "" { - account, err = m.state.DB.GetAccountByID(ctx, attachment.AccountID) - if err != nil && !errors.Is(err, db.ErrNoEntries) { - // Only return on a real error. - return 0, fmt.Errorf("PruneUnusedRemote: error fetching account with id %s: %w", accountID, err) - } - } - - // Prune each attachment that meets one of the following criteria: - // - Has no owning account in the database. - // - Is a header but isn't the owning account's current header. - // - Is an avatar but isn't the owning account's current avatar. - if account == nil || - (*attachment.Header && attachment.ID != account.HeaderMediaAttachmentID) || - (*attachment.Avatar && attachment.ID != account.AvatarMediaAttachmentID) { - if err := f(ctx, attachment); err != nil { - return totalPruned, err - } - totalPruned++ - } - } - } - - // Make sure we don't have a real error when we leave the loop. - if err != nil && !errors.Is(err, db.ErrNoEntries) { - return totalPruned, err - } - - return totalPruned, nil -} - -// PruneOrphaned prunes files that exist in storage but which do not have a corresponding -// entry in the database. -// -// If dry is true, then nothing will be changed, only the amount that *would* be removed -// is returned to the caller. -func (m *Manager) PruneOrphaned(ctx context.Context, dry bool) (int, error) { - // Emojis are stored under the instance account, so we - // need the ID of the instance account for the next part. - instanceAccount, err := m.state.DB.GetInstanceAccount(ctx, "") - if err != nil { - return 0, fmt.Errorf("PruneOrphaned: error getting instance account: %w", err) - } - - instanceAccountID := instanceAccount.ID - - var orphanedKeys []string - - // Keys in storage will look like the following format: - // `[ACCOUNT_ID]/[MEDIA_TYPE]/[MEDIA_SIZE]/[MEDIA_ID].[EXTENSION]` - // We can filter out keys we're not interested in by matching through a regex. - if err := m.state.Storage.WalkKeys(ctx, func(ctx context.Context, key string) error { - if !regexes.FilePath.MatchString(key) { - // This is not our expected key format. - return nil - } - - // Check whether this storage entry is orphaned. - orphaned, err := m.orphaned(ctx, key, instanceAccountID) - if err != nil { - return fmt.Errorf("error checking orphaned status: %w", err) - } - - if orphaned { - // Add this orphaned entry to list of keys. - orphanedKeys = append(orphanedKeys, key) - } - - return nil - }); err != nil { - return 0, fmt.Errorf("PruneOrphaned: error walking keys: %w", err) - } - - totalPruned := len(orphanedKeys) - - if dry { - // Dry run: don't remove anything. - return totalPruned, nil - } - - // This is not a drill! We have to delete stuff! - return m.removeFiles(ctx, orphanedKeys...) -} - -func (m *Manager) orphaned(ctx context.Context, key string, instanceAccountID string) (bool, error) { - pathParts := regexes.FilePath.FindStringSubmatch(key) - if len(pathParts) != 6 { - // This doesn't match our expectations so - // it wasn't created by gts; ignore it. - return false, nil - } - - var ( - mediaType = pathParts[2] - mediaID = pathParts[4] - orphaned = false - ) - - // Look for keys in storage that we don't have an attachment for. - switch Type(mediaType) { - case TypeAttachment, TypeHeader, TypeAvatar: - if _, err := m.state.DB.GetAttachmentByID(ctx, mediaID); err != nil { - if !errors.Is(err, db.ErrNoEntries) { - return false, fmt.Errorf("error calling GetAttachmentByID: %w", err) - } - orphaned = true - } - case TypeEmoji: - // Look using the static URL for the emoji. Emoji images can change, so - // the MEDIA_ID part of the key for emojis will not necessarily correspond - // to the file that's currently being used as the emoji image. - staticURL := uris.GenerateURIForAttachment(instanceAccountID, string(TypeEmoji), string(SizeStatic), mediaID, mimePng) - if _, err := m.state.DB.GetEmojiByStaticURL(ctx, staticURL); err != nil { - if !errors.Is(err, db.ErrNoEntries) { - return false, fmt.Errorf("error calling GetEmojiByStaticURL: %w", err) - } - orphaned = true - } - } - - return orphaned, nil -} - -// UncacheRemote uncaches all remote media attachments older than the given amount of days. -// -// In this context, uncacheing means deleting media files from storage and marking the attachment -// as cached=false in the database. -// -// If 'dry' is true, then only a dry run will be performed: nothing will actually be changed. -// -// The returned int is the amount of media that was/would be uncached by this function. -func (m *Manager) UncacheRemote(ctx context.Context, olderThanDays int, dry bool) (int, error) { - if olderThanDays < 0 { - return 0, nil - } - - olderThan := time.Now().Add(-time.Hour * 24 * time.Duration(olderThanDays)) - - if dry { - // Dry run, just count eligible entries without removing them. - return m.state.DB.CountRemoteOlderThan(ctx, olderThan) - } - - var ( - totalPruned int - attachments []*gtsmodel.MediaAttachment - err error - ) - - for attachments, err = m.state.DB.GetRemoteOlderThan(ctx, olderThan, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.state.DB.GetRemoteOlderThan(ctx, olderThan, selectPruneLimit) { - olderThan = attachments[len(attachments)-1].CreatedAt // use the created time of the last attachment in the slice as the next 'olderThan' value - - for _, attachment := range attachments { - if err := m.uncacheAttachment(ctx, attachment); err != nil { - return totalPruned, err - } - totalPruned++ - } - } - - // Make sure we don't have a real error when we leave the loop. - if err != nil && !errors.Is(err, db.ErrNoEntries) { - return totalPruned, err - } - - return totalPruned, nil -} - -// PruneUnusedLocal prunes unused media attachments that were uploaded by -// a user on this instance, but never actually attached to a status, or attached but -// later detached. -// -// The returned int is the amount of media that was pruned by this function. -func (m *Manager) PruneUnusedLocal(ctx context.Context, dry bool) (int, error) { - olderThan := time.Now().Add(-time.Hour * 24 * time.Duration(unusedLocalAttachmentDays)) - - if dry { - // Dry run, just count eligible entries without removing them. - return m.state.DB.CountLocalUnattachedOlderThan(ctx, olderThan) - } - - var ( - totalPruned int - attachments []*gtsmodel.MediaAttachment - err error - ) - - for attachments, err = m.state.DB.GetLocalUnattachedOlderThan(ctx, olderThan, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.state.DB.GetLocalUnattachedOlderThan(ctx, olderThan, selectPruneLimit) { - olderThan = attachments[len(attachments)-1].CreatedAt // use the created time of the last attachment in the slice as the next 'olderThan' value - - for _, attachment := range attachments { - if err := m.deleteAttachment(ctx, attachment); err != nil { - return totalPruned, err - } - totalPruned++ - } - } - - // Make sure we don't have a real error when we leave the loop. - if err != nil && !errors.Is(err, db.ErrNoEntries) { - return totalPruned, err - } - - return totalPruned, nil -} - -/* - Handy little helpers -*/ - -func (m *Manager) deleteAttachment(ctx context.Context, attachment *gtsmodel.MediaAttachment) error { - if _, err := m.removeFiles(ctx, attachment.File.Path, attachment.Thumbnail.Path); err != nil { - return err - } - - // Delete attachment completely. - return m.state.DB.DeleteAttachment(ctx, attachment.ID) -} - -func (m *Manager) uncacheAttachment(ctx context.Context, attachment *gtsmodel.MediaAttachment) error { - if _, err := m.removeFiles(ctx, attachment.File.Path, attachment.Thumbnail.Path); err != nil { - return err - } - - // Update attachment to reflect that we no longer have it cached. - attachment.Cached = func() *bool { i := false; return &i }() - return m.state.DB.UpdateAttachment(ctx, attachment, "cached") -} - -func (m *Manager) removeFiles(ctx context.Context, keys ...string) (int, error) { - errs := make(gtserror.MultiError, 0, len(keys)) - - for _, key := range keys { - if err := m.state.Storage.Delete(ctx, key); err != nil && !errors.Is(err, storage.ErrNotFound) { - errs = append(errs, "storage error removing "+key+": "+err.Error()) - } - } - - return len(keys) - len(errs), errs.Combine() -} |