diff options
Diffstat (limited to 'internal/media/prune.go')
-rw-r--r-- | internal/media/prune.go | 353 |
1 files changed, 353 insertions, 0 deletions
diff --git a/internal/media/prune.go b/internal/media/prune.go new file mode 100644 index 000000000..7335feb7a --- /dev/null +++ b/internal/media/prune.go @@ -0,0 +1,353 @@ +/* + GoToSocial + Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +package media + +import ( + "context" + "errors" + "fmt" + "time" + + "codeberg.org/gruf/go-store/v2/storage" + "github.com/superseriousbusiness/gotosocial/internal/db" + "github.com/superseriousbusiness/gotosocial/internal/gtserror" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/log" + "github.com/superseriousbusiness/gotosocial/internal/regexes" + "github.com/superseriousbusiness/gotosocial/internal/uris" +) + +const ( + selectPruneLimit = 50 // Amount of media entries to select at a time from the db when pruning. + unusedLocalAttachmentDays = 3 // Number of days to keep local media in storage if not attached to a status. +) + +func (m *manager) PruneAll(ctx context.Context, mediaCacheRemoteDays int, blocking bool) error { + const dry = false + + f := func(innerCtx context.Context) error { + errs := gtserror.MultiError{} + + pruned, err := m.PruneUnusedLocal(innerCtx, dry) + if err != nil { + errs = append(errs, fmt.Sprintf("error pruning unused local media (%s)", err)) + } else { + log.Infof("pruned %d unused local media", pruned) + } + + pruned, err = m.PruneUnusedRemote(innerCtx, dry) + if err != nil { + errs = append(errs, fmt.Sprintf("error pruning unused remote media: (%s)", err)) + } else { + log.Infof("pruned %d unused remote media", pruned) + } + + pruned, err = m.UncacheRemote(innerCtx, mediaCacheRemoteDays, dry) + if err != nil { + errs = append(errs, fmt.Sprintf("error uncacheing remote media older than %d day(s): (%s)", mediaCacheRemoteDays, err)) + } else { + log.Infof("uncached %d remote media older than %d day(s)", pruned, mediaCacheRemoteDays) + } + + pruned, err = m.PruneOrphaned(innerCtx, dry) + if err != nil { + errs = append(errs, fmt.Sprintf("error pruning orphaned media: (%s)", err)) + } else { + log.Infof("pruned %d orphaned media", pruned) + } + + if err := m.storage.Storage.Clean(innerCtx); err != nil { + errs = append(errs, fmt.Sprintf("error cleaning storage: (%s)", err)) + } else { + log.Info("cleaned storage") + } + + return errs.Combine() + } + + if blocking { + return f(ctx) + } + + go func() { + if err := f(context.Background()); err != nil { + log.Error(err) + } + }() + + return nil +} + +func (m *manager) PruneUnusedRemote(ctx context.Context, dry bool) (int, error) { + var ( + totalPruned int + maxID string + attachments []*gtsmodel.MediaAttachment + err error + ) + + // We don't know in advance how many remote attachments will meet + // our criteria for being 'unused'. So a dry run in this case just + // means we iterate through as normal, but do nothing with each entry + // instead of removing it. Define this here so we don't do the 'if dry' + // check inside the loop a million times. + var f func(ctx context.Context, attachment *gtsmodel.MediaAttachment) error + if !dry { + f = m.deleteAttachment + } else { + f = func(_ context.Context, _ *gtsmodel.MediaAttachment) error { + return nil // noop + } + } + + for attachments, err = m.db.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.db.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit) { + maxID = attachments[len(attachments)-1].ID // use the id of the last attachment in the slice as the next 'maxID' value + + // Prune each attachment that meets one of the following criteria: + // - Has no owning account in the database. + // - Is a header but isn't the owning account's current header. + // - Is an avatar but isn't the owning account's current avatar. + for _, attachment := range attachments { + if attachment.Account == nil || + (*attachment.Header && attachment.ID != attachment.Account.HeaderMediaAttachmentID) || + (*attachment.Avatar && attachment.ID != attachment.Account.AvatarMediaAttachmentID) { + if err := f(ctx, attachment); err != nil { + return totalPruned, err + } + totalPruned++ + } + } + } + + // Make sure we don't have a real error when we leave the loop. + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return totalPruned, err + } + + return totalPruned, nil +} + +func (m *manager) PruneOrphaned(ctx context.Context, dry bool) (int, error) { + // keys in storage will look like the following: + // `[ACCOUNT_ID]/[MEDIA_TYPE]/[MEDIA_SIZE]/[MEDIA_ID].[EXTENSION]` + // We can filter out keys we're not interested in by + // matching through a regex. + var matchCount int + match := func(storageKey string) bool { + if regexes.FilePath.MatchString(storageKey) { + matchCount++ + return true + } + return false + } + + iterator, err := m.storage.Iterator(ctx, match) // make sure this iterator is always released + if err != nil { + return 0, fmt.Errorf("PruneOrphaned: error getting storage iterator: %w", err) + } + + // Ensure we have some keys, and also advance + // the iterator to the first non-empty key. + if !iterator.Next() { + iterator.Release() + return 0, nil // nothing else to do here + } + + // Emojis are stored under the instance account, + // so we need the ID of the instance account for + // the next part. + instanceAccount, err := m.db.GetInstanceAccount(ctx, "") + if err != nil { + iterator.Release() + return 0, fmt.Errorf("PruneOrphaned: error getting instance account: %w", err) + } + instanceAccountID := instanceAccount.ID + + // For each key in the iterator, check if entry is orphaned. + orphanedKeys := make([]string, 0, matchCount) + for key := iterator.Key(); iterator.Next(); key = iterator.Key() { + orphaned, err := m.orphaned(ctx, key, instanceAccountID) + if err != nil { + iterator.Release() + return 0, fmt.Errorf("PruneOrphaned: checking orphaned status: %w", err) + } + + if orphaned { + orphanedKeys = append(orphanedKeys, key) + } + } + iterator.Release() + + totalPruned := len(orphanedKeys) + + if dry { + // Dry run: don't remove anything. + return totalPruned, nil + } + + // This is not a drill! + // We have to delete stuff! + return totalPruned, m.removeFiles(ctx, orphanedKeys...) +} + +func (m *manager) orphaned(ctx context.Context, key string, instanceAccountID string) (bool, error) { + pathParts := regexes.FilePath.FindStringSubmatch(key) + if len(pathParts) != 6 { + // This doesn't match our expectations so + // it wasn't created by gts; ignore it. + return false, nil + } + + var ( + mediaType = pathParts[2] + mediaID = pathParts[4] + orphaned = false + ) + + // Look for keys in storage that we don't have an attachment for. + switch Type(mediaType) { + case TypeAttachment, TypeHeader, TypeAvatar: + if _, err := m.db.GetAttachmentByID(ctx, mediaID); err != nil { + if !errors.Is(err, db.ErrNoEntries) { + return false, fmt.Errorf("error calling GetAttachmentByID: %w", err) + } + orphaned = true + } + case TypeEmoji: + // Look using the static URL for the emoji. Emoji images can change, so + // the MEDIA_ID part of the key for emojis will not necessarily correspond + // to the file that's currently being used as the emoji image. + staticURL := uris.GenerateURIForAttachment(instanceAccountID, string(TypeEmoji), string(SizeStatic), mediaID, mimePng) + if _, err := m.db.GetEmojiByStaticURL(ctx, staticURL); err != nil { + if !errors.Is(err, db.ErrNoEntries) { + return false, fmt.Errorf("error calling GetEmojiByStaticURL: %w", err) + } + orphaned = true + } + } + + return orphaned, nil +} + +func (m *manager) UncacheRemote(ctx context.Context, olderThanDays int, dry bool) (int, error) { + if olderThanDays < 0 { + return 0, nil + } + + olderThan := time.Now().Add(-time.Hour * 24 * time.Duration(olderThanDays)) + + if dry { + // Dry run, just count eligible entries without removing them. + return m.db.CountRemoteOlderThan(ctx, olderThan) + } + + var ( + totalPruned int + attachments []*gtsmodel.MediaAttachment + err error + ) + + for attachments, err = m.db.GetRemoteOlderThan(ctx, olderThan, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.db.GetRemoteOlderThan(ctx, olderThan, selectPruneLimit) { + olderThan = attachments[len(attachments)-1].CreatedAt // use the created time of the last attachment in the slice as the next 'olderThan' value + + for _, attachment := range attachments { + if err := m.uncacheAttachment(ctx, attachment); err != nil { + return totalPruned, err + } + totalPruned++ + } + } + + // Make sure we don't have a real error when we leave the loop. + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return totalPruned, err + } + + return totalPruned, nil +} + +func (m *manager) PruneUnusedLocal(ctx context.Context, dry bool) (int, error) { + olderThan := time.Now().Add(-time.Hour * 24 * time.Duration(unusedLocalAttachmentDays)) + + if dry { + // Dry run, just count eligible entries without removing them. + return m.db.CountLocalUnattachedOlderThan(ctx, olderThan) + } + + var ( + totalPruned int + attachments []*gtsmodel.MediaAttachment + err error + ) + + for attachments, err = m.db.GetLocalUnattachedOlderThan(ctx, olderThan, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.db.GetLocalUnattachedOlderThan(ctx, olderThan, selectPruneLimit) { + olderThan = attachments[len(attachments)-1].CreatedAt // use the created time of the last attachment in the slice as the next 'olderThan' value + + for _, attachment := range attachments { + if err := m.deleteAttachment(ctx, attachment); err != nil { + return totalPruned, err + } + totalPruned++ + } + } + + // Make sure we don't have a real error when we leave the loop. + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return totalPruned, err + } + + return totalPruned, nil +} + +/* + Handy little helpers +*/ + +func (m *manager) deleteAttachment(ctx context.Context, attachment *gtsmodel.MediaAttachment) error { + if err := m.removeFiles(ctx, attachment.File.Path, attachment.Thumbnail.Path); err != nil { + return err + } + + // Delete attachment completely. + return m.db.DeleteByID(ctx, attachment.ID, attachment) +} + +func (m *manager) uncacheAttachment(ctx context.Context, attachment *gtsmodel.MediaAttachment) error { + if err := m.removeFiles(ctx, attachment.File.Path, attachment.Thumbnail.Path); err != nil { + return err + } + + // Update attachment to reflect that we no longer have it cached. + attachment.UpdatedAt = time.Now() + cached := false + attachment.Cached = &cached + return m.db.UpdateByID(ctx, attachment, attachment.ID, "updated_at", "cached") +} + +func (m *manager) removeFiles(ctx context.Context, keys ...string) error { + errs := make(gtserror.MultiError, 0, len(keys)) + + for _, key := range keys { + if err := m.storage.Delete(ctx, key); err != nil && !errors.Is(err, storage.ErrNotFound) { + errs = append(errs, "storage error removing "+key+": "+err.Error()) + } + } + + return errs.Combine() +} |