summaryrefslogtreecommitdiff
path: root/internal/media/prune.go
diff options
context:
space:
mode:
Diffstat (limited to 'internal/media/prune.go')
-rw-r--r--internal/media/prune.go373
1 files changed, 0 insertions, 373 deletions
diff --git a/internal/media/prune.go b/internal/media/prune.go
deleted file mode 100644
index 71c8e00ce..000000000
--- a/internal/media/prune.go
+++ /dev/null
@@ -1,373 +0,0 @@
-// GoToSocial
-// Copyright (C) GoToSocial Authors admin@gotosocial.org
-// SPDX-License-Identifier: AGPL-3.0-or-later
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-package media
-
-import (
- "context"
- "errors"
- "fmt"
- "time"
-
- "codeberg.org/gruf/go-store/v2/storage"
- "github.com/superseriousbusiness/gotosocial/internal/db"
- "github.com/superseriousbusiness/gotosocial/internal/gtserror"
- "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
- "github.com/superseriousbusiness/gotosocial/internal/log"
- "github.com/superseriousbusiness/gotosocial/internal/regexes"
- "github.com/superseriousbusiness/gotosocial/internal/uris"
-)
-
-const (
- selectPruneLimit = 50 // Amount of media entries to select at a time from the db when pruning.
- unusedLocalAttachmentDays = 3 // Number of days to keep local media in storage if not attached to a status.
-)
-
-// PruneAll runs all of the below pruning/uncacheing functions, and then cleans up any resulting
-// empty directories from the storage driver. It can be called as a shortcut for calling the below
-// pruning functions one by one.
-//
-// If blocking is true, then any errors encountered during the prune will be combined + returned to
-// the caller. If blocking is false, the prune is run in the background and errors are just logged
-// instead.
-func (m *Manager) PruneAll(ctx context.Context, mediaCacheRemoteDays int, blocking bool) error {
- const dry = false
-
- f := func(innerCtx context.Context) error {
- errs := gtserror.MultiError{}
-
- pruned, err := m.PruneUnusedLocal(innerCtx, dry)
- if err != nil {
- errs = append(errs, fmt.Sprintf("error pruning unused local media (%s)", err))
- } else {
- log.Infof(ctx, "pruned %d unused local media", pruned)
- }
-
- pruned, err = m.PruneUnusedRemote(innerCtx, dry)
- if err != nil {
- errs = append(errs, fmt.Sprintf("error pruning unused remote media: (%s)", err))
- } else {
- log.Infof(ctx, "pruned %d unused remote media", pruned)
- }
-
- pruned, err = m.UncacheRemote(innerCtx, mediaCacheRemoteDays, dry)
- if err != nil {
- errs = append(errs, fmt.Sprintf("error uncacheing remote media older than %d day(s): (%s)", mediaCacheRemoteDays, err))
- } else {
- log.Infof(ctx, "uncached %d remote media older than %d day(s)", pruned, mediaCacheRemoteDays)
- }
-
- pruned, err = m.PruneOrphaned(innerCtx, dry)
- if err != nil {
- errs = append(errs, fmt.Sprintf("error pruning orphaned media: (%s)", err))
- } else {
- log.Infof(ctx, "pruned %d orphaned media", pruned)
- }
-
- if err := m.state.Storage.Storage.Clean(innerCtx); err != nil {
- errs = append(errs, fmt.Sprintf("error cleaning storage: (%s)", err))
- } else {
- log.Info(ctx, "cleaned storage")
- }
-
- return errs.Combine()
- }
-
- if blocking {
- return f(ctx)
- }
-
- go func() {
- if err := f(context.Background()); err != nil {
- log.Error(ctx, err)
- }
- }()
-
- return nil
-}
-
-// PruneUnusedRemote prunes unused/out of date headers and avatars cached on this instance.
-//
-// The returned int is the amount of media that was pruned by this function.
-func (m *Manager) PruneUnusedRemote(ctx context.Context, dry bool) (int, error) {
- var (
- totalPruned int
- maxID string
- attachments []*gtsmodel.MediaAttachment
- err error
- )
-
- // We don't know in advance how many remote attachments will meet
- // our criteria for being 'unused'. So a dry run in this case just
- // means we iterate through as normal, but do nothing with each entry
- // instead of removing it. Define this here so we don't do the 'if dry'
- // check inside the loop a million times.
- var f func(ctx context.Context, attachment *gtsmodel.MediaAttachment) error
- if !dry {
- f = m.deleteAttachment
- } else {
- f = func(_ context.Context, _ *gtsmodel.MediaAttachment) error {
- return nil // noop
- }
- }
-
- for attachments, err = m.state.DB.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.state.DB.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit) {
- maxID = attachments[len(attachments)-1].ID // use the id of the last attachment in the slice as the next 'maxID' value
-
- for _, attachment := range attachments {
- // Retrieve owning account if possible.
- var account *gtsmodel.Account
- if accountID := attachment.AccountID; accountID != "" {
- account, err = m.state.DB.GetAccountByID(ctx, attachment.AccountID)
- if err != nil && !errors.Is(err, db.ErrNoEntries) {
- // Only return on a real error.
- return 0, fmt.Errorf("PruneUnusedRemote: error fetching account with id %s: %w", accountID, err)
- }
- }
-
- // Prune each attachment that meets one of the following criteria:
- // - Has no owning account in the database.
- // - Is a header but isn't the owning account's current header.
- // - Is an avatar but isn't the owning account's current avatar.
- if account == nil ||
- (*attachment.Header && attachment.ID != account.HeaderMediaAttachmentID) ||
- (*attachment.Avatar && attachment.ID != account.AvatarMediaAttachmentID) {
- if err := f(ctx, attachment); err != nil {
- return totalPruned, err
- }
- totalPruned++
- }
- }
- }
-
- // Make sure we don't have a real error when we leave the loop.
- if err != nil && !errors.Is(err, db.ErrNoEntries) {
- return totalPruned, err
- }
-
- return totalPruned, nil
-}
-
-// PruneOrphaned prunes files that exist in storage but which do not have a corresponding
-// entry in the database.
-//
-// If dry is true, then nothing will be changed, only the amount that *would* be removed
-// is returned to the caller.
-func (m *Manager) PruneOrphaned(ctx context.Context, dry bool) (int, error) {
- // Emojis are stored under the instance account, so we
- // need the ID of the instance account for the next part.
- instanceAccount, err := m.state.DB.GetInstanceAccount(ctx, "")
- if err != nil {
- return 0, fmt.Errorf("PruneOrphaned: error getting instance account: %w", err)
- }
-
- instanceAccountID := instanceAccount.ID
-
- var orphanedKeys []string
-
- // Keys in storage will look like the following format:
- // `[ACCOUNT_ID]/[MEDIA_TYPE]/[MEDIA_SIZE]/[MEDIA_ID].[EXTENSION]`
- // We can filter out keys we're not interested in by matching through a regex.
- if err := m.state.Storage.WalkKeys(ctx, func(ctx context.Context, key string) error {
- if !regexes.FilePath.MatchString(key) {
- // This is not our expected key format.
- return nil
- }
-
- // Check whether this storage entry is orphaned.
- orphaned, err := m.orphaned(ctx, key, instanceAccountID)
- if err != nil {
- return fmt.Errorf("error checking orphaned status: %w", err)
- }
-
- if orphaned {
- // Add this orphaned entry to list of keys.
- orphanedKeys = append(orphanedKeys, key)
- }
-
- return nil
- }); err != nil {
- return 0, fmt.Errorf("PruneOrphaned: error walking keys: %w", err)
- }
-
- totalPruned := len(orphanedKeys)
-
- if dry {
- // Dry run: don't remove anything.
- return totalPruned, nil
- }
-
- // This is not a drill! We have to delete stuff!
- return m.removeFiles(ctx, orphanedKeys...)
-}
-
-func (m *Manager) orphaned(ctx context.Context, key string, instanceAccountID string) (bool, error) {
- pathParts := regexes.FilePath.FindStringSubmatch(key)
- if len(pathParts) != 6 {
- // This doesn't match our expectations so
- // it wasn't created by gts; ignore it.
- return false, nil
- }
-
- var (
- mediaType = pathParts[2]
- mediaID = pathParts[4]
- orphaned = false
- )
-
- // Look for keys in storage that we don't have an attachment for.
- switch Type(mediaType) {
- case TypeAttachment, TypeHeader, TypeAvatar:
- if _, err := m.state.DB.GetAttachmentByID(ctx, mediaID); err != nil {
- if !errors.Is(err, db.ErrNoEntries) {
- return false, fmt.Errorf("error calling GetAttachmentByID: %w", err)
- }
- orphaned = true
- }
- case TypeEmoji:
- // Look using the static URL for the emoji. Emoji images can change, so
- // the MEDIA_ID part of the key for emojis will not necessarily correspond
- // to the file that's currently being used as the emoji image.
- staticURL := uris.GenerateURIForAttachment(instanceAccountID, string(TypeEmoji), string(SizeStatic), mediaID, mimePng)
- if _, err := m.state.DB.GetEmojiByStaticURL(ctx, staticURL); err != nil {
- if !errors.Is(err, db.ErrNoEntries) {
- return false, fmt.Errorf("error calling GetEmojiByStaticURL: %w", err)
- }
- orphaned = true
- }
- }
-
- return orphaned, nil
-}
-
-// UncacheRemote uncaches all remote media attachments older than the given amount of days.
-//
-// In this context, uncacheing means deleting media files from storage and marking the attachment
-// as cached=false in the database.
-//
-// If 'dry' is true, then only a dry run will be performed: nothing will actually be changed.
-//
-// The returned int is the amount of media that was/would be uncached by this function.
-func (m *Manager) UncacheRemote(ctx context.Context, olderThanDays int, dry bool) (int, error) {
- if olderThanDays < 0 {
- return 0, nil
- }
-
- olderThan := time.Now().Add(-time.Hour * 24 * time.Duration(olderThanDays))
-
- if dry {
- // Dry run, just count eligible entries without removing them.
- return m.state.DB.CountRemoteOlderThan(ctx, olderThan)
- }
-
- var (
- totalPruned int
- attachments []*gtsmodel.MediaAttachment
- err error
- )
-
- for attachments, err = m.state.DB.GetRemoteOlderThan(ctx, olderThan, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.state.DB.GetRemoteOlderThan(ctx, olderThan, selectPruneLimit) {
- olderThan = attachments[len(attachments)-1].CreatedAt // use the created time of the last attachment in the slice as the next 'olderThan' value
-
- for _, attachment := range attachments {
- if err := m.uncacheAttachment(ctx, attachment); err != nil {
- return totalPruned, err
- }
- totalPruned++
- }
- }
-
- // Make sure we don't have a real error when we leave the loop.
- if err != nil && !errors.Is(err, db.ErrNoEntries) {
- return totalPruned, err
- }
-
- return totalPruned, nil
-}
-
-// PruneUnusedLocal prunes unused media attachments that were uploaded by
-// a user on this instance, but never actually attached to a status, or attached but
-// later detached.
-//
-// The returned int is the amount of media that was pruned by this function.
-func (m *Manager) PruneUnusedLocal(ctx context.Context, dry bool) (int, error) {
- olderThan := time.Now().Add(-time.Hour * 24 * time.Duration(unusedLocalAttachmentDays))
-
- if dry {
- // Dry run, just count eligible entries without removing them.
- return m.state.DB.CountLocalUnattachedOlderThan(ctx, olderThan)
- }
-
- var (
- totalPruned int
- attachments []*gtsmodel.MediaAttachment
- err error
- )
-
- for attachments, err = m.state.DB.GetLocalUnattachedOlderThan(ctx, olderThan, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.state.DB.GetLocalUnattachedOlderThan(ctx, olderThan, selectPruneLimit) {
- olderThan = attachments[len(attachments)-1].CreatedAt // use the created time of the last attachment in the slice as the next 'olderThan' value
-
- for _, attachment := range attachments {
- if err := m.deleteAttachment(ctx, attachment); err != nil {
- return totalPruned, err
- }
- totalPruned++
- }
- }
-
- // Make sure we don't have a real error when we leave the loop.
- if err != nil && !errors.Is(err, db.ErrNoEntries) {
- return totalPruned, err
- }
-
- return totalPruned, nil
-}
-
-/*
- Handy little helpers
-*/
-
-func (m *Manager) deleteAttachment(ctx context.Context, attachment *gtsmodel.MediaAttachment) error {
- if _, err := m.removeFiles(ctx, attachment.File.Path, attachment.Thumbnail.Path); err != nil {
- return err
- }
-
- // Delete attachment completely.
- return m.state.DB.DeleteAttachment(ctx, attachment.ID)
-}
-
-func (m *Manager) uncacheAttachment(ctx context.Context, attachment *gtsmodel.MediaAttachment) error {
- if _, err := m.removeFiles(ctx, attachment.File.Path, attachment.Thumbnail.Path); err != nil {
- return err
- }
-
- // Update attachment to reflect that we no longer have it cached.
- attachment.Cached = func() *bool { i := false; return &i }()
- return m.state.DB.UpdateAttachment(ctx, attachment, "cached")
-}
-
-func (m *Manager) removeFiles(ctx context.Context, keys ...string) (int, error) {
- errs := make(gtserror.MultiError, 0, len(keys))
-
- for _, key := range keys {
- if err := m.state.Storage.Delete(ctx, key); err != nil && !errors.Is(err, storage.ErrNotFound) {
- errs = append(errs, "storage error removing "+key+": "+err.Error())
- }
- }
-
- return len(keys) - len(errs), errs.Combine()
-}