summaryrefslogtreecommitdiff
path: root/internal/media/pruneorphaned.go
blob: dd4c4e35fcea8b05aa134d96ff5e1020c833c393 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
/*
   GoToSocial
   Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU Affero General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Affero General Public License for more details.

   You should have received a copy of the GNU Affero General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

package media

import (
	"context"
	"errors"
	"fmt"

	"github.com/superseriousbusiness/gotosocial/internal/db"
	"github.com/superseriousbusiness/gotosocial/internal/log"
	"github.com/superseriousbusiness/gotosocial/internal/regexes"
	"github.com/superseriousbusiness/gotosocial/internal/uris"
)

func (m *manager) PruneOrphaned(ctx context.Context, dry bool) (int, error) {
	var totalPruned int

	// keys in storage will look like the following:
	// `[ACCOUNT_ID]/[MEDIA_TYPE]/[MEDIA_SIZE]/[MEDIA_ID].[EXTENSION]`
	// we can filter out keys we're not interested in by
	// matching through a regex
	var matchCount int
	match := func(storageKey string) bool {
		if regexes.FilePath.MatchString(storageKey) {
			matchCount++
			return true
		}
		return false
	}

	log.Info("checking storage keys for orphaned pruning candidates...")
	iterator, err := m.storage.Iterator(ctx, match)
	if err != nil {
		return 0, fmt.Errorf("PruneOrphaned: error getting storage iterator: %w", err)
	}

	// make sure we have some keys, and also advance
	// the iterator to the first non-empty key
	if !iterator.Next() {
		return 0, nil
	}

	instanceAccount, err := m.db.GetInstanceAccount(ctx, "")
	if err != nil {
		return 0, fmt.Errorf("PruneOrphaned: error getting instance account: %w", err)
	}
	instanceAccountID := instanceAccount.ID

	// for each key in the iterator, check if entry is orphaned
	log.Info("got %d orphaned pruning candidates, checking for orphaned status, please wait...")
	var checkedKeys int
	orphanedKeys := make([]string, 0, matchCount)
	for key := iterator.Key(); iterator.Next(); key = iterator.Key() {
		if m.orphaned(ctx, key, instanceAccountID) {
			orphanedKeys = append(orphanedKeys, key)
		}
		checkedKeys++
		if checkedKeys%50 == 0 {
			log.Infof("checked %d of %d orphaned pruning candidates...", checkedKeys, matchCount)
		}
	}
	iterator.Release()

	if !dry {
		// the real deal, we have to delete stuff
		for _, key := range orphanedKeys {
			log.Infof("key %s corresponds to orphaned media, will remove it now", key)
			if err := m.storage.Delete(ctx, key); err != nil {
				log.Errorf("error deleting item with key %s from storage: %s", key, err)
				continue
			}
			totalPruned++
		}
	} else {
		// just a dry run, don't delete anything
		for _, key := range orphanedKeys {
			log.Infof("DRY RUN: key %s corresponds to orphaned media which would be deleted", key)
			totalPruned++
		}
	}

	return totalPruned, nil
}

func (m *manager) orphaned(ctx context.Context, key string, instanceAccountID string) bool {
	pathParts := regexes.FilePath.FindStringSubmatch(key)
	if len(pathParts) != 6 {
		return false
	}

	mediaType := pathParts[2]
	mediaID := pathParts[4]

	var orphaned bool
	switch Type(mediaType) {
	case TypeAttachment, TypeHeader, TypeAvatar:
		if _, err := m.db.GetAttachmentByID(ctx, mediaID); err != nil {
			if errors.Is(err, db.ErrNoEntries) {
				orphaned = true
			} else {
				log.Errorf("orphaned: error calling GetAttachmentByID: %s", err)
			}
		}
	case TypeEmoji:
		// look using the static URL for the emoji, since the MEDIA_ID part of
		// the key for emojis will not necessarily correspond to the file that's
		// currently being used as the emoji image
		staticURI := uris.GenerateURIForAttachment(instanceAccountID, string(TypeEmoji), string(SizeStatic), mediaID, mimePng)
		if _, err := m.db.GetEmojiByStaticURL(ctx, staticURI); err != nil {
			if errors.Is(err, db.ErrNoEntries) {
				orphaned = true
			} else {
				log.Errorf("orphaned: error calling GetEmojiByID: %s", err)
			}
		}
	default:
		orphaned = true
	}

	return orphaned
}