// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
package media
import (
	"context"
	"errors"
	"fmt"
	"time"
	"codeberg.org/gruf/go-store/v2/storage"
	"github.com/superseriousbusiness/gotosocial/internal/db"
	"github.com/superseriousbusiness/gotosocial/internal/gtserror"
	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
	"github.com/superseriousbusiness/gotosocial/internal/log"
	"github.com/superseriousbusiness/gotosocial/internal/regexes"
	"github.com/superseriousbusiness/gotosocial/internal/uris"
)
// Settings shared by the pruning routines in this file.
const (
	// selectPruneLimit is the amount of media entries to
	// select at a time from the db when pruning.
	selectPruneLimit = 50

	// unusedLocalAttachmentDays is the number of days to keep local
	// media in storage if it is not attached to a status.
	unusedLocalAttachmentDays = 3
)
// PruneAll runs every media cleanup routine in sequence: PruneUnusedLocal,
// PruneUnusedRemote, UncacheRemote (with mediaCacheRemoteDays), PruneOrphaned,
// and finally a Clean of the underlying storage. A failing step does not stop
// the steps after it; its error is recorded, and all recorded errors are
// combined into a single returned error.
//
// If blocking is true, the work is done synchronously using ctx and the
// combined error is returned. Otherwise the work is started in a new goroutine
// using context.Background(), nil is returned immediately, and a combined
// error (if any) is only logged.
func (m *manager) PruneAll(ctx context.Context, mediaCacheRemoteDays int, blocking bool) error {
	// PruneAll always removes for real; it never does a dry run.
	const dry = false

	pruneEverything := func(innerCtx context.Context) error {
		errs := gtserror.MultiError{}

		if n, err := m.PruneUnusedLocal(innerCtx, dry); err != nil {
			errs = append(errs, fmt.Sprintf("error pruning unused local media (%s)", err))
		} else {
			log.Infof(ctx, "pruned %d unused local media", n)
		}

		if n, err := m.PruneUnusedRemote(innerCtx, dry); err != nil {
			errs = append(errs, fmt.Sprintf("error pruning unused remote media: (%s)", err))
		} else {
			log.Infof(ctx, "pruned %d unused remote media", n)
		}

		if n, err := m.UncacheRemote(innerCtx, mediaCacheRemoteDays, dry); err != nil {
			errs = append(errs, fmt.Sprintf("error uncacheing remote media older than %d day(s): (%s)", mediaCacheRemoteDays, err))
		} else {
			log.Infof(ctx, "uncached %d remote media older than %d day(s)", n, mediaCacheRemoteDays)
		}

		if n, err := m.PruneOrphaned(innerCtx, dry); err != nil {
			errs = append(errs, fmt.Sprintf("error pruning orphaned media: (%s)", err))
		} else {
			log.Infof(ctx, "pruned %d orphaned media", n)
		}

		if err := m.state.Storage.Storage.Clean(innerCtx); err != nil {
			errs = append(errs, fmt.Sprintf("error cleaning storage: (%s)", err))
		} else {
			log.Info(ctx, "cleaned storage")
		}

		return errs.Combine()
	}

	if !blocking {
		// Fire and forget: the caller gets nil straight away,
		// and any failure is only visible in the logs.
		go func() {
			if err := pruneEverything(context.Background()); err != nil {
				log.Error(ctx, err)
			}
		}()
		return nil
	}

	return pruneEverything(ctx)
}
// PruneUnusedRemote pages through the attachments returned by
// GetAvatarsAndHeaders (selectPruneLimit at a time) and prunes each one
// that is no longer in use. An attachment counts as unused when:
//   - it has no owning account in the database, or
//   - it is a header but not the owning account's current header, or
//   - it is an avatar but not the owning account's current avatar.
//
// If dry is true nothing is deleted; matching attachments are only counted.
//
// The returned int is the number of attachments pruned (or that would have
// been pruned, for a dry run) up to the point of return. It is meaningful
// even when a non-nil error is returned, since earlier attachments may
// already have been deleted.
func (m *manager) PruneUnusedRemote(ctx context.Context, dry bool) (int, error) {
	var (
		totalPruned int
		maxID       string
		attachments []*gtsmodel.MediaAttachment
		err         error
	)

	// We don't know in advance how many attachments will meet our
	// criteria for being 'unused', so a dry run iterates as normal
	// but does nothing with each entry instead of removing it.
	// Pick the action once, here, rather than checking 'dry'
	// on every iteration of the loop.
	var f func(ctx context.Context, attachment *gtsmodel.MediaAttachment) error
	if !dry {
		f = m.deleteAttachment
	} else {
		f = func(_ context.Context, _ *gtsmodel.MediaAttachment) error {
			return nil // noop
		}
	}

	for attachments, err = m.state.DB.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.state.DB.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit) {
		// Use the ID of the last attachment in the
		// slice as the 'maxID' for the next page.
		maxID = attachments[len(attachments)-1].ID

		for _, attachment := range attachments {
			// Retrieve owning account if possible.
			var account *gtsmodel.Account
			if accountID := attachment.AccountID; accountID != "" {
				// Keep the lookup error local so it can never be
				// mistaken for the paging error that drives the loop.
				var getErr error
				account, getErr = m.state.DB.GetAccountByID(ctx, accountID)
				if getErr != nil && !errors.Is(getErr, db.ErrNoEntries) {
					// Only return on a real error. Report how many were
					// pruned so far, consistent with the other error paths.
					return totalPruned, fmt.Errorf("PruneUnusedRemote: error fetching account with id %s: %w", accountID, getErr)
				}
			}

			// Prune each attachment that meets one of the following criteria:
			// - Has no owning account in the database.
			// - Is a header but isn't the owning account's current header.
			// - Is an avatar but isn't the owning account's current avatar.
			if account == nil ||
				(*attachment.Header && attachment.ID != account.HeaderMediaAttachmentID) ||
				(*attachment.Avatar && attachment.ID != account.AvatarMediaAttachmentID) {
				if err := f(ctx, attachment); err != nil {
					return totalPruned, err
				}
				totalPruned++
			}
		}
	}

	// Make sure we don't have a real error when we leave the loop.
	if err != nil && !errors.Is(err, db.ErrNoEntries) {
		return totalPruned, err
	}

	return totalPruned, nil
}
// PruneOrphaned walks every key in storage, collects those that m.orphaned
// reports as orphaned, and removes them.
//
// If dry is true nothing is removed and the number of orphaned keys found
// is returned. Otherwise the collected keys are passed to removeFiles and
// its results are returned as-is.
func (m *manager) PruneOrphaned(ctx context.Context, dry bool) (int, error) {
	// Emojis are stored under the instance account, so the
	// orphan check needs the ID of the instance account.
	instanceAcct, err := m.state.DB.GetInstanceAccount(ctx, "")
	if err != nil {
		return 0, fmt.Errorf("PruneOrphaned: error getting instance account: %w", err)
	}
	instanceID := instanceAcct.ID

	var toRemove []string

	// Keys in storage will look like the following format:
	// `[ACCOUNT_ID]/[MEDIA_TYPE]/[MEDIA_SIZE]/[MEDIA_ID].[EXTENSION]`
	// Anything that doesn't match the file path regex is skipped.
	collect := func(ctx context.Context, key string) error {
		if !regexes.FilePath.MatchString(key) {
			// Not our expected key format.
			return nil
		}

		isOrphan, err := m.orphaned(ctx, key, instanceID)
		switch {
		case err != nil:
			return fmt.Errorf("error checking orphaned status: %w", err)
		case isOrphan:
			toRemove = append(toRemove, key)
		}
		return nil
	}

	if err := m.state.Storage.WalkKeys(ctx, collect); err != nil {
		return 0, fmt.Errorf("PruneOrphaned: error walking keys: %w", err)
	}

	if dry {
		// Dry run: report what was found, remove nothing.
		return len(toRemove), nil
	}

	// Not a dry run: actually delete the orphaned entries.
	return m.removeFiles(ctx, toRemove...)
}
// orphaned reports whether the storage entry at key has no corresponding
// database entry.
//
// The key is split using regexes.FilePath; submatch 2 is used as the media
// type and submatch 4 as the media ID. Keys that do not produce exactly 6
// submatches, and media types not handled below, are never reported as
// orphaned.
//
//   - Attachment, header and avatar keys are orphaned when no attachment
//     with that media ID exists.
//   - Emoji keys are orphaned when no emoji exists with the static URL
//     generated from instanceAccountID and the media ID.
//
// Any database error other than db.ErrNoEntries is returned wrapped.
func (m *manager) orphaned(ctx context.Context, key string, instanceAccountID string) (bool, error) {
	parts := regexes.FilePath.FindStringSubmatch(key)
	if len(parts) != 6 {
		// This doesn't match our expectations so
		// it wasn't created by gts; ignore it.
		return false, nil
	}
	mediaType, mediaID := parts[2], parts[4]

	// Look for keys in storage that we don't have a db entry for.
	switch Type(mediaType) {
	case TypeAttachment, TypeHeader, TypeAvatar:
		_, err := m.state.DB.GetAttachmentByID(ctx, mediaID)
		if err == nil {
			return false, nil
		}
		if errors.Is(err, db.ErrNoEntries) {
			return true, nil
		}
		return false, fmt.Errorf("error calling GetAttachmentByID: %w", err)

	case TypeEmoji:
		// Look using the static URL for the emoji. Emoji images can change, so
		// the MEDIA_ID part of the key for emojis will not necessarily correspond
		// to the file that's currently being used as the emoji image.
		staticURL := uris.GenerateURIForAttachment(instanceAccountID, string(TypeEmoji), string(SizeStatic), mediaID, mimePng)
		_, err := m.state.DB.GetEmojiByStaticURL(ctx, staticURL)
		if err == nil {
			return false, nil
		}
		if errors.Is(err, db.ErrNoEntries) {
			return true, nil
		}
		return false, fmt.Errorf("error calling GetEmojiByStaticURL: %w", err)

	default:
		// Unhandled media type: leave it alone.
		return false, nil
	}
}
// UncacheRemote uncaches the attachments returned by GetRemoteOlderThan for a
// cutoff of olderThanDays days before now, working through them in batches of
// selectPruneLimit.
//
// A negative olderThanDays is a no-op that returns (0, nil). If dry is true
// nothing is changed and the result of CountRemoteOlderThan is returned
// instead. Otherwise the returned int is the number of attachments uncached
// before returning, including when an error is also returned.
func (m *manager) UncacheRemote(ctx context.Context, olderThanDays int, dry bool) (int, error) {
	if olderThanDays < 0 {
		return 0, nil
	}

	cutoff := time.Now().Add(-time.Hour * 24 * time.Duration(olderThanDays))

	if dry {
		// Dry run, just count eligible entries without removing them.
		return m.state.DB.CountRemoteOlderThan(ctx, cutoff)
	}

	uncached := 0
	for {
		batch, err := m.state.DB.GetRemoteOlderThan(ctx, cutoff, selectPruneLimit)
		if err != nil {
			if errors.Is(err, db.ErrNoEntries) {
				// Nothing left to select; not a real error.
				return uncached, nil
			}
			return uncached, err
		}
		if len(batch) == 0 {
			return uncached, nil
		}

		// The created time of the last attachment in this
		// batch becomes the cutoff for the next batch.
		cutoff = batch[len(batch)-1].CreatedAt

		for _, attachment := range batch {
			if err := m.uncacheAttachment(ctx, attachment); err != nil {
				return uncached, err
			}
			uncached++
		}
	}
}
// PruneUnusedLocal deletes the attachments returned by
// GetLocalUnattachedOlderThan for a cutoff of unusedLocalAttachmentDays days
// before now, working through them in batches of selectPruneLimit.
//
// If dry is true nothing is deleted and the result of
// CountLocalUnattachedOlderThan is returned instead. Otherwise the returned
// int is the number of attachments deleted before returning, including when
// an error is also returned.
func (m *manager) PruneUnusedLocal(ctx context.Context, dry bool) (int, error) {
	cutoff := time.Now().Add(-time.Hour * 24 * time.Duration(unusedLocalAttachmentDays))

	if dry {
		// Dry run, just count eligible entries without removing them.
		return m.state.DB.CountLocalUnattachedOlderThan(ctx, cutoff)
	}

	deleted := 0
	for {
		batch, err := m.state.DB.GetLocalUnattachedOlderThan(ctx, cutoff, selectPruneLimit)
		if err != nil {
			if errors.Is(err, db.ErrNoEntries) {
				// Nothing left to select; not a real error.
				return deleted, nil
			}
			return deleted, err
		}
		if len(batch) == 0 {
			return deleted, nil
		}

		// The created time of the last attachment in this
		// batch becomes the cutoff for the next batch.
		cutoff = batch[len(batch)-1].CreatedAt

		for _, attachment := range batch {
			if err := m.deleteAttachment(ctx, attachment); err != nil {
				return deleted, err
			}
			deleted++
		}
	}
}
/*
	Handy little helpers
*/
// deleteAttachment removes the attachment's file and thumbnail from storage
// and then deletes the attachment's database entry. If removing the files
// fails, the database entry is left in place and the error is returned.
func (m *manager) deleteAttachment(ctx context.Context, attachment *gtsmodel.MediaAttachment) error {
	paths := []string{attachment.File.Path, attachment.Thumbnail.Path}

	_, err := m.removeFiles(ctx, paths...)
	if err != nil {
		return err
	}

	// Files are gone; delete the attachment completely.
	return m.state.DB.DeleteAttachment(ctx, attachment.ID)
}
// uncacheAttachment removes the attachment's file and thumbnail from storage,
// then sets Cached to false on the attachment and updates its "cached" column
// in the database. If removing the files fails, the attachment is left
// unmodified and the error is returned.
func (m *manager) uncacheAttachment(ctx context.Context, attachment *gtsmodel.MediaAttachment) error {
	paths := []string{attachment.File.Path, attachment.Thumbnail.Path}

	_, err := m.removeFiles(ctx, paths...)
	if err != nil {
		return err
	}

	// Update attachment to reflect that we no longer have it cached.
	notCached := false
	attachment.Cached = &notCached
	return m.state.DB.UpdateAttachment(ctx, attachment, "cached")
}
// removeFiles deletes each of the given keys from storage. It does not stop
// at the first failure: every key is attempted, and failures are gathered
// into a single combined error.
//
// A key that storage reports as not found is not treated as a failure. The
// returned int is len(keys) minus the number of failures, so keys that were
// already missing are included in the count.
func (m *manager) removeFiles(ctx context.Context, keys ...string) (int, error) {
	failures := make(gtserror.MultiError, 0, len(keys))

	for i := range keys {
		err := m.state.Storage.Delete(ctx, keys[i])
		if err == nil || errors.Is(err, storage.ErrNotFound) {
			continue
		}
		failures = append(failures, "storage error removing "+keys[i]+": "+err.Error())
	}

	removed := len(keys) - len(failures)
	return removed, failures.Combine()
}
 |