diff options
author | 2022-12-12 11:22:19 +0000 | |
---|---|---|
committer | 2022-12-12 12:22:19 +0100 | |
commit | 58c87bdd7f37e80a57c4802e95eb6c572ccffed4 (patch) | |
tree | ecfa90c1249adbd02b386f624b9dbf3c405cafd6 /internal | |
parent | [chore]: Bump golang.org/x/image from 0.1.0 to 0.2.0 (#1252) (diff) | |
download | gotosocial-58c87bdd7f37e80a57c4802e95eb6c572ccffed4.tar.xz |
[feature] allow uncaching of other media types (#1234)
* simplify pruneRemote, remove unnecessary media trace logging, update RemoteOlderThan() to include headers/avis
Signed-off-by: kim <grufwub@gmail.com>
* cleanup pruneallmeta, add remote header to pruneremote tests
Signed-off-by: kim <grufwub@gmail.com>
* fix olderthan duration additions
Signed-off-by: kim <grufwub@gmail.com>
* fix broken test now that test model header changed
Signed-off-by: kim <grufwub@gmail.com>
* instead use new remote test account for new header model
Signed-off-by: kim <grufwub@gmail.com>
* use newer generated ULID for remote_account_3 to ensure it is sorted last
Signed-off-by: kim <grufwub@gmail.com>
* reorganize serialized keys to match expected test account model order
Signed-off-by: kim <grufwub@gmail.com>
Signed-off-by: kim <grufwub@gmail.com>
Diffstat (limited to 'internal')
-rw-r--r-- | internal/db/bundb/media.go | 3 | ||||
-rw-r--r-- | internal/db/bundb/media_test.go | 4 | ||||
-rw-r--r-- | internal/db/media.go | 14 | ||||
-rw-r--r-- | internal/media/processingmedia.go | 17 | ||||
-rw-r--r-- | internal/media/prunemeta.go | 30 | ||||
-rw-r--r-- | internal/media/pruneremote.go | 44 | ||||
-rw-r--r-- | internal/media/pruneremote_test.go | 81 | ||||
-rw-r--r-- | internal/media/pruneunusedlocal.go | 7 | ||||
-rw-r--r-- | internal/media/util.go | 17 |
9 files changed, 107 insertions, 110 deletions
diff --git a/internal/db/bundb/media.go b/internal/db/bundb/media.go index 39e0ad0e3..3be7be39a 100644 --- a/internal/db/bundb/media.go +++ b/internal/db/bundb/media.go @@ -57,8 +57,6 @@ func (m *mediaDB) GetRemoteOlderThan(ctx context.Context, olderThan time.Time, l NewSelect(). Model(&attachments). Where("? = ?", bun.Ident("media_attachment.cached"), true). - Where("? = ?", bun.Ident("media_attachment.avatar"), false). - Where("? = ?", bun.Ident("media_attachment.header"), false). Where("? < ?", bun.Ident("media_attachment.created_at"), olderThan). WhereGroup(" AND ", whereNotEmptyAndNotNull("media_attachment.remote_url")). Order("media_attachment.created_at DESC") @@ -70,6 +68,7 @@ func (m *mediaDB) GetRemoteOlderThan(ctx context.Context, olderThan time.Time, l if err := q.Scan(ctx); err != nil { return nil, m.conn.ProcessError(err) } + return attachments, nil } diff --git a/internal/db/bundb/media_test.go b/internal/db/bundb/media_test.go index d6a4981f8..1676954ce 100644 --- a/internal/db/bundb/media_test.go +++ b/internal/db/bundb/media_test.go @@ -41,7 +41,7 @@ func (suite *MediaTestSuite) TestGetAttachmentByID() { func (suite *MediaTestSuite) TestGetOlder() { attachments, err := suite.db.GetRemoteOlderThan(context.Background(), time.Now(), 20) suite.NoError(err) - suite.Len(attachments, 2) + suite.Len(attachments, 3) } func (suite *MediaTestSuite) TestGetAvisAndHeaders() { @@ -49,7 +49,7 @@ func (suite *MediaTestSuite) TestGetAvisAndHeaders() { attachments, err := suite.db.GetAvatarsAndHeaders(ctx, "", 20) suite.NoError(err) - suite.Len(attachments, 2) + suite.Len(attachments, 3) } func (suite *MediaTestSuite) TestGetLocalUnattachedOlderThan() { diff --git a/internal/db/media.go b/internal/db/media.go index 2f9ed79dc..2d4fbb441 100644 --- a/internal/db/media.go +++ b/internal/db/media.go @@ -29,18 +29,20 @@ import ( type Media interface { // GetAttachmentByID gets a single attachment by its ID GetAttachmentByID(ctx context.Context, id string) 
(*gtsmodel.MediaAttachment, Error) - // GetRemoteOlderThan gets limit n remote media attachments older than the given olderThan time. - // These will be returned in order of attachment.created_at descending (newest to oldest in other words). + + // GetRemoteOlderThan gets limit n remote media attachments (including avatars and headers) older than the given + // olderThan time. These will be returned in order of attachment.created_at descending (newest to oldest in other words). // // The selected media attachments will be those with both a URL and a RemoteURL filled in. // In other words, media attachments that originated remotely, and that we currently have cached locally. GetRemoteOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.MediaAttachment, Error) + // GetAvatarsAndHeaders fetches limit n avatars and headers with an id < maxID. These headers // and avis may be in use or not; the caller should check this if it's important. GetAvatarsAndHeaders(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, Error) - // GetLocalUnattachedOlderThan fetches limit n local media attachments, older than the given time, which - // aren't header or avatars, and aren't attached to a status. In other words, attachments which were uploaded - // but never used for whatever reason, or attachments that were attached to a status which was subsequently - // deleted. + + // GetLocalUnattachedOlderThan fetches limit n local media attachments (including avatars and headers), older than + // the given time, which aren't header or avatars, and aren't attached to a status. In other words, attachments which were + // uploaded but never used for whatever reason, or attachments that were attached to a status which was subsequently deleted. 
GetLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time, maxID string, limit int) ([]*gtsmodel.MediaAttachment, Error) } diff --git a/internal/media/processingmedia.go b/internal/media/processingmedia.go index 1fd9423f5..94c8f9a7a 100644 --- a/internal/media/processingmedia.go +++ b/internal/media/processingmedia.go @@ -81,10 +81,8 @@ func (p *ProcessingMedia) AttachmentID() string { // LoadAttachment blocks until the thumbnail and fullsize content // has been processed, and then returns the completed attachment. func (p *ProcessingMedia) LoadAttachment(ctx context.Context) (*gtsmodel.MediaAttachment, error) { - log.Tracef("LoadAttachment: getting lock for attachment %s", p.attachment.URL) p.mu.Lock() defer p.mu.Unlock() - log.Tracef("LoadAttachment: got lock for attachment %s", p.attachment.URL) if err := p.store(ctx); err != nil { return nil, err @@ -98,23 +96,24 @@ func (p *ProcessingMedia) LoadAttachment(ctx context.Context) (*gtsmodel.MediaAt return nil, err } - // store the result in the database before returning it if !p.insertedInDB { if p.recache { - // if it's a recache we should only need to update + // This is an existing media attachment we're recaching, so only need to update it if err := p.database.UpdateByID(ctx, p.attachment, p.attachment.ID); err != nil { return nil, err } } else { - // otherwise we need to really PUT it + // This is a new media attachment we're caching for first time if err := p.database.Put(ctx, p.attachment); err != nil { return nil, err } } + + // Mark this as stored in DB p.insertedInDB = true } - log.Tracef("LoadAttachment: finished, returning attachment %s", p.attachment.URL) + log.Tracef("finished loading attachment %s", p.attachment.URL) return p.attachment, nil } @@ -180,7 +179,7 @@ func (p *ProcessingMedia) loadThumb(ctx context.Context) error { // we're done processing the thumbnail! 
atomic.StoreInt32(&p.thumbState, int32(complete)) - log.Tracef("loadThumb: finished processing thumbnail for attachment %s", p.attachment.URL) + log.Tracef("finished processing thumbnail for attachment %s", p.attachment.URL) fallthrough case complete: return nil @@ -241,7 +240,7 @@ func (p *ProcessingMedia) loadFullSize(ctx context.Context) error { // we're done processing the full-size image atomic.StoreInt32(&p.fullSizeState, int32(complete)) - log.Tracef("loadFullSize: finished processing full size image for attachment %s", p.attachment.URL) + log.Tracef("finished processing full size image for attachment %s", p.attachment.URL) fallthrough case complete: return nil @@ -362,7 +361,7 @@ func (p *ProcessingMedia) store(ctx context.Context) error { p.attachment.File.FileSize = int(fileSize) p.read = true - log.Tracef("store: finished storing initial data for attachment %s", p.attachment.URL) + log.Tracef("finished storing initial data for attachment %s", p.attachment.URL) return nil } diff --git a/internal/media/prunemeta.go b/internal/media/prunemeta.go index 69d79b8d9..19fbb7e7e 100644 --- a/internal/media/prunemeta.go +++ b/internal/media/prunemeta.go @@ -20,6 +20,7 @@ package media import ( "context" + "errors" "codeberg.org/gruf/go-store/v2/storage" "github.com/superseriousbusiness/gotosocial/internal/db" @@ -28,17 +29,23 @@ import ( ) func (m *manager) PruneAllMeta(ctx context.Context) (int, error) { - var totalPruned int - var maxID string - var attachments []*gtsmodel.MediaAttachment - var err error + var ( + totalPruned int + maxID string + ) + + for { + // select "selectPruneLimit" headers / avatars at a time for pruning + attachments, err := m.db.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return totalPruned, err + } else if len(attachments) == 0 { + break + } - // select 20 attachments at a time and prune them - for attachments, err = m.db.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit); 
err == nil && len(attachments) != 0; attachments, err = m.db.GetAvatarsAndHeaders(ctx, maxID, selectPruneLimit) { // use the id of the last attachment in the slice as the next 'maxID' value - l := len(attachments) - log.Tracef("PruneAllMeta: got %d attachments with maxID < %s", l, maxID) - maxID = attachments[l-1].ID + log.Tracef("PruneAllMeta: got %d attachments with maxID < %s", len(attachments), maxID) + maxID = attachments[len(attachments)-1].ID // prune each attachment that meets one of the following criteria: // - has no owning account in the database @@ -56,11 +63,6 @@ func (m *manager) PruneAllMeta(ctx context.Context) (int, error) { } } - // make sure we don't have a real error when we leave the loop - if err != nil && err != db.ErrNoEntries { - return totalPruned, err - } - log.Infof("PruneAllMeta: finished pruning avatars + headers: pruned %d entries", totalPruned) return totalPruned, nil } diff --git a/internal/media/pruneremote.go b/internal/media/pruneremote.go index 011ed1dd7..f6108588f 100644 --- a/internal/media/pruneremote.go +++ b/internal/media/pruneremote.go @@ -20,7 +20,8 @@ package media import ( "context" - "fmt" + "errors" + "time" "codeberg.org/gruf/go-store/v2/storage" "github.com/superseriousbusiness/gotosocial/internal/db" @@ -31,21 +32,23 @@ import ( func (m *manager) PruneAllRemote(ctx context.Context, olderThanDays int) (int, error) { var totalPruned int - olderThan, err := parseOlderThan(olderThanDays) - if err != nil { - return totalPruned, fmt.Errorf("PruneAllRemote: error parsing olderThanDays %d: %s", olderThanDays, err) - } + olderThan := time.Now().Add(-time.Hour * 24 * time.Duration(olderThanDays)) log.Infof("PruneAllRemote: pruning media older than %s", olderThan) - // select 20 attachments at a time and prune them - for attachments, err := m.db.GetRemoteOlderThan(ctx, olderThan, selectPruneLimit); err == nil && len(attachments) != 0; attachments, err = m.db.GetRemoteOlderThan(ctx, olderThan, selectPruneLimit) { + for { + // 
Select "selectPruneLimit" status attacchments at a time for pruning + attachments, err := m.db.GetRemoteOlderThan(ctx, olderThan, selectPruneLimit) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return totalPruned, err + } else if len(attachments) == 0 { + break + } - // use the age of the oldest attachment (the last one in the slice) as the next 'older than' value - l := len(attachments) - log.Tracef("PruneAllRemote: got %d attachments older than %s", l, olderThan) - olderThan = attachments[l-1].CreatedAt + // use the age of the oldest attachment (last in slice) as the next 'olderThan' value + log.Tracef("PruneAllRemote: got %d status attachments older than %s", len(attachments), olderThan) + olderThan = attachments[len(attachments)-1].CreatedAt - // prune each attachment + // prune each status attachment for _, attachment := range attachments { if err := m.pruneOneRemote(ctx, attachment); err != nil { return totalPruned, err @@ -54,11 +57,6 @@ func (m *manager) PruneAllRemote(ctx context.Context, olderThanDays int) (int, e } } - // make sure we don't have a real error when we leave the loop - if err != nil && err != db.ErrNoEntries { - return totalPruned, err - } - log.Infof("PruneAllRemote: finished pruning remote media: pruned %d entries", totalPruned) return totalPruned, nil } @@ -69,7 +67,7 @@ func (m *manager) pruneOneRemote(ctx context.Context, attachment *gtsmodel.Media if attachment.File.Path != "" { // delete the full size attachment from storage log.Tracef("pruneOneRemote: deleting %s", attachment.File.Path) - if err := m.storage.Delete(ctx, attachment.File.Path); err != nil && err != storage.ErrNotFound { + if err := m.storage.Delete(ctx, attachment.File.Path); err != nil && !errors.Is(err, storage.ErrNotFound) { return err } cached := false @@ -80,7 +78,7 @@ func (m *manager) pruneOneRemote(ctx context.Context, attachment *gtsmodel.Media if attachment.Thumbnail.Path != "" { // delete the thumbnail from storage log.Tracef("pruneOneRemote: 
deleting %s", attachment.Thumbnail.Path) - if err := m.storage.Delete(ctx, attachment.Thumbnail.Path); err != nil && err != storage.ErrNotFound { + if err := m.storage.Delete(ctx, attachment.Thumbnail.Path); err != nil && !errors.Is(err, storage.ErrNotFound) { return err } cached := false @@ -88,10 +86,10 @@ func (m *manager) pruneOneRemote(ctx context.Context, attachment *gtsmodel.Media changed = true } - // update the attachment to reflect that we no longer have it cached - if changed { - return m.db.UpdateByID(ctx, attachment, attachment.ID, "updated_at", "cached") + if !changed { + return nil } - return nil + // update the attachment to reflect that we no longer have it cached + return m.db.UpdateByID(ctx, attachment, attachment.ID, "updated_at", "cached") } diff --git a/internal/media/pruneremote_test.go b/internal/media/pruneremote_test.go index 4af01c1c5..d4365f72c 100644 --- a/internal/media/pruneremote_test.go +++ b/internal/media/pruneremote_test.go @@ -27,6 +27,7 @@ import ( "codeberg.org/gruf/go-store/v2/storage" "github.com/stretchr/testify/suite" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" ) type PruneRemoteTestSuite struct { @@ -34,24 +35,29 @@ type PruneRemoteTestSuite struct { } func (suite *PruneRemoteTestSuite) TestPruneRemote() { - testAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"] - suite.True(*testAttachment.Cached) + testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"] + suite.True(*testStatusAttachment.Cached) + + testHeader := suite.testAttachments["remote_account_3_header"] + suite.True(*testHeader.Cached) totalPruned, err := suite.manager.PruneAllRemote(context.Background(), 1) suite.NoError(err) - suite.Equal(2, totalPruned) + suite.Equal(3, totalPruned) - prunedAttachment, err := suite.db.GetAttachmentByID(context.Background(), testAttachment.ID) + prunedAttachment, err := suite.db.GetAttachmentByID(context.Background(), testStatusAttachment.ID) 
suite.NoError(err) + suite.False(*prunedAttachment.Cached) - // the media should no longer be cached + prunedAttachment, err = suite.db.GetAttachmentByID(context.Background(), testHeader.ID) + suite.NoError(err) suite.False(*prunedAttachment.Cached) } func (suite *PruneRemoteTestSuite) TestPruneRemoteTwice() { totalPruned, err := suite.manager.PruneAllRemote(context.Background(), 1) suite.NoError(err) - suite.Equal(2, totalPruned) + suite.Equal(3, totalPruned) // final prune should prune nothing, since the first prune already happened totalPrunedAgain, err := suite.manager.PruneAllRemote(context.Background(), 1) @@ -61,16 +67,21 @@ func (suite *PruneRemoteTestSuite) TestPruneRemoteTwice() { func (suite *PruneRemoteTestSuite) TestPruneAndRecache() { ctx := context.Background() - testAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"] + testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"] + testHeader := suite.testAttachments["remote_account_3_header"] totalPruned, err := suite.manager.PruneAllRemote(ctx, 1) suite.NoError(err) - suite.Equal(2, totalPruned) + suite.Equal(3, totalPruned) // media should no longer be stored - _, err = suite.storage.Get(ctx, testAttachment.File.Path) + _, err = suite.storage.Get(ctx, testStatusAttachment.File.Path) suite.ErrorIs(err, storage.ErrNotFound) - _, err = suite.storage.Get(ctx, testAttachment.Thumbnail.Path) + _, err = suite.storage.Get(ctx, testStatusAttachment.Thumbnail.Path) + suite.ErrorIs(err, storage.ErrNotFound) + _, err = suite.storage.Get(ctx, testHeader.File.Path) + suite.ErrorIs(err, storage.ErrNotFound) + _, err = suite.storage.Get(ctx, testHeader.Thumbnail.Path) suite.ErrorIs(err, storage.ErrNotFound) // now recache the image.... 
@@ -82,34 +93,40 @@ func (suite *PruneRemoteTestSuite) TestPruneAndRecache() { } return io.NopCloser(bytes.NewBuffer(b)), int64(len(b)), nil } - processingRecache, err := suite.manager.RecacheMedia(ctx, data, nil, testAttachment.ID) - suite.NoError(err) - // synchronously load the recached attachment - recachedAttachment, err := processingRecache.LoadAttachment(ctx) - suite.NoError(err) - suite.NotNil(recachedAttachment) - - // recachedAttachment should be basically the same as the old attachment - suite.True(*recachedAttachment.Cached) - suite.Equal(testAttachment.ID, recachedAttachment.ID) - suite.Equal(testAttachment.File.Path, recachedAttachment.File.Path) // file should be stored in the same place - suite.Equal(testAttachment.Thumbnail.Path, recachedAttachment.Thumbnail.Path) // as should the thumbnail - suite.EqualValues(testAttachment.FileMeta, recachedAttachment.FileMeta) // and the filemeta should be the same - - // recached files should be back in storage - _, err = suite.storage.Get(ctx, recachedAttachment.File.Path) - suite.NoError(err) - _, err = suite.storage.Get(ctx, recachedAttachment.Thumbnail.Path) - suite.NoError(err) + for _, original := range []*gtsmodel.MediaAttachment{ + testStatusAttachment, + testHeader, + } { + processingRecache, err := suite.manager.RecacheMedia(ctx, data, nil, original.ID) + suite.NoError(err) + + // synchronously load the recached attachment + recachedAttachment, err := processingRecache.LoadAttachment(ctx) + suite.NoError(err) + suite.NotNil(recachedAttachment) + + // recachedAttachment should be basically the same as the old attachment + suite.True(*recachedAttachment.Cached) + suite.Equal(original.ID, recachedAttachment.ID) + suite.Equal(original.File.Path, recachedAttachment.File.Path) // file should be stored in the same place + suite.Equal(original.Thumbnail.Path, recachedAttachment.Thumbnail.Path) // as should the thumbnail + suite.EqualValues(original.FileMeta, recachedAttachment.FileMeta) // and the filemeta 
should be the same + + // recached files should be back in storage + _, err = suite.storage.Get(ctx, recachedAttachment.File.Path) + suite.NoError(err) + _, err = suite.storage.Get(ctx, recachedAttachment.Thumbnail.Path) + suite.NoError(err) + } } func (suite *PruneRemoteTestSuite) TestPruneOneNonExistent() { ctx := context.Background() - testAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"] + testStatusAttachment := suite.testAttachments["remote_account_1_status_1_attachment_1"] // Delete this attachment cached on disk - media, err := suite.db.GetAttachmentByID(ctx, testAttachment.ID) + media, err := suite.db.GetAttachmentByID(ctx, testStatusAttachment.ID) suite.NoError(err) suite.True(*media.Cached) err = suite.storage.Delete(ctx, media.File.Path) @@ -118,7 +135,7 @@ func (suite *PruneRemoteTestSuite) TestPruneOneNonExistent() { // Now attempt to prune remote for item with db entry no file totalPruned, err := suite.manager.PruneAllRemote(ctx, 1) suite.NoError(err) - suite.Equal(2, totalPruned) + suite.Equal(3, totalPruned) } func TestPruneRemoteTestSuite(t *testing.T) { diff --git a/internal/media/pruneunusedlocal.go b/internal/media/pruneunusedlocal.go index ba74b7c90..7098648e0 100644 --- a/internal/media/pruneunusedlocal.go +++ b/internal/media/pruneunusedlocal.go @@ -20,7 +20,7 @@ package media import ( "context" - "fmt" + "time" "codeberg.org/gruf/go-store/v2/storage" "github.com/superseriousbusiness/gotosocial/internal/db" @@ -34,10 +34,7 @@ func (m *manager) PruneUnusedLocalAttachments(ctx context.Context) (int, error) var attachments []*gtsmodel.MediaAttachment var err error - olderThan, err := parseOlderThan(UnusedLocalAttachmentCacheDays) - if err != nil { - return totalPruned, fmt.Errorf("PruneUnusedLocalAttachments: error parsing olderThanDays %d: %s", UnusedLocalAttachmentCacheDays, err) - } + olderThan := time.Now().Add(-time.Hour * 24 * time.Duration(UnusedLocalAttachmentCacheDays)) 
log.Infof("PruneUnusedLocalAttachments: pruning unused local attachments older than %s", olderThan) // select 20 attachments at a time and prune them diff --git a/internal/media/util.go b/internal/media/util.go index 7efd788a1..60661cbc0 100644 --- a/internal/media/util.go +++ b/internal/media/util.go @@ -23,7 +23,6 @@ import ( "errors" "fmt" "io" - "time" "github.com/h2non/filetype" "github.com/superseriousbusiness/gotosocial/internal/log" @@ -134,22 +133,6 @@ func (l *logrusWrapper) Error(err error, msg string, keysAndValues ...interface{ log.Error("media manager cron logger: ", err, msg, keysAndValues) } -func parseOlderThan(olderThanDays int) (time.Time, error) { - // convert days into a duration string - olderThanHoursString := fmt.Sprintf("%dh", olderThanDays*24) - - // parse the duration string into a duration - olderThanHours, err := time.ParseDuration(olderThanHoursString) - if err != nil { - return time.Time{}, err - } - - // 'subtract' that from the time now to give our threshold - olderThan := time.Now().Add(-olderThanHours) - - return olderThan, nil -} - // lengthReader wraps a reader and reads the length of total bytes written as it goes. type lengthReader struct { source io.Reader |