diff options
author | 2022-11-25 18:23:42 +0100 | |
---|---|---|
committer | 2022-11-25 17:23:42 +0000 | |
commit | 13e9abd02a1f4003c7be922a22e8f1d095a55d61 (patch) | |
tree | ccc7b7bbb0d040dc1db84d581849a0e443f91698 /internal | |
parent | [bugfix] Change emailVerified to true for admin account create (#1140) (diff) | |
download | gotosocial-13e9abd02a1f4003c7be922a22e8f1d095a55d61.tar.xz |
[feature] Add `admin media prune orphaned` CLI command (#1146)
* add FilePath regex
* add `admin media prune orphaned` command
* add prune orphaned function to media manager
* don't mark flag as required
* document admin media prune orphaned cmd
* oh envparsing.sh you coy minx
Diffstat (limited to 'internal')
-rw-r--r-- | internal/config/config.go | 13 | ||||
-rw-r--r-- | internal/config/flags.go | 7 | ||||
-rw-r--r-- | internal/config/helpers.gen.go | 99 | ||||
-rw-r--r-- | internal/media/manager.go | 6 | ||||
-rw-r--r-- | internal/media/pruneorphaned.go | 127 | ||||
-rw-r--r-- | internal/media/pruneorphaned_test.go | 82 | ||||
-rw-r--r-- | internal/regexes/regexes.go | 7 |
7 files changed, 298 insertions, 43 deletions
diff --git a/internal/config/config.go b/internal/config/config.go index ecbd079e6..cc37e9bfd 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -125,14 +125,15 @@ type Configuration struct { SyslogProtocol string `name:"syslog-protocol" usage:"Protocol to use when directing logs to syslog. Leave empty to connect to local syslog."` SyslogAddress string `name:"syslog-address" usage:"Address:port to send syslog logs to. Leave empty to connect to local syslog."` - // TODO: move these elsewhere, these are more ephemeral vs long-running flags like above - AdminAccountUsername string `name:"username" usage:"the username to create/delete/etc"` - AdminAccountEmail string `name:"email" usage:"the email address of this account"` - AdminAccountPassword string `name:"password" usage:"the password to set for this account"` - AdminTransPath string `name:"path" usage:"the path of the file to import from/export to"` - AdvancedCookiesSamesite string `name:"advanced-cookies-samesite" usage:"'strict' or 'lax', see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie/SameSite"` AdvancedRateLimitRequests int `name:"advanced-rate-limit-requests" usage:"Amount of HTTP requests to permit within a 5 minute window. 0 or less turns rate limiting off."` + + // TODO: move these elsewhere, these are more ephemeral vs long-running flags like above + AdminAccountUsername string `name:"username" usage:"the username to create/delete/etc"` + AdminAccountEmail string `name:"email" usage:"the email address of this account"` + AdminAccountPassword string `name:"password" usage:"the password to set for this account"` + AdminTransPath string `name:"path" usage:"the path of the file to import from/export to"` + AdminMediaPruneDryRun bool `name:"dry-run" usage:"perform a dry run and only log number of items eligible for pruning"` } // MarshalMap will marshal current Configuration into a map structure (useful for JSON). diff --git a/internal/config/flags.go b/internal/config/flags.go index ddc3c60de..c5df1c8b2 100644 --- a/internal/config/flags.go +++ b/internal/config/flags.go @@ -178,3 +178,10 @@ func AddAdminTrans(cmd *cobra.Command) { panic(err) } } + +// AddAdminMediaPrune attaches flags pertaining to media storage prune commands. +func AddAdminMediaPrune(cmd *cobra.Command) { + name := AdminMediaPruneDryRunFlag() + usage := fieldtag("AdminMediaPruneDryRun", "usage") + cmd.Flags().Bool(name, true, usage) +} diff --git a/internal/config/helpers.gen.go b/internal/config/helpers.gen.go index 2786f5b5a..579814b99 100644 --- a/internal/config/helpers.gen.go +++ b/internal/config/helpers.gen.go @@ -1745,6 +1745,56 @@ func GetSyslogAddress() string { return global.GetSyslogAddress() } // SetSyslogAddress safely sets the value for global configuration 'SyslogAddress' field func SetSyslogAddress(v string) { global.SetSyslogAddress(v) } +// GetAdvancedCookiesSamesite safely fetches the Configuration value for state's 'AdvancedCookiesSamesite' field +func (st *ConfigState) GetAdvancedCookiesSamesite() (v string) { + st.mutex.Lock() + v = st.config.AdvancedCookiesSamesite + st.mutex.Unlock() + return +} + +// SetAdvancedCookiesSamesite safely sets the Configuration value for state's 'AdvancedCookiesSamesite' field +func (st *ConfigState) SetAdvancedCookiesSamesite(v string) { + st.mutex.Lock() + defer st.mutex.Unlock() + st.config.AdvancedCookiesSamesite = v + st.reloadToViper() +} + +// AdvancedCookiesSamesiteFlag returns the flag name for the 'AdvancedCookiesSamesite' field +func AdvancedCookiesSamesiteFlag() string { return "advanced-cookies-samesite" } + +// GetAdvancedCookiesSamesite safely fetches the value for global configuration 'AdvancedCookiesSamesite' field +func GetAdvancedCookiesSamesite() string { return global.GetAdvancedCookiesSamesite() } + +// SetAdvancedCookiesSamesite safely sets the value for global configuration 'AdvancedCookiesSamesite' field +func SetAdvancedCookiesSamesite(v string) { global.SetAdvancedCookiesSamesite(v) } + +// GetAdvancedRateLimitRequests safely fetches the Configuration value for state's 'AdvancedRateLimitRequests' field +func (st *ConfigState) GetAdvancedRateLimitRequests() (v int) { + st.mutex.Lock() + v = st.config.AdvancedRateLimitRequests + st.mutex.Unlock() + return +} + +// SetAdvancedRateLimitRequests safely sets the Configuration value for state's 'AdvancedRateLimitRequests' field +func (st *ConfigState) SetAdvancedRateLimitRequests(v int) { + st.mutex.Lock() + defer st.mutex.Unlock() + st.config.AdvancedRateLimitRequests = v + st.reloadToViper() +} + +// AdvancedRateLimitRequestsFlag returns the flag name for the 'AdvancedRateLimitRequests' field +func AdvancedRateLimitRequestsFlag() string { return "advanced-rate-limit-requests" } + +// GetAdvancedRateLimitRequests safely fetches the value for global configuration 'AdvancedRateLimitRequests' field +func GetAdvancedRateLimitRequests() int { return global.GetAdvancedRateLimitRequests() } + +// SetAdvancedRateLimitRequests safely sets the value for global configuration 'AdvancedRateLimitRequests' field +func SetAdvancedRateLimitRequests(v int) { global.SetAdvancedRateLimitRequests(v) } + // GetAdminAccountUsername safely fetches the Configuration value for state's 'AdminAccountUsername' field func (st *ConfigState) GetAdminAccountUsername() (v string) { st.mutex.Lock() @@ -1845,52 +1895,27 @@ func GetAdminTransPath() string { return global.GetAdminTransPath() } // SetAdminTransPath safely sets the value for global configuration 'AdminTransPath' field func SetAdminTransPath(v string) { global.SetAdminTransPath(v) } -// GetAdvancedCookiesSamesite safely fetches the Configuration value for state's 'AdvancedCookiesSamesite' field -func (st *ConfigState) GetAdvancedCookiesSamesite() (v string) { +// GetAdminMediaPruneDryRun safely fetches the Configuration value for state's 'AdminMediaPruneDryRun' field +func (st *ConfigState) GetAdminMediaPruneDryRun() (v bool) { st.mutex.Lock() - v = st.config.AdvancedCookiesSamesite + v = st.config.AdminMediaPruneDryRun st.mutex.Unlock() return } -// SetAdvancedCookiesSamesite safely sets the Configuration value for state's 'AdvancedCookiesSamesite' field -func (st *ConfigState) SetAdvancedCookiesSamesite(v string) { +// SetAdminMediaPruneDryRun safely sets the Configuration value for state's 'AdminMediaPruneDryRun' field +func (st *ConfigState) SetAdminMediaPruneDryRun(v bool) { st.mutex.Lock() defer st.mutex.Unlock() - st.config.AdvancedCookiesSamesite = v + st.config.AdminMediaPruneDryRun = v st.reloadToViper() } -// AdvancedCookiesSamesiteFlag returns the flag name for the 'AdvancedCookiesSamesite' field -func AdvancedCookiesSamesiteFlag() string { return "advanced-cookies-samesite" } +// AdminMediaPruneDryRunFlag returns the flag name for the 'AdminMediaPruneDryRun' field +func AdminMediaPruneDryRunFlag() string { return "dry-run" } -// GetAdvancedCookiesSamesite safely fetches the value for global configuration 'AdvancedCookiesSamesite' field -func GetAdvancedCookiesSamesite() string { return global.GetAdvancedCookiesSamesite() } +// GetAdminMediaPruneDryRun safely fetches the value for global configuration 'AdminMediaPruneDryRun' field +func GetAdminMediaPruneDryRun() bool { return global.GetAdminMediaPruneDryRun() } -// SetAdvancedCookiesSamesite safely sets the value for global configuration 'AdvancedCookiesSamesite' field -func SetAdvancedCookiesSamesite(v string) { global.SetAdvancedCookiesSamesite(v) } - -// GetAdvancedRateLimitRequests safely fetches the Configuration value for state's 'AdvancedRateLimitRequests' field -func (st *ConfigState) GetAdvancedRateLimitRequests() (v int) { - st.mutex.Lock() - v = st.config.AdvancedRateLimitRequests - st.mutex.Unlock() - return -} - -// SetAdvancedRateLimitRequests safely sets the Configuration value for state's 'AdvancedRateLimitRequests' field -func (st *ConfigState) SetAdvancedRateLimitRequests(v int) { - st.mutex.Lock() - defer st.mutex.Unlock() - st.config.AdvancedRateLimitRequests = v - st.reloadToViper() -} - -// AdvancedRateLimitRequestsFlag returns the flag name for the 'AdvancedRateLimitRequests' field -func AdvancedRateLimitRequestsFlag() string { return "advanced-rate-limit-requests" } - -// GetAdvancedRateLimitRequests safely fetches the value for global configuration 'AdvancedRateLimitRequests' field -func GetAdvancedRateLimitRequests() int { return global.GetAdvancedRateLimitRequests() } - -// SetAdvancedRateLimitRequests safely sets the value for global configuration 'AdvancedRateLimitRequests' field -func SetAdvancedRateLimitRequests(v int) { global.SetAdvancedRateLimitRequests(v) } +// SetAdminMediaPruneDryRun safely sets the value for global configuration 'AdminMediaPruneDryRun' field +func SetAdminMediaPruneDryRun(v bool) { global.SetAdminMediaPruneDryRun(v) } diff --git a/internal/media/manager.go b/internal/media/manager.go index d04f161d4..67c03fb31 100644 --- a/internal/media/manager.go +++ b/internal/media/manager.go @@ -91,6 +91,12 @@ type Manager interface { // // The returned int is the amount of media that was pruned by this function. PruneUnusedLocalAttachments(ctx context.Context) (int, error) + // PruneOrphaned prunes files that exist in storage but which do not have a corresponding + // entry in the database. + // + // If dry is true, then nothing will be changed, only the amount that *would* be removed + // is returned to the caller. + PruneOrphaned(ctx context.Context, dry bool) (int, error) // Stop stops the underlying worker pool of the manager. It should be called // when closing GoToSocial in order to cleanly finish any in-progress jobs. diff --git a/internal/media/pruneorphaned.go b/internal/media/pruneorphaned.go new file mode 100644 index 000000000..0d733cce5 --- /dev/null +++ b/internal/media/pruneorphaned.go @@ -0,0 +1,127 @@ +/* + GoToSocial + Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +package media + +import ( + "context" + "errors" + "fmt" + + "github.com/superseriousbusiness/gotosocial/internal/db" + "github.com/superseriousbusiness/gotosocial/internal/log" + "github.com/superseriousbusiness/gotosocial/internal/regexes" +) + +func (m *manager) PruneOrphaned(ctx context.Context, dry bool) (int, error) { + var totalPruned int + + // keys in storage will look like the following: + // `[ACCOUNT_ID]/[MEDIA_TYPE]/[MEDIA_SIZE]/[FILE_NAME]` + // we can filter out keys we're not interested in by + // matching through a regex + var matchCount int + match := func(storageKey string) bool { + if regexes.FilePath.MatchString(storageKey) { + matchCount++ + return true + } + return false + } + + log.Info("checking storage keys for orphaned pruning candidates...") + iterator, err := m.storage.Iterator(ctx, match) + if err != nil { + return 0, fmt.Errorf("PruneOrphaned: error getting storage iterator: %s", err) + } + + // make sure we have some keys, and also advance + // the iterator to the first non-empty key + if !iterator.Next() { + return 0, nil + } + + // for each key in the iterator, check if entry is orphaned + log.Info("got %d orphaned pruning candidates, checking for orphaned status, please wait...") + var checkedKeys int + orphanedKeys := make([]string, 0, matchCount) + for key := iterator.Key(); iterator.Next(); key = iterator.Key() { + if m.orphaned(ctx, key) { + orphanedKeys = append(orphanedKeys, key) + } + checkedKeys++ + if checkedKeys%50 == 0 { + log.Infof("checked %d of %d orphaned pruning candidates...", checkedKeys, matchCount) + } + } + iterator.Release() + + if !dry { + // the real deal, we have to delete stuff + for _, key := range orphanedKeys { + log.Infof("key %s corresponds to orphaned media, will remove it now", key) + if err := m.storage.Delete(ctx, key); err != nil { + log.Errorf("error deleting item with key %s from storage: %s", key, err) + continue + } + totalPruned++ + } + } else { + // just a dry run, don't delete anything + for _, key := range orphanedKeys { + log.Infof("DRY RUN: key %s corresponds to orphaned media which would be deleted", key) + totalPruned++ + } + } + + return totalPruned, nil +} + +func (m *manager) orphaned(ctx context.Context, key string) bool { + pathParts := regexes.FilePath.FindStringSubmatch(key) + if len(pathParts) != 6 { + return false + } + + mediaType := pathParts[2] + mediaID := pathParts[4] + + var orphaned bool + switch Type(mediaType) { + case TypeAttachment, TypeHeader, TypeAvatar: + if _, err := m.db.GetAttachmentByID(ctx, mediaID); err != nil { + if errors.Is(err, db.ErrNoEntries) { + orphaned = true + } else { + log.Errorf("orphaned: error calling GetAttachmentByID: %s", err) + } + } + case TypeEmoji: + if _, err := m.db.GetEmojiByID(ctx, mediaID); err != nil { + if errors.Is(err, db.ErrNoEntries) { + orphaned = true + } else { + log.Errorf("orphaned: error calling GetEmojiByID: %s", err) + } + } + default: + orphaned = true + } + + return orphaned +} diff --git a/internal/media/pruneorphaned_test.go b/internal/media/pruneorphaned_test.go new file mode 100644 index 000000000..f791bde12 --- /dev/null +++ b/internal/media/pruneorphaned_test.go @@ -0,0 +1,82 @@ +/* + GoToSocial + Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +package media_test + +import ( + "bytes" + "context" + "os" + "testing" + + "github.com/stretchr/testify/suite" +) + +type PruneOrphanedTestSuite struct { + MediaStandardTestSuite +} + +func (suite *PruneOrphanedTestSuite) TestPruneOrphanedDry() { + // add a big orphan panda to store + b, err := os.ReadFile("./test/big-panda.gif") + if err != nil { + panic(err) + } + + pandaPath := "01GJQJ1YD9QCHCE12GG0EYHVNW/attachments/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif" + if err := suite.storage.PutStream(context.Background(), pandaPath, bytes.NewBuffer(b)); err != nil { + panic(err) + } + + // dry run should show up 1 orphaned panda + totalPruned, err := suite.manager.PruneOrphaned(context.Background(), true) + suite.NoError(err) + suite.Equal(1, totalPruned) + + // panda should still be in storage + hasKey, err := suite.storage.Has(context.Background(), pandaPath) + suite.NoError(err) + suite.True(hasKey) +} + +func (suite *PruneOrphanedTestSuite) TestPruneOrphanedMoist() { + // add a big orphan panda to store + b, err := os.ReadFile("./test/big-panda.gif") + if err != nil { + panic(err) + } + + pandaPath := "01GJQJ1YD9QCHCE12GG0EYHVNW/attachments/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif" + if err := suite.storage.PutStream(context.Background(), pandaPath, bytes.NewBuffer(b)); err != nil { + panic(err) + } + + // should show up 1 orphaned panda + totalPruned, err := suite.manager.PruneOrphaned(context.Background(), false) + suite.NoError(err) + suite.Equal(1, totalPruned) + + // panda should no longer be in storage + hasKey, err := suite.storage.Has(context.Background(), pandaPath) + suite.NoError(err) + suite.False(hasKey) +} + +func TestPruneOrphanedTestSuite(t *testing.T) { + suite.Run(t, &PruneOrphanedTestSuite{}) +} diff --git a/internal/regexes/regexes.go b/internal/regexes/regexes.go index c9286611e..657a79b91 100644 --- a/internal/regexes/regexes.go +++ b/internal/regexes/regexes.go @@ -140,6 +140,13 @@ var ( // BlockPath parses a path that validates and captures the username part and the ulid part // from eg /users/example_username/blocks/01F7XT5JZW1WMVSW1KADS8PVDH BlockPath = regexp.MustCompile(blockPath) + + filePath = fmt.Sprintf(`^(%s)/([a-z]+)/([a-z]+)/(%s)\.([a-z]+)$`, ulid, ulid) + // FilePath parses a file storage path of the form [ACCOUNT_ID]/[MEDIA_TYPE]/[MEDIA_SIZE]/[FILE_NAME] + // eg 01F8MH1H7YV1Z7D2C8K2730QBF/attachment/small/01F8MH8RMYQ6MSNY3JM2XT1CQ5.jpeg + // It captures the account id, media type, media size, file name, and file extension, eg + // `01F8MH1H7YV1Z7D2C8K2730QBF`, `attachment`, `small`, `01F8MH8RMYQ6MSNY3JM2XT1CQ5`, `jpeg`. + FilePath = regexp.MustCompile(filePath) ) // bufpool is a memory pool of byte buffers for use in our regex utility functions. |