diff options
| author | 2024-01-31 13:31:53 +0000 | |
|---|---|---|
| committer | 2024-01-31 13:31:53 +0000 | |
| commit | c9452f32f38b9ac1fb96a834202fd3e2f25897a1 (patch) | |
| tree | d61ed47e1c8ce010791ffa309da963f0304c92b8 | |
| parent | [bugfix] parent status replied to status not dereferenced sometimes (#2587) (diff) | |
| download | gotosocial-c9452f32f38b9ac1fb96a834202fd3e2f25897a1.tar.xz | |
[bugfix] fix possible infinite loops in media / emoji cleanup (#2590)
* update media / emoji cleaner funcs to use new paging package, check for same returned maxID
* fix other calls of getattachments and getmojis not using paging
* use alternative order-by function
| -rw-r--r-- | cmd/gotosocial/action/admin/media/list.go | 66 | ||||
| -rw-r--r-- | internal/cleaner/emoji.go | 63 | ||||
| -rw-r--r-- | internal/cleaner/media.go | 37 | ||||
| -rw-r--r-- | internal/db/bundb/emoji.go | 15 | ||||
| -rw-r--r-- | internal/db/bundb/media.go | 11 | ||||
| -rw-r--r-- | internal/db/emoji.go | 5 | ||||
| -rw-r--r-- | internal/db/media.go | 5 | ||||
| -rw-r--r-- | internal/util/slices.go | 71 | 
8 files changed, 156 insertions, 117 deletions
diff --git a/cmd/gotosocial/action/admin/media/list.go b/cmd/gotosocial/action/admin/media/list.go index 0a2e60ede..ed10c967a 100644 --- a/cmd/gotosocial/action/admin/media/list.go +++ b/cmd/gotosocial/action/admin/media/list.go @@ -31,14 +31,14 @@ import (  	"github.com/superseriousbusiness/gotosocial/internal/db/bundb"  	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"  	"github.com/superseriousbusiness/gotosocial/internal/log" +	"github.com/superseriousbusiness/gotosocial/internal/paging"  	"github.com/superseriousbusiness/gotosocial/internal/state"  )  type list struct {  	dbService  db.DB  	state      *state.State -	maxID      string -	limit      int +	page       paging.Page  	localOnly  bool  	remoteOnly bool  	out        *bufio.Writer @@ -47,31 +47,32 @@ type list struct {  // Get a list of attachment using a custom filter  func (l *list) GetAllAttachmentPaths(ctx context.Context, filter func(*gtsmodel.MediaAttachment) string) ([]string, error) {  	res := make([]string, 0, 100) +  	for { -		attachments, err := l.dbService.GetAttachments(ctx, l.maxID, l.limit) -		if err != nil { +		// Get the next page of media attachments up to max ID. +		attachments, err := l.dbService.GetAttachments(ctx, &l.page) +		if err != nil && !errors.Is(err, db.ErrNoEntries) {  			return nil, fmt.Errorf("failed to retrieve media metadata from database: %w", err)  		} +		// Get current max ID. +		maxID := l.page.Max.Value + +		// If no attachments or the same group is returned, we reached the end. +		if len(attachments) == 0 || maxID == attachments[len(attachments)-1].ID { +			break +		} + +		// Use last ID as the next 'maxID' value. +		maxID = attachments[len(attachments)-1].ID +		l.page.Max = paging.MaxID(maxID) +  		for _, a := range attachments {  			v := filter(a)  			if v != "" {  				res = append(res, v)  			}  		} - -		// If we got less results than our limit, we've reached the -		// last page to retrieve and we can break the loop. If the -		// last batch happens to contain exactly the same amount of -		// items as the limit we'll end up doing one extra query. -		if len(attachments) < l.limit { -			break -		} - -		// Grab the last ID from the batch and set it as the maxID -		// that'll be used in the next iteration so we don't get items -		// we've already seen. -		l.maxID = attachments[len(attachments)-1].ID  	}  	return res, nil  } @@ -80,30 +81,30 @@ func (l *list) GetAllAttachmentPaths(ctx context.Context, filter func(*gtsmodel.  func (l *list) GetAllEmojisPaths(ctx context.Context, filter func(*gtsmodel.Emoji) string) ([]string, error) {  	res := make([]string, 0, 100)  	for { -		attachments, err := l.dbService.GetEmojis(ctx, l.maxID, l.limit) +		// Get the next page of emoji media up to max ID. +		attachments, err := l.dbService.GetEmojis(ctx, &l.page)  		if err != nil {  			return nil, fmt.Errorf("failed to retrieve media metadata from database: %w", err)  		} +		// Get current max ID. +		maxID := l.page.Max.Value + +		// If no attachments or the same group is returned, we reached the end. +		if len(attachments) == 0 || maxID == attachments[len(attachments)-1].ID { +			break +		} + +		// Use last ID as the next 'maxID' value. +		maxID = attachments[len(attachments)-1].ID +		l.page.Max = paging.MaxID(maxID) +  		for _, a := range attachments {  			v := filter(a)  			if v != "" {  				res = append(res, v)  			}  		} - -		// If we got less results than our limit, we've reached the -		// last page to retrieve and we can break the loop. If the -		// last batch happens to contain exactly the same amount of -		// items as the limit we'll end up doing one extra query. -		if len(attachments) < l.limit { -			break -		} - -		// Grab the last ID from the batch and set it as the maxID -		// that'll be used in the next iteration so we don't get items -		// we've already seen. -		l.maxID = attachments[len(attachments)-1].ID  	}  	return res, nil  } @@ -137,8 +138,7 @@ func setupList(ctx context.Context) (*list, error) {  	return &list{  		dbService:  dbService,  		state:      &state, -		limit:      200, -		maxID:      "", +		page:       paging.Page{Limit: 200},  		localOnly:  localOnly,  		remoteOnly: remoteOnly,  		out:        bufio.NewWriter(os.Stdout), diff --git a/internal/cleaner/emoji.go b/internal/cleaner/emoji.go index d2baec7e8..62ed0f012 100644 --- a/internal/cleaner/emoji.go +++ b/internal/cleaner/emoji.go @@ -27,6 +27,7 @@ import (  	"github.com/superseriousbusiness/gotosocial/internal/gtserror"  	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"  	"github.com/superseriousbusiness/gotosocial/internal/log" +	"github.com/superseriousbusiness/gotosocial/internal/paging"  )  // Emoji encompasses a set of @@ -105,8 +106,9 @@ func (e *Emoji) UncacheRemote(ctx context.Context, olderThan time.Time) (int, er  			return total, gtserror.Newf("error getting remote emoji: %w", err)  		} -		if len(emojis) == 0 { -			// reached end. +		// If no emojis / same group is returned, we reached the end. +		if len(emojis) == 0 || +			olderThan.Equal(emojis[len(emojis)-1].CreatedAt) {  			break  		} @@ -140,23 +142,30 @@ func (e *Emoji) UncacheRemote(ctx context.Context, olderThan time.Time) (int, er  func (e *Emoji) FixBroken(ctx context.Context) (int, error) {  	var (  		total int -		maxID string +		page  paging.Page  	) +	// Set page select limit. +	page.Limit = selectLimit +  	for { -		// Fetch the next batch of emoji media up to next ID. -		emojis, err := e.state.DB.GetEmojis(ctx, maxID, selectLimit) +		// Fetch the next batch of emoji to next max ID. +		emojis, err := e.state.DB.GetEmojis(ctx, &page)  		if err != nil && !errors.Is(err, db.ErrNoEntries) {  			return total, gtserror.Newf("error getting emojis: %w", err)  		} -		if len(emojis) == 0 { -			// reached end. +		// Get current max ID. +		maxID := page.Max.Value + +		// If no emoji or the same group is returned, we reached end. +		if len(emojis) == 0 || maxID == emojis[len(emojis)-1].ID {  			break  		} -		// Use last as the next 'maxID' value. +		// Use last ID as the next 'maxID'.  		maxID = emojis[len(emojis)-1].ID +		page.Max = paging.MaxID(maxID)  		for _, emoji := range emojis {  			// Check / fix missing broken emoji. @@ -182,23 +191,30 @@ func (e *Emoji) FixBroken(ctx context.Context) (int, error) {  func (e *Emoji) PruneUnused(ctx context.Context) (int, error) {  	var (  		total int -		maxID string +		page  paging.Page  	) +	// Set page select limit. +	page.Limit = selectLimit +  	for { -		// Fetch the next batch of emoji media up to next ID. -		emojis, err := e.state.DB.GetRemoteEmojis(ctx, maxID, selectLimit) +		// Fetch the next batch of emoji to next max ID. +		emojis, err := e.state.DB.GetRemoteEmojis(ctx, &page)  		if err != nil && !errors.Is(err, db.ErrNoEntries) {  			return total, gtserror.Newf("error getting remote emojis: %w", err)  		} -		if len(emojis) == 0 { -			// reached end. +		// Get current max ID. +		maxID := page.Max.Value + +		// If no emoji or the same group is returned, we reached end. +		if len(emojis) == 0 || maxID == emojis[len(emojis)-1].ID {  			break  		} -		// Use last as the next 'maxID' value. +		// Use last ID as the next 'maxID'.  		maxID = emojis[len(emojis)-1].ID +		page.Max = paging.MaxID(maxID)  		for _, emoji := range emojis {  			// Check / prune unused emoji media. @@ -224,23 +240,30 @@ func (e *Emoji) PruneUnused(ctx context.Context) (int, error) {  func (e *Emoji) FixCacheStates(ctx context.Context) (int, error) {  	var (  		total int -		maxID string +		page  paging.Page  	) +	// Set page select limit. +	page.Limit = selectLimit +  	for { -		// Fetch the next batch of emoji media up to next ID. -		emojis, err := e.state.DB.GetRemoteEmojis(ctx, maxID, selectLimit) +		// Fetch the next batch of emoji to next max ID. +		emojis, err := e.state.DB.GetRemoteEmojis(ctx, &page)  		if err != nil && !errors.Is(err, db.ErrNoEntries) {  			return total, gtserror.Newf("error getting remote emojis: %w", err)  		} -		if len(emojis) == 0 { -			// reached end. +		// Get current max ID. +		maxID := page.Max.Value + +		// If no emoji or the same group is returned, we reached end. +		if len(emojis) == 0 || maxID == emojis[len(emojis)-1].ID {  			break  		} -		// Use last as the next 'maxID' value. +		// Use last ID as the next 'maxID'.  		maxID = emojis[len(emojis)-1].ID +		page.Max = paging.MaxID(maxID)  		for _, emoji := range emojis {  			// Check / fix required emoji cache states. diff --git a/internal/cleaner/media.go b/internal/cleaner/media.go index 6db205d13..f3cda5d87 100644 --- a/internal/cleaner/media.go +++ b/internal/cleaner/media.go @@ -28,6 +28,7 @@ import (  	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"  	"github.com/superseriousbusiness/gotosocial/internal/log"  	"github.com/superseriousbusiness/gotosocial/internal/media" +	"github.com/superseriousbusiness/gotosocial/internal/paging"  	"github.com/superseriousbusiness/gotosocial/internal/regexes"  	"github.com/superseriousbusiness/gotosocial/internal/uris"  ) @@ -128,23 +129,30 @@ func (m *Media) PruneOrphaned(ctx context.Context) (int, error) {  func (m *Media) PruneUnused(ctx context.Context) (int, error) {  	var (  		total int -		maxID string +		page  paging.Page  	) +	// Set page select limit. +	page.Limit = selectLimit +  	for { -		// Fetch the next batch of media attachments up to next max ID. -		attachments, err := m.state.DB.GetAttachments(ctx, maxID, selectLimit) +		// Fetch the next batch of media attachments to next maxID. +		attachments, err := m.state.DB.GetAttachments(ctx, &page)  		if err != nil && !errors.Is(err, db.ErrNoEntries) {  			return total, gtserror.Newf("error getting attachments: %w", err)  		} -		if len(attachments) == 0 { -			// reached end. +		// Get current max ID. +		maxID := page.Max.Value + +		// If no attachments or the same group is returned, we reached the end. +		if len(attachments) == 0 || maxID == attachments[len(attachments)-1].ID {  			break  		}  		// Use last ID as the next 'maxID' value.  		maxID = attachments[len(attachments)-1].ID +		page.Max = paging.MaxID(maxID)  		for _, media := range attachments {  			// Check / prune unused media attachment. @@ -183,8 +191,9 @@ func (m *Media) UncacheRemote(ctx context.Context, olderThan time.Time) (int, er  			return total, gtserror.Newf("error getting remote attachments: %w", err)  		} -		if len(attachments) == 0 { -			// reached end. +		// If no attachments / same group is returned, we reached the end. +		if len(attachments) == 0 || +			olderThan.Equal(attachments[len(attachments)-1].CreatedAt) {  			break  		} @@ -215,23 +224,29 @@ func (m *Media) UncacheRemote(ctx context.Context, olderThan time.Time) (int, er  func (m *Media) FixCacheStates(ctx context.Context) (int, error) {  	var (  		total int -		maxID string +		page  paging.Page  	) +	// Set page select limit. +	page.Limit = selectLimit +  	for {  		// Fetch the next batch of media attachments up to next max ID. -		attachments, err := m.state.DB.GetRemoteAttachments(ctx, maxID, selectLimit) +		attachments, err := m.state.DB.GetRemoteAttachments(ctx, &page)  		if err != nil && !errors.Is(err, db.ErrNoEntries) {  			return total, gtserror.Newf("error getting remote attachments: %w", err)  		} +		// Get current max ID. +		maxID := page.Max.Value -		if len(attachments) == 0 { -			// reached end. +		// If no attachments or the same group is returned, we reached the end. +		if len(attachments) == 0 || maxID == attachments[len(attachments)-1].ID {  			break  		}  		// Use last ID as the next 'maxID' value.  		maxID = attachments[len(attachments)-1].ID +		page.Max = paging.MaxID(maxID)  		for _, media := range attachments {  			// Check / fix required media cache states. diff --git a/internal/db/bundb/emoji.go b/internal/db/bundb/emoji.go index d1cb9dfbd..608cb6417 100644 --- a/internal/db/bundb/emoji.go +++ b/internal/db/bundb/emoji.go @@ -30,6 +30,7 @@ import (  	"github.com/superseriousbusiness/gotosocial/internal/gtserror"  	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"  	"github.com/superseriousbusiness/gotosocial/internal/log" +	"github.com/superseriousbusiness/gotosocial/internal/paging"  	"github.com/superseriousbusiness/gotosocial/internal/state"  	"github.com/superseriousbusiness/gotosocial/internal/util"  	"github.com/uptrace/bun" @@ -326,8 +327,11 @@ func (e *emojiDB) GetEmojisBy(ctx context.Context, domain string, includeDisable  	return e.GetEmojisByIDs(ctx, emojiIDs)  } -func (e *emojiDB) GetEmojis(ctx context.Context, maxID string, limit int) ([]*gtsmodel.Emoji, error) { -	var emojiIDs []string +func (e *emojiDB) GetEmojis(ctx context.Context, page *paging.Page) ([]*gtsmodel.Emoji, error) { +	maxID := page.GetMax() +	limit := page.GetLimit() + +	emojiIDs := make([]string, 0, limit)  	q := e.db.NewSelect().  		Table("emojis"). @@ -349,8 +353,11 @@ func (e *emojiDB) GetEmojis(ctx context.Context, maxID string, limit int) ([]*gt  	return e.GetEmojisByIDs(ctx, emojiIDs)  } -func (e *emojiDB) GetRemoteEmojis(ctx context.Context, maxID string, limit int) ([]*gtsmodel.Emoji, error) { -	var emojiIDs []string +func (e *emojiDB) GetRemoteEmojis(ctx context.Context, page *paging.Page) ([]*gtsmodel.Emoji, error) { +	maxID := page.GetMax() +	limit := page.GetLimit() + +	emojiIDs := make([]string, 0, limit)  	q := e.db.NewSelect().  		Table("emojis"). diff --git a/internal/db/bundb/media.go b/internal/db/bundb/media.go index ce3c90083..ced38a588 100644 --- a/internal/db/bundb/media.go +++ b/internal/db/bundb/media.go @@ -27,6 +27,7 @@ import (  	"github.com/superseriousbusiness/gotosocial/internal/gtscontext"  	"github.com/superseriousbusiness/gotosocial/internal/gtserror"  	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" +	"github.com/superseriousbusiness/gotosocial/internal/paging"  	"github.com/superseriousbusiness/gotosocial/internal/state"  	"github.com/superseriousbusiness/gotosocial/internal/util"  	"github.com/uptrace/bun" @@ -232,7 +233,10 @@ func (m *mediaDB) DeleteAttachment(ctx context.Context, id string) error {  	return err  } -func (m *mediaDB) GetAttachments(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, error) { +func (m *mediaDB) GetAttachments(ctx context.Context, page *paging.Page) ([]*gtsmodel.MediaAttachment, error) { +	maxID := page.GetMax() +	limit := page.GetLimit() +  	attachmentIDs := make([]string, 0, limit)  	q := m.db.NewSelect(). @@ -255,7 +259,10 @@ func (m *mediaDB) GetAttachments(ctx context.Context, maxID string, limit int) (  	return m.GetAttachmentsByIDs(ctx, attachmentIDs)  } -func (m *mediaDB) GetRemoteAttachments(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, error) { +func (m *mediaDB) GetRemoteAttachments(ctx context.Context, page *paging.Page) ([]*gtsmodel.MediaAttachment, error) { +	maxID := page.GetMax() +	limit := page.GetLimit() +  	attachmentIDs := make([]string, 0, limit)  	q := m.db.NewSelect(). diff --git a/internal/db/emoji.go b/internal/db/emoji.go index fed956933..26a881dbc 100644 --- a/internal/db/emoji.go +++ b/internal/db/emoji.go @@ -22,6 +22,7 @@ import (  	"time"  	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" +	"github.com/superseriousbusiness/gotosocial/internal/paging"  )  // EmojiAllDomains can be used as the `domain` value in a GetEmojis @@ -47,10 +48,10 @@ type Emoji interface {  	GetUseableEmojis(ctx context.Context) ([]*gtsmodel.Emoji, error)  	// GetEmojis fetches all emojis with IDs less than 'maxID', up to a maximum of 'limit' emojis. -	GetEmojis(ctx context.Context, maxID string, limit int) ([]*gtsmodel.Emoji, error) +	GetEmojis(ctx context.Context, page *paging.Page) ([]*gtsmodel.Emoji, error)  	// GetRemoteEmojis fetches all remote emojis with IDs less than 'maxID', up to a maximum of 'limit' emojis. -	GetRemoteEmojis(ctx context.Context, maxID string, limit int) ([]*gtsmodel.Emoji, error) +	GetRemoteEmojis(ctx context.Context, page *paging.Page) ([]*gtsmodel.Emoji, error)  	// GetCachedEmojisOlderThan fetches all cached remote emojis with 'updated_at' greater than 'olderThan', up to a maximum of 'limit' emojis.  	GetCachedEmojisOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.Emoji, error) diff --git a/internal/db/media.go b/internal/db/media.go index 0ef03226b..a41f8970a 100644 --- a/internal/db/media.go +++ b/internal/db/media.go @@ -22,6 +22,7 @@ import (  	"time"  	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" +	"github.com/superseriousbusiness/gotosocial/internal/paging"  )  // Media contains functions related to creating/getting/removing media attachments. @@ -42,10 +43,10 @@ type Media interface {  	DeleteAttachment(ctx context.Context, id string) error  	// GetAttachments fetches media attachments up to a given max ID, and at most limit. -	GetAttachments(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, error) +	GetAttachments(ctx context.Context, page *paging.Page) ([]*gtsmodel.MediaAttachment, error)  	// GetRemoteAttachments fetches media attachments with a non-empty domain, up to a given max ID, and at most limit. -	GetRemoteAttachments(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, error) +	GetRemoteAttachments(ctx context.Context, page *paging.Page) ([]*gtsmodel.MediaAttachment, error)  	// GetCachedAttachmentsOlderThan gets limit n remote attachments (including avatars and headers) older than  	// the given time. These will be returned in order of attachment.created_at descending (i.e. newest to oldest). diff --git a/internal/util/slices.go b/internal/util/slices.go index 51d560dbd..0505229e5 100644 --- a/internal/util/slices.go +++ b/internal/util/slices.go @@ -17,6 +17,8 @@  package util +import "slices" +  // Deduplicate deduplicates entries in the given slice.  func Deduplicate[T comparable](in []T) []T {  	var ( @@ -47,6 +49,10 @@ func DeduplicateFunc[T any, C comparable](in []T, key func(v T) C) []T {  		deduped = make([]T, 0, inL)  	) +	if key == nil { +		panic("nil func") +	} +  	for _, v := range in {  		k := key(v) @@ -66,6 +72,10 @@ func DeduplicateFunc[T any, C comparable](in []T, key func(v T) C) []T {  // passing each item to 'get' and deduplicating the end result.  // Compared to Deduplicate() this returns []K, NOT input type []T.  func Collate[T any, K comparable](in []T, get func(T) K) []K { +	if get == nil { +		panic("nil func") +	} +  	ks := make([]K, 0, len(in))  	km := make(map[K]struct{}, len(in)) @@ -86,50 +96,25 @@ func Collate[T any, K comparable](in []T, get func(T) K) []K {  // OrderBy orders a slice of given type by the provided alternative slice of comparable type.  func OrderBy[T any, K comparable](in []T, keys []K, key func(T) K) { -	var ( -		start  int -		offset int -	) - -	for i := 0; i < len(keys); i++ { -		var ( -			// key at index. -			k = keys[i] - -			// sentinel -			// idx value. -			idx = -1 -		) - -		// Look for model with key in slice. -		for j := start; j < len(in); j++ { -			if key(in[j]) == k { -				idx = j -				break -			} -		} - -		if idx == -1 { -			// model with key -			// was not found. -			offset++ -			continue -		} - -		// Update -		// start -		start++ +	if key == nil { +		panic("nil func") +	} -		// Expected ID index. -		exp := i - offset +	// Create lookup of keys->idx. +	m := make(map[K]int, len(in)) +	for i, k := range keys { +		m[k] = i +	} -		if idx == exp { -			// Model is in expected -			// location, keep going. -			continue +	// Sort according to the reverse lookup. +	slices.SortFunc(in, func(a, b T) int { +		ai := m[key(a)] +		bi := m[key(b)] +		if ai < bi { +			return -1 +		} else if bi < ai { +			return +1  		} - -		// Swap models at current and expected. -		in[idx], in[exp] = in[exp], in[idx] -	} +		return 0 +	})  }  | 
