From 2bdff66f0a12a16684e5d25bcace551446ec1c78 Mon Sep 17 00:00:00 2001 From: kim Date: Wed, 15 Oct 2025 13:32:02 +0200 Subject: [performance] cache account IDs in home timeline query not in exclusive lists (#4502) this caches the stage of the home timeline query in which we calculate which account IDs should be shown in a particular user's timeline. Reviewed-on: https://codeberg.org/superseriousbusiness/gotosocial/pulls/4502 Co-authored-by: kim Co-committed-by: kim --- internal/cache/cache.go | 1 + internal/cache/db.go | 15 +++++ internal/cache/invalidate.go | 7 +++ internal/config/config.go | 1 + internal/config/defaults.go | 1 + internal/config/helpers.gen.go | 47 ++++++++++++++- internal/db/bundb/list.go | 19 +++++- internal/db/bundb/timeline.go | 130 +++++++++++++++++++++-------------------- test/envparsing.sh | 1 + 9 files changed, 156 insertions(+), 66 deletions(-) diff --git a/internal/cache/cache.go b/internal/cache/cache.go index 2cc07de96..5bfa70ffa 100644 --- a/internal/cache/cache.go +++ b/internal/cache/cache.go @@ -96,6 +96,7 @@ func (c *Caches) Init() { c.initFollowRequest() c.initFollowRequestIDs() c.initFollowingTagIDs() + c.initHomeAccountIDs() c.initHomeTimelines() c.initInReplyToIDs() c.initInstance() diff --git a/internal/cache/db.go b/internal/cache/db.go index 8a8f59539..d5f25516b 100644 --- a/internal/cache/db.go +++ b/internal/cache/db.go @@ -145,6 +145,10 @@ type DBCaches struct { // FollowingTagIDs SliceCache[string] + // HomeAccountIDs provides access to the account IDs present in an account's + // home timeline, bearing in mind that some may be part of exclusive lists. + HomeAccountIDs SliceCache[string] + // Instance provides access to the gtsmodel Instance database cache. Instance StructCache[*gtsmodel.Instance] @@ -883,6 +887,17 @@ func (c *Caches) initFollowingTagIDs() { c.DB.FollowingTagIDs.Init(0, cap) } +func (c *Caches) initHomeAccountIDs() { + // Calculate maximum cache size. 
+ cap := calculateSliceCacheMax( + config.GetCacheHomeAccountIDsMemRatio(), + ) + + log.Infof(nil, "cache size = %d", cap) + + c.DB.HomeAccountIDs.Init(0, cap) +} + func (c *Caches) initInReplyToIDs() { // Calculate maximum cache size. cap := calculateSliceCacheMax( diff --git a/internal/cache/invalidate.go b/internal/cache/invalidate.go index c6c25d4eb..58c427050 100644 --- a/internal/cache/invalidate.go +++ b/internal/cache/invalidate.go @@ -155,6 +155,9 @@ func (c *Caches) OnInvalidateFollow(follow *gtsmodel.Follow) { // results for them as mute / visibility result requester. if follow.Account == nil || follow.Account.IsLocal() { localAccountIDs = append(localAccountIDs, follow.AccountID) + + // Also invalidate their home account IDs cache. + c.DB.HomeAccountIDs.Invalidate(follow.AccountID) } // If target is local (or uncertain), also invalidate @@ -261,6 +264,10 @@ func (c *Caches) OnInvalidateList(list *gtsmodel.List) { // follow IDs in list. "f"+list.ID, ) + + // Invalidate user's home account IDs cache, + // as list exclusivity flag may have changed. 
+ c.DB.HomeAccountIDs.Invalidate(list.AccountID) } func (c *Caches) OnInvalidateMedia(media *gtsmodel.MediaAttachment) { diff --git a/internal/config/config.go b/internal/config/config.go index 8768584fa..dfc919f11 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -240,6 +240,7 @@ type CacheConfiguration struct { FollowRequestMemRatio float64 `name:"follow-request-mem-ratio"` FollowRequestIDsMemRatio float64 `name:"follow-request-ids-mem-ratio"` FollowingTagIDsMemRatio float64 `name:"following-tag-ids-mem-ratio"` + HomeAccountIDsMemRatio float64 `name:"home-account-ids-mem-ratio"` InReplyToIDsMemRatio float64 `name:"in-reply-to-ids-mem-ratio"` InstanceMemRatio float64 `name:"instance-mem-ratio"` InteractionRequestMemRatio float64 `name:"interaction-request-mem-ratio"` diff --git a/internal/config/defaults.go b/internal/config/defaults.go index e3e3c559c..a4996e5c6 100644 --- a/internal/config/defaults.go +++ b/internal/config/defaults.go @@ -200,6 +200,7 @@ var Defaults = Configuration{ FollowRequestMemRatio: 2, FollowRequestIDsMemRatio: 2, FollowingTagIDsMemRatio: 2, + HomeAccountIDsMemRatio: 2, InReplyToIDsMemRatio: 3, InstanceMemRatio: 1, InteractionRequestMemRatio: 1, diff --git a/internal/config/helpers.gen.go b/internal/config/helpers.gen.go index d54d72b53..217917bcf 100644 --- a/internal/config/helpers.gen.go +++ b/internal/config/helpers.gen.go @@ -179,6 +179,7 @@ const ( CacheFollowRequestMemRatioFlag = "cache-follow-request-mem-ratio" CacheFollowRequestIDsMemRatioFlag = "cache-follow-request-ids-mem-ratio" CacheFollowingTagIDsMemRatioFlag = "cache-following-tag-ids-mem-ratio" + CacheHomeAccountIDsMemRatioFlag = "cache-home-account-ids-mem-ratio" CacheInReplyToIDsMemRatioFlag = "cache-in-reply-to-ids-mem-ratio" CacheInstanceMemRatioFlag = "cache-instance-mem-ratio" CacheInteractionRequestMemRatioFlag = "cache-interaction-request-mem-ratio" @@ -377,6 +378,7 @@ func (cfg *Configuration) RegisterFlags(flags *pflag.FlagSet) { 
flags.Float64("cache-follow-request-mem-ratio", cfg.Cache.FollowRequestMemRatio, "") flags.Float64("cache-follow-request-ids-mem-ratio", cfg.Cache.FollowRequestIDsMemRatio, "") flags.Float64("cache-following-tag-ids-mem-ratio", cfg.Cache.FollowingTagIDsMemRatio, "") + flags.Float64("cache-home-account-ids-mem-ratio", cfg.Cache.HomeAccountIDsMemRatio, "") flags.Float64("cache-in-reply-to-ids-mem-ratio", cfg.Cache.InReplyToIDsMemRatio, "") flags.Float64("cache-instance-mem-ratio", cfg.Cache.InstanceMemRatio, "") flags.Float64("cache-interaction-request-mem-ratio", cfg.Cache.InteractionRequestMemRatio, "") @@ -416,7 +418,7 @@ func (cfg *Configuration) RegisterFlags(flags *pflag.FlagSet) { } func (cfg *Configuration) MarshalMap() map[string]any { - cfgmap := make(map[string]any, 195) + cfgmap := make(map[string]any, 196) cfgmap["log-level"] = cfg.LogLevel cfgmap["log-format"] = cfg.LogFormat cfgmap["log-timestamp-format"] = cfg.LogTimestampFormat @@ -567,6 +569,7 @@ func (cfg *Configuration) MarshalMap() map[string]any { cfgmap["cache-follow-request-mem-ratio"] = cfg.Cache.FollowRequestMemRatio cfgmap["cache-follow-request-ids-mem-ratio"] = cfg.Cache.FollowRequestIDsMemRatio cfgmap["cache-following-tag-ids-mem-ratio"] = cfg.Cache.FollowingTagIDsMemRatio + cfgmap["cache-home-account-ids-mem-ratio"] = cfg.Cache.HomeAccountIDsMemRatio cfgmap["cache-in-reply-to-ids-mem-ratio"] = cfg.Cache.InReplyToIDsMemRatio cfgmap["cache-instance-mem-ratio"] = cfg.Cache.InstanceMemRatio cfgmap["cache-interaction-request-mem-ratio"] = cfg.Cache.InteractionRequestMemRatio @@ -1855,6 +1858,14 @@ func (cfg *Configuration) UnmarshalMap(cfgmap map[string]any) error { } } + if ival, ok := cfgmap["cache-home-account-ids-mem-ratio"]; ok { + var err error + cfg.Cache.HomeAccountIDsMemRatio, err = cast.ToFloat64E(ival) + if err != nil { + return fmt.Errorf("error casting %#v -> float64 for 'cache-home-account-ids-mem-ratio': %w", ival, err) + } + } + if ival, ok := 
cfgmap["cache-in-reply-to-ids-mem-ratio"]; ok { var err error cfg.Cache.InReplyToIDsMemRatio, err = cast.ToFloat64E(ival) @@ -5536,6 +5547,28 @@ func GetCacheFollowingTagIDsMemRatio() float64 { return global.GetCacheFollowing // SetCacheFollowingTagIDsMemRatio safely sets the value for global configuration 'Cache.FollowingTagIDsMemRatio' field func SetCacheFollowingTagIDsMemRatio(v float64) { global.SetCacheFollowingTagIDsMemRatio(v) } +// GetCacheHomeAccountIDsMemRatio safely fetches the Configuration value for state's 'Cache.HomeAccountIDsMemRatio' field +func (st *ConfigState) GetCacheHomeAccountIDsMemRatio() (v float64) { + st.mutex.RLock() + v = st.config.Cache.HomeAccountIDsMemRatio + st.mutex.RUnlock() + return v +} + +// SetCacheHomeAccountIDsMemRatio safely sets the Configuration value for state's 'Cache.HomeAccountIDsMemRatio' field +func (st *ConfigState) SetCacheHomeAccountIDsMemRatio(v float64) { + st.mutex.Lock() + defer st.mutex.Unlock() + st.config.Cache.HomeAccountIDsMemRatio = v + st.reloadToViper() +} + +// GetCacheHomeAccountIDsMemRatio safely fetches the value for global configuration 'Cache.HomeAccountIDsMemRatio' field +func GetCacheHomeAccountIDsMemRatio() float64 { return global.GetCacheHomeAccountIDsMemRatio() } + +// SetCacheHomeAccountIDsMemRatio safely sets the value for global configuration 'Cache.HomeAccountIDsMemRatio' field +func SetCacheHomeAccountIDsMemRatio(v float64) { global.SetCacheHomeAccountIDsMemRatio(v) } + // GetCacheInReplyToIDsMemRatio safely fetches the Configuration value for state's 'Cache.InReplyToIDsMemRatio' field func (st *ConfigState) GetCacheInReplyToIDsMemRatio() (v float64) { st.mutex.RLock() @@ -6559,6 +6592,7 @@ func (st *ConfigState) GetTotalOfMemRatios() (total float64) { total += st.config.Cache.FollowRequestMemRatio total += st.config.Cache.FollowRequestIDsMemRatio total += st.config.Cache.FollowingTagIDsMemRatio + total += st.config.Cache.HomeAccountIDsMemRatio total += 
st.config.Cache.InReplyToIDsMemRatio total += st.config.Cache.InstanceMemRatio total += st.config.Cache.InteractionRequestMemRatio @@ -7169,6 +7203,17 @@ func flattenConfigMap(cfgmap map[string]any) { } } + for _, key := range [][]string{ + {"cache", "home-account-ids-mem-ratio"}, + } { + ival, ok := mapGet(cfgmap, key...) + if ok { + cfgmap["cache-home-account-ids-mem-ratio"] = ival + nestedKeys[key[0]] = struct{}{} + break + } + } + for _, key := range [][]string{ {"cache", "in-reply-to-ids-mem-ratio"}, } { diff --git a/internal/db/bundb/list.go b/internal/db/bundb/list.go index e1afa64d4..3181dafcc 100644 --- a/internal/db/bundb/list.go +++ b/internal/db/bundb/list.go @@ -20,7 +20,6 @@ package bundb import ( "context" "errors" - "fmt" "slices" "time" @@ -358,13 +357,13 @@ func (l *listDB) PopulateListEntry(ctx context.Context, listEntry *gtsmodel.List var err error if listEntry.Follow == nil { - // ListEntry follow is not set, fetch from the database. + // ListEntry follow is not set, fetch from database. listEntry.Follow, err = l.state.DB.GetFollowByID( gtscontext.SetBarebones(ctx), listEntry.FollowID, ) if err != nil { - return fmt.Errorf("error populating listEntry follow: %w", err) + return gtserror.Newf("error populating follow: %w", err) } } @@ -454,6 +453,10 @@ func (l *listDB) DeleteAllListEntriesByFollows(ctx context.Context, followIDs .. func (l *listDB) invalidateEntryCaches(ctx context.Context, listIDs, followIDs []string) { var keys []string + // Anything requested in this func + // will only ever be barebones model. + ctx = gtscontext.SetBarebones(ctx) + // Generate ListedID keys to invalidate. keys = slices.Grow(keys[:0], 2*len(listIDs)) for _, listID := range listIDs { @@ -464,6 +467,16 @@ func (l *listDB) invalidateEntryCaches(ctx context.Context, listIDs, followIDs [ // Invalidate list timeline cache by ID. l.state.Caches.Timelines.List.Clear(listID) + + // Fetch from DB the list by given ID. 
+ list, err := l.GetListByID(ctx, listID) + if err != nil { + log.Errorf(ctx, "error getting list: %v", err) + continue + } + + // Invalidate home account IDs slice cache for list owner. + l.state.Caches.DB.HomeAccountIDs.Invalidate(list.AccountID) } // Invalidate ListedID slice cache entries. diff --git a/internal/db/bundb/timeline.go b/internal/db/bundb/timeline.go index 3b217cc5c..0e330f258 100644 --- a/internal/db/bundb/timeline.go +++ b/internal/db/bundb/timeline.go @@ -47,75 +47,18 @@ func (t *timelineDB) GetHomeTimeline(ctx context.Context, accountID string, page // of any paging parameters that selects by followings. func(q *bun.SelectQuery) (*bun.SelectQuery, error) { - // As this is the home timeline, it should be - // populated by statuses from accounts followed - // by accountID, and posts from accountID itself. - // - // So, begin by seeing who accountID follows. - // It should be a little cheaper to do this in - // a separate query like this, rather than using - // a join, since followIDs are cached in memory. - follows, err := t.state.DB.GetAccountFollows( - gtscontext.SetBarebones(ctx), - accountID, - nil, // select all - ) - if err != nil && !errors.Is(err, db.ErrNoEntries) { - return nil, gtserror.Newf("db error getting follows for account %s: %w", accountID, err) - } - - // To take account of exclusive lists, get all of - // this account's lists, so we can filter out follows - // that are in contained in exclusive lists. - lists, err := t.state.DB.GetListsByAccountID(ctx, accountID) - if err != nil && !errors.Is(err, db.ErrNoEntries) { - return nil, gtserror.Newf("db error getting lists for account %s: %w", accountID, err) - } - - // Index all follow IDs that fall in exclusive lists. - ignoreFollowIDs := make(map[string]struct{}) - for _, list := range lists { - if !*list.Exclusive { - // Not exclusive, - // we don't care. - continue - } - - // Fetch all follow IDs of the entries ccontained in this list. 
- listFollowIDs, err := t.state.DB.GetFollowIDsInList(ctx, list.ID, nil) - if err != nil && !errors.Is(err, db.ErrNoEntries) { - return nil, gtserror.Newf("db error getting list entry follow ids: %w", err) - } - - // Exclusive list, index all its follow IDs. - for _, followID := range listFollowIDs { - ignoreFollowIDs[followID] = struct{}{} - } - } - - // Extract just the accountID from each follow, - // ignoring follows that are in exclusive lists. - targetAccountIDs := make([]string, 0, len(follows)+1) - for _, f := range follows { - _, ignore := ignoreFollowIDs[f.ID] - if !ignore { - targetAccountIDs = append( - targetAccountIDs, - f.TargetAccountID, - ) - } + // Get account IDs that should be in this home timeline. + accountIDs, err := t.getHomeAccountIDs(ctx, accountID) + if err != nil { + return nil, gtserror.Newf("error getting home account ids: %w", err) } - // Add accountID itself as a pseudo follow so that - // accountID can see its own posts in the timeline. - targetAccountIDs = append(targetAccountIDs, accountID) - // Select only statuses authored by // accounts with IDs in the slice. q = q.Where( "? IN (?)", bun.Ident("account_id"), - bun.In(targetAccountIDs), + bun.In(accountIDs), ) // Only include statuses that aren't pending approval. @@ -309,6 +252,69 @@ func (t *timelineDB) GetTagTimeline(ctx context.Context, tagID string, page *pag ) } +func (t *timelineDB) getHomeAccountIDs(ctx context.Context, accountID string) ([]string, error) { + return t.state.Caches.DB.HomeAccountIDs.Load(accountID, func() ([]string, error) { + // As this is the home timeline, it should be + // populated by statuses from accounts followed + // by accountID, and posts from accountID itself. + // So, begin by seeing who accountID follows. 
+ follows, err := t.state.DB.GetAccountFollows( + gtscontext.SetBarebones(ctx), + accountID, + nil, // select all + ) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return nil, gtserror.Newf("db error getting follows for account %s: %w", accountID, err) + } + + // To take account of exclusive lists, get all of this account's + // lists, so we can filter out follows that are in exclusive lists. + lists, err := t.state.DB.GetListsByAccountID(ctx, accountID) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return nil, gtserror.Newf("db error getting lists for account %s: %w", accountID, err) + } + + // Index all follow IDs that fall in exclusive lists. + ignoreFollowIDs := make(map[string]struct{}) + for _, list := range lists { + if !*list.Exclusive { + // Not exclusive, + // we don't care. + continue + } + + // Fetch all follow IDs of the entries contained in this list. + listFollowIDs, err := t.state.DB.GetFollowIDsInList(ctx, list.ID, nil) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return nil, gtserror.Newf("db error getting list entry follow ids: %w", err) + } + + // Exclusive list, index all its follow IDs. + for _, followID := range listFollowIDs { + ignoreFollowIDs[followID] = struct{}{} + } + } + + // Extract just the accountID from each follow, + // ignoring follows that are in exclusive lists. + targetAccountIDs := make([]string, 0, len(follows)+1) + for _, f := range follows { + _, ignore := ignoreFollowIDs[f.ID] + if !ignore { + targetAccountIDs = append( + targetAccountIDs, + f.TargetAccountID, + ) + } + } + + // Add accountID itself as a pseudo follow so that + // accountID can see its own posts in the timeline. 
+ targetAccountIDs = append(targetAccountIDs, accountID) + return targetAccountIDs, nil + }) +} + func loadStatusTimelinePage( ctx context.Context, db *bun.DB, diff --git a/test/envparsing.sh b/test/envparsing.sh index 36fd847c5..206da0d07 100755 --- a/test/envparsing.sh +++ b/test/envparsing.sh @@ -49,6 +49,7 @@ EXPECT=$(cat << "EOF" "cache-follow-request-ids-mem-ratio": 2, "cache-follow-request-mem-ratio": 2, "cache-following-tag-ids-mem-ratio": 2, + "cache-home-account-ids-mem-ratio": 2, "cache-in-reply-to-ids-mem-ratio": 3, "cache-instance-mem-ratio": 1, "cache-interaction-request-mem-ratio": 1, -- cgit v1.2.3