summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author kim <89579420+NyaaaWhatsUpDoc@users.noreply.github.com> 2024-05-21 13:20:19 +0000
committer GitHub <noreply@github.com> 2024-05-21 14:20:19 +0100
commit b092da6d281a437c3f847e655c4ad182decb0978 (patch)
tree cd5dcc7bebb055cd1b1714fa7b41362a44fdb788
parent [chore] dependabot updates (#2922) (diff)
download gotosocial-b092da6d281a437c3f847e655c4ad182decb0978.tar.xz
[performance] cache v2 filter keyword regular expressions (#2903)
* add caching of filterkeyword regular expressions
* formatting
* fix WholeWord nil check
-rw-r--r-- internal/cache/db.go 5
-rw-r--r-- internal/db/bundb/filterkeyword.go 67
-rw-r--r-- internal/gtsmodel/filter.go 35
-rw-r--r-- internal/typeutils/internaltofrontend.go 12
-rw-r--r-- internal/typeutils/internaltofrontend_test.go 2
5 files changed, 85 insertions, 36 deletions
diff --git a/internal/cache/db.go b/internal/cache/db.go
index 4ce541770..16bd10eaa 100644
--- a/internal/cache/db.go
+++ b/internal/cache/db.go
@@ -531,6 +531,11 @@ func (c *Caches) initFilterKeyword() {
// See internal/db/bundb/filter.go.
filterKeyword2.Filter = nil
+ // We specifically DO NOT unset
+ // the regexp field here, as any
+ // regexp.Regexp instance is safe
+ // for concurrent access.
+
return filterKeyword2
}
diff --git a/internal/db/bundb/filterkeyword.go b/internal/db/bundb/filterkeyword.go
index 5fd824a0b..87a8e2a2a 100644
--- a/internal/db/bundb/filterkeyword.go
+++ b/internal/db/bundb/filterkeyword.go
@@ -25,6 +25,7 @@ import (
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+ "github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/util"
"github.com/uptrace/bun"
)
@@ -34,12 +35,22 @@ func (f *filterDB) GetFilterKeywordByID(ctx context.Context, id string) (*gtsmod
"ID",
func() (*gtsmodel.FilterKeyword, error) {
var filterKeyword gtsmodel.FilterKeyword
- err := f.db.
+
+ // Scan from DB.
+ if err := f.db.
NewSelect().
Model(&filterKeyword).
Where("? = ?", bun.Ident("id"), id).
- Scan(ctx)
- return &filterKeyword, err
+ Scan(ctx); err != nil {
+ return nil, err
+ }
+
+ // Pre-compile filter keyword regular expression.
+ if err := filterKeyword.Compile(); err != nil {
+ return nil, gtserror.Newf("error compiling filter keyword regex: %w", err)
+ }
+
+ return &filterKeyword, nil
},
id,
)
@@ -57,20 +68,20 @@ func (f *filterDB) GetFilterKeywordByID(ctx context.Context, id string) (*gtsmod
return filterKeyword, nil
}
-func (f *filterDB) populateFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) error {
+func (f *filterDB) populateFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) (err error) {
if filterKeyword.Filter == nil {
// Filter is not set, fetch from the cache or database.
- filter, err := f.state.DB.GetFilterByID(
- // Don't populate the filter with all of its keywords and statuses or we'll just end up back here.
+ filterKeyword.Filter, err = f.state.DB.GetFilterByID(
+
+ // Don't populate the filter with all of its keywords
+ // and statuses or we'll just end up back here.
gtscontext.SetBarebones(ctx),
filterKeyword.FilterID,
)
if err != nil {
return err
}
- filterKeyword.Filter = filter
}
-
return nil
}
@@ -84,6 +95,7 @@ func (f *filterDB) GetFilterKeywordsForAccountID(ctx context.Context, accountID
func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id string) ([]*gtsmodel.FilterKeyword, error) {
var filterKeywordIDs []string
+
if err := f.db.
NewSelect().
Model((*gtsmodel.FilterKeyword)(nil)).
@@ -92,6 +104,7 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st
Scan(ctx, &filterKeywordIDs); err != nil {
return nil, err
}
+
if len(filterKeywordIDs) == 0 {
return nil, nil
}
@@ -101,6 +114,8 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st
filterKeywordIDs,
func(uncachedFilterKeywordIDs []string) ([]*gtsmodel.FilterKeyword, error) {
uncachedFilterKeywords := make([]*gtsmodel.FilterKeyword, 0, len(uncachedFilterKeywordIDs))
+
+ // Scan from DB.
if err := f.db.
NewSelect().
Model(&uncachedFilterKeywords).
@@ -108,6 +123,16 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st
Scan(ctx); err != nil {
return nil, err
}
+
+ // Compile all the keyword regular expressions.
+ uncachedFilterKeywords = slices.DeleteFunc(uncachedFilterKeywords, func(filterKeyword *gtsmodel.FilterKeyword) bool {
+ if err := filterKeyword.Compile(); err != nil {
+ log.Errorf(ctx, "error compiling filter keyword regex: %v", err)
+ return true
+ }
+ return false
+ })
+
return uncachedFilterKeywords, nil
},
)
@@ -125,23 +150,26 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st
}
// Populate the filter keywords. Remove any that we can't populate from the return slice.
- errs := gtserror.NewMultiError(len(filterKeywords))
filterKeywords = slices.DeleteFunc(filterKeywords, func(filterKeyword *gtsmodel.FilterKeyword) bool {
if err := f.populateFilterKeyword(ctx, filterKeyword); err != nil {
- errs.Appendf(
- "error populating filter keyword %s: %w",
- filterKeyword.ID,
- err,
- )
+ log.Errorf(ctx, "error populating filter keyword: %v", err)
return true
}
return false
})
- return filterKeywords, errs.Combine()
+ return filterKeywords, nil
}
func (f *filterDB) PutFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) error {
+ if filterKeyword.Regexp == nil {
+ // Ensure regexp is compiled
+ // before attempted caching.
+ err := filterKeyword.Compile()
+ if err != nil {
+ return gtserror.Newf("error compiling filter keyword regex: %w", err)
+ }
+ }
return f.state.Caches.GTS.FilterKeyword.Store(filterKeyword, func() error {
_, err := f.db.
NewInsert().
@@ -156,7 +184,14 @@ func (f *filterDB) UpdateFilterKeyword(ctx context.Context, filterKeyword *gtsmo
if len(columns) > 0 {
columns = append(columns, "updated_at")
}
-
+ if filterKeyword.Regexp == nil {
+ // Ensure regexp is compiled
+ // before attempted caching.
+ err := filterKeyword.Compile()
+ if err != nil {
+ return gtserror.Newf("error compiling filter keyword regex: %w", err)
+ }
+ }
return f.state.Caches.GTS.FilterKeyword.Store(filterKeyword, func() error {
_, err := f.db.
NewUpdate().
diff --git a/internal/gtsmodel/filter.go b/internal/gtsmodel/filter.go
index db0a15dfd..c3feec00f 100644
--- a/internal/gtsmodel/filter.go
+++ b/internal/gtsmodel/filter.go
@@ -17,7 +17,10 @@
package gtsmodel
-import "time"
+import (
+ "regexp"
+ "time"
+)
// Filter stores a filter created by a local account.
type Filter struct {
@@ -39,14 +42,28 @@ type Filter struct {
// FilterKeyword stores a single keyword to filter statuses against.
type FilterKeyword struct {
- ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database
- CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item created
- UpdatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item last updated
- AccountID string `bun:"type:CHAR(26),notnull,nullzero"` // ID of the local account that created the filter keyword.
- FilterID string `bun:"type:CHAR(26),notnull,nullzero,unique:filter_keywords_filter_id_keyword_uniq"` // ID of the filter that this keyword belongs to.
- Filter *Filter `bun:"-"` // Filter corresponding to FilterID
- Keyword string `bun:",nullzero,notnull,unique:filter_keywords_filter_id_keyword_uniq"` // The keyword or phrase to filter against.
- WholeWord *bool `bun:",nullzero,notnull,default:false"` // Should the filter consider word boundaries?
+ ID string `bun:"type:CHAR(26),pk,nullzero,notnull,unique"` // id of this item in the database
+ CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item created
+ UpdatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"` // when was item last updated
+ AccountID string `bun:"type:CHAR(26),notnull,nullzero"` // ID of the local account that created the filter keyword.
+ FilterID string `bun:"type:CHAR(26),notnull,nullzero,unique:filter_keywords_filter_id_keyword_uniq"` // ID of the filter that this keyword belongs to.
+ Filter *Filter `bun:"-"` // Filter corresponding to FilterID
+ Keyword string `bun:",nullzero,notnull,unique:filter_keywords_filter_id_keyword_uniq"` // The keyword or phrase to filter against.
+ WholeWord *bool `bun:",nullzero,notnull,default:false"` // Should the filter consider word boundaries?
+ Regexp *regexp.Regexp `bun:"-"` // pre-prepared regular expression
+}
+
+// Compile will compile this FilterKeyword as a prepared regular expression.
+func (k *FilterKeyword) Compile() (err error) {
+ var wordBreak string
+ if k.WholeWord != nil && *k.WholeWord {
+ wordBreak = `\b`
+ }
+
+ // Compile keyword filter regexp.
+ quoted := regexp.QuoteMeta(k.Keyword)
+ k.Regexp, err = regexp.Compile(`(?i)` + wordBreak + quoted + wordBreak)
+ return // caller is expected to wrap this error
}
// FilterStatus stores a single status to filter.
diff --git a/internal/typeutils/internaltofrontend.go b/internal/typeutils/internaltofrontend.go
index 7a5572267..d7f1fac6c 100644
--- a/internal/typeutils/internaltofrontend.go
+++ b/internal/typeutils/internaltofrontend.go
@@ -22,7 +22,6 @@ import (
"errors"
"fmt"
"math"
- "regexp"
"strconv"
"strings"
"time"
@@ -746,18 +745,9 @@ func (c *Converter) statusToAPIFilterResults(
keywordMatches := make([]string, 0, len(filter.Keywords))
fields := filterableTextFields(s)
for _, filterKeyword := range filter.Keywords {
- wholeWord := util.PtrValueOr(filterKeyword.WholeWord, false)
- wordBreak := ``
- if wholeWord {
- wordBreak = `\b`
- }
- re, err := regexp.Compile(`(?i)` + wordBreak + regexp.QuoteMeta(filterKeyword.Keyword) + wordBreak)
- if err != nil {
- return nil, err
- }
var isMatch bool
for _, field := range fields {
- if re.MatchString(field) {
+ if filterKeyword.Regexp.MatchString(field) {
isMatch = true
break
}
diff --git a/internal/typeutils/internaltofrontend_test.go b/internal/typeutils/internaltofrontend_test.go
index 2c4f28a9b..676870c7b 100644
--- a/internal/typeutils/internaltofrontend_test.go
+++ b/internal/typeutils/internaltofrontend_test.go
@@ -546,6 +546,7 @@ func (suite *InternalToFrontendTestSuite) TestWarnFilteredStatusToFrontend() {
requestingAccount := suite.testAccounts["local_account_1"]
expectedMatchingFilter := suite.testFilters["local_account_1_filter_1"]
expectedMatchingFilterKeyword := suite.testFilterKeywords["local_account_1_filter_1_keyword_1"]
+ suite.NoError(expectedMatchingFilterKeyword.Compile())
expectedMatchingFilterKeyword.Filter = expectedMatchingFilter
expectedMatchingFilter.Keywords = []*gtsmodel.FilterKeyword{expectedMatchingFilterKeyword}
requestingAccountFilters := []*gtsmodel.Filter{expectedMatchingFilter}
@@ -700,6 +701,7 @@ func (suite *InternalToFrontendTestSuite) TestHideFilteredStatusToFrontend() {
expectedMatchingFilter := suite.testFilters["local_account_1_filter_1"]
expectedMatchingFilter.Action = gtsmodel.FilterActionHide
expectedMatchingFilterKeyword := suite.testFilterKeywords["local_account_1_filter_1_keyword_1"]
+ suite.NoError(expectedMatchingFilterKeyword.Compile())
expectedMatchingFilterKeyword.Filter = expectedMatchingFilter
expectedMatchingFilter.Keywords = []*gtsmodel.FilterKeyword{expectedMatchingFilterKeyword}
requestingAccountFilters := []*gtsmodel.Filter{expectedMatchingFilter}