diff options
| -rw-r--r-- | internal/cache/db.go | 5 | ||||
| -rw-r--r-- | internal/db/bundb/filterkeyword.go | 67 | ||||
| -rw-r--r-- | internal/gtsmodel/filter.go | 35 | ||||
| -rw-r--r-- | internal/typeutils/internaltofrontend.go | 12 | ||||
| -rw-r--r-- | internal/typeutils/internaltofrontend_test.go | 2 | 
5 files changed, 85 insertions, 36 deletions
diff --git a/internal/cache/db.go b/internal/cache/db.go index 4ce541770..16bd10eaa 100644 --- a/internal/cache/db.go +++ b/internal/cache/db.go @@ -531,6 +531,11 @@ func (c *Caches) initFilterKeyword() {  		// See internal/db/bundb/filter.go.  		filterKeyword2.Filter = nil +		// We specifically DO NOT unset +		// the regexp field here, as any +		// regexp.Regexp instance is safe +		// for concurrent access. +  		return filterKeyword2  	} diff --git a/internal/db/bundb/filterkeyword.go b/internal/db/bundb/filterkeyword.go index 5fd824a0b..87a8e2a2a 100644 --- a/internal/db/bundb/filterkeyword.go +++ b/internal/db/bundb/filterkeyword.go @@ -25,6 +25,7 @@ import (  	"github.com/superseriousbusiness/gotosocial/internal/gtscontext"  	"github.com/superseriousbusiness/gotosocial/internal/gtserror"  	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" +	"github.com/superseriousbusiness/gotosocial/internal/log"  	"github.com/superseriousbusiness/gotosocial/internal/util"  	"github.com/uptrace/bun"  ) @@ -34,12 +35,22 @@ func (f *filterDB) GetFilterKeywordByID(ctx context.Context, id string) (*gtsmod  		"ID",  		func() (*gtsmodel.FilterKeyword, error) {  			var filterKeyword gtsmodel.FilterKeyword -			err := f.db. + +			// Scan from DB. +			if err := f.db.  				NewSelect().  				Model(&filterKeyword).  				Where("? = ?", bun.Ident("id"), id). -				Scan(ctx) -			return &filterKeyword, err +				Scan(ctx); err != nil { +				return nil, err +			} + +			// Pre-compile filter keyword regular expression. +			if err := filterKeyword.Compile(); err != nil { +				return nil, gtserror.Newf("error compiling filter keyword regex: %w", err) +			} + +			return &filterKeyword, nil  		},  		id,  	) @@ -57,20 +68,20 @@ func (f *filterDB) GetFilterKeywordByID(ctx context.Context, id string) (*gtsmod  	return filterKeyword, nil  } -func (f *filterDB) populateFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) error { +func (f *filterDB) populateFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) (err error) {  	if filterKeyword.Filter == nil {  		// Filter is not set, fetch from the cache or database. -		filter, err := f.state.DB.GetFilterByID( -			// Don't populate the filter with all of its keywords and statuses or we'll just end up back here. +		filterKeyword.Filter, err = f.state.DB.GetFilterByID( + +			// Don't populate the filter with all of its keywords +			// and statuses or we'll just end up back here.  			gtscontext.SetBarebones(ctx),  			filterKeyword.FilterID,  		)  		if err != nil {  			return err  		} -		filterKeyword.Filter = filter  	} -  	return nil  } @@ -84,6 +95,7 @@ func (f *filterDB) GetFilterKeywordsForAccountID(ctx context.Context, accountID  func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id string) ([]*gtsmodel.FilterKeyword, error) {  	var filterKeywordIDs []string +  	if err := f.db.  		NewSelect().  		Model((*gtsmodel.FilterKeyword)(nil)). @@ -92,6 +104,7 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st  		Scan(ctx, &filterKeywordIDs); err != nil {  		return nil, err  	} +  	if len(filterKeywordIDs) == 0 {  		return nil, nil  	} @@ -101,6 +114,8 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st  		filterKeywordIDs,  		func(uncachedFilterKeywordIDs []string) ([]*gtsmodel.FilterKeyword, error) {  			uncachedFilterKeywords := make([]*gtsmodel.FilterKeyword, 0, len(uncachedFilterKeywordIDs)) + +			// Scan from DB.  			if err := f.db.  				NewSelect().  				Model(&uncachedFilterKeywords). @@ -108,6 +123,16 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st  				Scan(ctx); err != nil {  				return nil, err  			} + +			// Compile all the keyword regular expressions. +			uncachedFilterKeywords = slices.DeleteFunc(uncachedFilterKeywords, func(filterKeyword *gtsmodel.FilterKeyword) bool { +				if err := filterKeyword.Compile(); err != nil { +					log.Errorf(ctx, "error compiling filter keyword regex: %v", err) +					return true +				} +				return false +			}) +  			return uncachedFilterKeywords, nil  		},  	) @@ -125,23 +150,26 @@ func (f *filterDB) getFilterKeywords(ctx context.Context, idColumn string, id st  	}  	// Populate the filter keywords. Remove any that we can't populate from the return slice. -	errs := gtserror.NewMultiError(len(filterKeywords))  	filterKeywords = slices.DeleteFunc(filterKeywords, func(filterKeyword *gtsmodel.FilterKeyword) bool {  		if err := f.populateFilterKeyword(ctx, filterKeyword); err != nil { -			errs.Appendf( -				"error populating filter keyword %s: %w", -				filterKeyword.ID, -				err, -			) +			log.Errorf(ctx, "error populating filter keyword: %v", err)  			return true  		}  		return false  	}) -	return filterKeywords, errs.Combine() +	return filterKeywords, nil  }  func (f *filterDB) PutFilterKeyword(ctx context.Context, filterKeyword *gtsmodel.FilterKeyword) error { +	if filterKeyword.Regexp == nil { +		// Ensure regexp is compiled +		// before attempted caching. +		err := filterKeyword.Compile() +		if err != nil { +			return gtserror.Newf("error compiling filter keyword regex: %w", err) +		} +	}  	return f.state.Caches.GTS.FilterKeyword.Store(filterKeyword, func() error {  		_, err := f.db.  			NewInsert(). @@ -156,7 +184,14 @@ func (f *filterDB) UpdateFilterKeyword(ctx context.Context, filterKeyword *gtsmo  	if len(columns) > 0 {  		columns = append(columns, "updated_at")  	} - +	if filterKeyword.Regexp == nil { +		// Ensure regexp is compiled +		// before attempted caching. +		err := filterKeyword.Compile() +		if err != nil { +			return gtserror.Newf("error compiling filter keyword regex: %w", err) +		} +	}  	return f.state.Caches.GTS.FilterKeyword.Store(filterKeyword, func() error {  		_, err := f.db.  			NewUpdate(). diff --git a/internal/gtsmodel/filter.go b/internal/gtsmodel/filter.go index db0a15dfd..c3feec00f 100644 --- a/internal/gtsmodel/filter.go +++ b/internal/gtsmodel/filter.go @@ -17,7 +17,10 @@  package gtsmodel -import "time" +import ( +	"regexp" +	"time" +)  // Filter stores a filter created by a local account.  type Filter struct { @@ -39,14 +42,28 @@ type Filter struct {  // FilterKeyword stores a single keyword to filter statuses against.  type FilterKeyword struct { -	ID        string    `bun:"type:CHAR(26),pk,nullzero,notnull,unique"`                                     // id of this item in the database -	CreatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"`                  // when was item created -	UpdatedAt time.Time `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"`                  // when was item last updated -	AccountID string    `bun:"type:CHAR(26),notnull,nullzero"`                                               // ID of the local account that created the filter keyword. -	FilterID  string    `bun:"type:CHAR(26),notnull,nullzero,unique:filter_keywords_filter_id_keyword_uniq"` // ID of the filter that this keyword belongs to. -	Filter    *Filter   `bun:"-"`                                                                            // Filter corresponding to FilterID -	Keyword   string    `bun:",nullzero,notnull,unique:filter_keywords_filter_id_keyword_uniq"`              // The keyword or phrase to filter against. -	WholeWord *bool     `bun:",nullzero,notnull,default:false"`                                              // Should the filter consider word boundaries? +	ID        string         `bun:"type:CHAR(26),pk,nullzero,notnull,unique"`                                     // id of this item in the database +	CreatedAt time.Time      `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"`                  // when was item created +	UpdatedAt time.Time      `bun:"type:timestamptz,nullzero,notnull,default:current_timestamp"`                  // when was item last updated +	AccountID string         `bun:"type:CHAR(26),notnull,nullzero"`                                               // ID of the local account that created the filter keyword. +	FilterID  string         `bun:"type:CHAR(26),notnull,nullzero,unique:filter_keywords_filter_id_keyword_uniq"` // ID of the filter that this keyword belongs to. +	Filter    *Filter        `bun:"-"`                                                                            // Filter corresponding to FilterID +	Keyword   string         `bun:",nullzero,notnull,unique:filter_keywords_filter_id_keyword_uniq"`              // The keyword or phrase to filter against. +	WholeWord *bool          `bun:",nullzero,notnull,default:false"`                                              // Should the filter consider word boundaries? +	Regexp    *regexp.Regexp `bun:"-"`                                                                            // pre-prepared regular expression +} + +// Compile will compile this FilterKeyword as a prepared regular expression. +func (k *FilterKeyword) Compile() (err error) { +	var wordBreak string +	if k.WholeWord != nil && *k.WholeWord { +		wordBreak = `\b` +	} + +	// Compile keyword filter regexp. +	quoted := regexp.QuoteMeta(k.Keyword) +	k.Regexp, err = regexp.Compile(`(?i)` + wordBreak + quoted + wordBreak) +	return // caller is expected to wrap this error  }  // FilterStatus stores a single status to filter. diff --git a/internal/typeutils/internaltofrontend.go b/internal/typeutils/internaltofrontend.go index 7a5572267..d7f1fac6c 100644 --- a/internal/typeutils/internaltofrontend.go +++ b/internal/typeutils/internaltofrontend.go @@ -22,7 +22,6 @@ import (  	"errors"  	"fmt"  	"math" -	"regexp"  	"strconv"  	"strings"  	"time" @@ -746,18 +745,9 @@ func (c *Converter) statusToAPIFilterResults(  		keywordMatches := make([]string, 0, len(filter.Keywords))  		fields := filterableTextFields(s)  		for _, filterKeyword := range filter.Keywords { -			wholeWord := util.PtrValueOr(filterKeyword.WholeWord, false) -			wordBreak := `` -			if wholeWord { -				wordBreak = `\b` -			} -			re, err := regexp.Compile(`(?i)` + wordBreak + regexp.QuoteMeta(filterKeyword.Keyword) + wordBreak) -			if err != nil { -				return nil, err -			}  			var isMatch bool  			for _, field := range fields { -				if re.MatchString(field) { +				if filterKeyword.Regexp.MatchString(field) {  					isMatch = true  					break  				} diff --git a/internal/typeutils/internaltofrontend_test.go b/internal/typeutils/internaltofrontend_test.go index 2c4f28a9b..676870c7b 100644 --- a/internal/typeutils/internaltofrontend_test.go +++ b/internal/typeutils/internaltofrontend_test.go @@ -546,6 +546,7 @@ func (suite *InternalToFrontendTestSuite) TestWarnFilteredStatusToFrontend() {  	requestingAccount := suite.testAccounts["local_account_1"]  	expectedMatchingFilter := suite.testFilters["local_account_1_filter_1"]  	expectedMatchingFilterKeyword := suite.testFilterKeywords["local_account_1_filter_1_keyword_1"] +	suite.NoError(expectedMatchingFilterKeyword.Compile())  	expectedMatchingFilterKeyword.Filter = expectedMatchingFilter  	expectedMatchingFilter.Keywords = []*gtsmodel.FilterKeyword{expectedMatchingFilterKeyword}  	requestingAccountFilters := []*gtsmodel.Filter{expectedMatchingFilter} @@ -700,6 +701,7 @@ func (suite *InternalToFrontendTestSuite) TestHideFilteredStatusToFrontend() {  	expectedMatchingFilter := suite.testFilters["local_account_1_filter_1"]  	expectedMatchingFilter.Action = gtsmodel.FilterActionHide  	expectedMatchingFilterKeyword := suite.testFilterKeywords["local_account_1_filter_1_keyword_1"] +	suite.NoError(expectedMatchingFilterKeyword.Compile())  	expectedMatchingFilterKeyword.Filter = expectedMatchingFilter  	expectedMatchingFilter.Keywords = []*gtsmodel.FilterKeyword{expectedMatchingFilterKeyword}  	requestingAccountFilters := []*gtsmodel.Filter{expectedMatchingFilter}  | 
