diff options
Diffstat (limited to 'internal/filter')
| -rw-r--r-- | internal/filter/mutes/filter.go | 2 | ||||
| -rw-r--r-- | internal/filter/status/api.go | 105 | ||||
| -rw-r--r-- | internal/filter/status/filter.go | 33 | ||||
| -rw-r--r-- | internal/filter/status/status.go | 313 | ||||
| -rw-r--r-- | internal/filter/status/text.go | 80 | ||||
| -rw-r--r-- | internal/filter/status/text_test.go | 84 |
6 files changed, 612 insertions, 5 deletions
diff --git a/internal/filter/mutes/filter.go b/internal/filter/mutes/filter.go index 20adc3daf..fc5dd3362 100644 --- a/internal/filter/mutes/filter.go +++ b/internal/filter/mutes/filter.go @@ -41,5 +41,5 @@ const noauth = "noauth" // given statuses or accounts are muted by a requester (user). type Filter struct{ state *state.State } -// NewFilter returns a new Filter interface that will use the provided database. +// NewFilter returns a new Filter interface that will use the provided state. func NewFilter(state *state.State) *Filter { return &Filter{state: state} } diff --git a/internal/filter/status/api.go b/internal/filter/status/api.go new file mode 100644 index 000000000..1d6684b59 --- /dev/null +++ b/internal/filter/status/api.go @@ -0,0 +1,105 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +package status + +import ( + "time" + + apimodel "code.superseriousbusiness.org/gotosocial/internal/api/model" + "code.superseriousbusiness.org/gotosocial/internal/gtserror" + "code.superseriousbusiness.org/gotosocial/internal/gtsmodel" + "code.superseriousbusiness.org/gotosocial/internal/util" +) + +// NOTE: the below functions have all been copied +// from typeutils to prevent an import cycle. when +// we move the filtering logic out of the converter +// then we can safely remove these and call necessary +// function without any worry of import cycles. + +func toAPIFilterV2(filter *gtsmodel.Filter) apimodel.FilterV2 { + apiFilterKeywords := make([]apimodel.FilterKeyword, len(filter.Keywords)) + if len(apiFilterKeywords) != len(filter.Keywords) { + // bound check eliminiation compiler-hint + panic(gtserror.New("BCE")) + } + for i, filterKeyword := range filter.Keywords { + apiFilterKeywords[i] = apimodel.FilterKeyword{ + ID: filterKeyword.ID, + Keyword: filterKeyword.Keyword, + WholeWord: util.PtrOrValue(filterKeyword.WholeWord, false), + } + } + apiFilterStatuses := make([]apimodel.FilterStatus, len(filter.Statuses)) + if len(apiFilterStatuses) != len(filter.Statuses) { + // bound check eliminiation compiler-hint + panic(gtserror.New("BCE")) + } + for i, filterStatus := range filter.Statuses { + apiFilterStatuses[i] = apimodel.FilterStatus{ + ID: filterStatus.ID, + StatusID: filterStatus.StatusID, + } + } + return apimodel.FilterV2{ + ID: filter.ID, + Title: filter.Title, + Context: toAPIFilterContexts(filter), + ExpiresAt: toAPIFilterExpiresAt(filter.ExpiresAt), + FilterAction: toAPIFilterAction(filter.Action), + Keywords: apiFilterKeywords, + Statuses: apiFilterStatuses, + } +} + +func toAPIFilterExpiresAt(expiresAt time.Time) *string { + if expiresAt.IsZero() { + return nil + } + return util.Ptr(util.FormatISO8601(expiresAt)) +} + +func toAPIFilterContexts(filter *gtsmodel.Filter) []apimodel.FilterContext { + apiContexts := make([]apimodel.FilterContext, 0, apimodel.FilterContextNumValues) + if filter.Contexts.Home() { + apiContexts = append(apiContexts, apimodel.FilterContextHome) + } + if filter.Contexts.Notifications() { + apiContexts = append(apiContexts, apimodel.FilterContextNotifications) + } + if filter.Contexts.Public() { + apiContexts = append(apiContexts, apimodel.FilterContextPublic) + } + if filter.Contexts.Thread() { + apiContexts = append(apiContexts, apimodel.FilterContextThread) + } + if filter.Contexts.Account() { + apiContexts = append(apiContexts, apimodel.FilterContextAccount) + } + return apiContexts +} + +func toAPIFilterAction(m gtsmodel.FilterAction) apimodel.FilterAction { + switch m { + case gtsmodel.FilterActionWarn: + return apimodel.FilterActionWarn + case gtsmodel.FilterActionHide: + return apimodel.FilterActionHide + } + return apimodel.FilterActionNone +} diff --git a/internal/filter/status/filter.go b/internal/filter/status/filter.go new file mode 100644 index 000000000..d9ec12934 --- /dev/null +++ b/internal/filter/status/filter.go @@ -0,0 +1,33 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +package status + +import ( + "code.superseriousbusiness.org/gotosocial/internal/state" +) + +// noauth is a placeholder ID used in cache lookups +// when there is no authorized account ID to use. +const noauth = "noauth" + +// Filter packages up logic for checking whether +// given status is muted by a given requester (user). +type Filter struct{ state *state.State } + +// New returns a new Filter interface that will use the provided state. +func NewFilter(state *state.State) *Filter { return &Filter{state} } diff --git a/internal/filter/status/status.go b/internal/filter/status/status.go index 1a611cdd1..5f997129d 100644 --- a/internal/filter/status/status.go +++ b/internal/filter/status/status.go @@ -15,12 +15,317 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see <http://www.gnu.org/licenses/>. -// Package status represents status filters managed by the user through the API. package status import ( - "errors" + "context" + "regexp" + "slices" + "time" + + apimodel "code.superseriousbusiness.org/gotosocial/internal/api/model" + "code.superseriousbusiness.org/gotosocial/internal/cache" + "code.superseriousbusiness.org/gotosocial/internal/gtserror" + "code.superseriousbusiness.org/gotosocial/internal/gtsmodel" ) -// ErrHideStatus indicates that a status has been filtered and should not be returned at all. -var ErrHideStatus = errors.New("hide status") +// StatusFilterResultsInContext returns status filtering results, limited +// to the given filtering context, about the given status for requester. +// The hide flag is immediately returned if any filters match with the +// HIDE action set, else API model filter results for the WARN action. +func (f *Filter) StatusFilterResultsInContext( + ctx context.Context, + requester *gtsmodel.Account, + status *gtsmodel.Status, + context gtsmodel.FilterContext, +) ( + results []apimodel.FilterResult, + hidden bool, + err error, +) { + if context == gtsmodel.FilterContextNone { + // fast-check any context. + return nil, false, nil + } + + // Get cached filter results for status to requester in all contexts. + allResults, now, err := f.StatusFilterResults(ctx, requester, status) + if err != nil { + return nil, false, err + } + + // Get results applicable to current context. + var forContext []cache.StatusFilterResult + switch context { + case gtsmodel.FilterContextHome: + forContext = allResults.Results[cache.KeyContextHome] + case gtsmodel.FilterContextPublic: + forContext = allResults.Results[cache.KeyContextPublic] + case gtsmodel.FilterContextNotifications: + forContext = allResults.Results[cache.KeyContextNotifs] + case gtsmodel.FilterContextThread: + forContext = allResults.Results[cache.KeyContextThread] + case gtsmodel.FilterContextAccount: + forContext = allResults.Results[cache.KeyContextAccount] + } + + // Iterate results in context, gathering prepared API models. + results = make([]apimodel.FilterResult, 0, len(forContext)) + for _, result := range forContext { + + // Check if result expired. + if result.Expired(now) { + continue + } + + // If the result indicates + // status should just be + // hidden then return here. + if result.Result == nil { + return nil, true, nil + } + + // Append pre-prepared API model to slice. + results = append(results, *result.Result) + } + + return +} + +// StatusFilterResults returns status filtering results (in all contexts) about the given status for the given requesting account. +func (f *Filter) StatusFilterResults(ctx context.Context, requester *gtsmodel.Account, status *gtsmodel.Status) (*cache.CachedStatusFilterResults, time.Time, error) { + + // For requester ID use a + // fallback 'noauth' string + // by default for lookups. + requesterID := noauth + if requester != nil { + requesterID = requester.ID + } + + // Get current time. + now := time.Now() + + // Load status filtering results for this requesting account about status from cache, using load callback function if necessary. + results, err := f.state.Caches.StatusFilter.LoadOne("RequesterID,StatusID", func() (*cache.CachedStatusFilterResults, error) { + + // Load status filter results for given status. + results, err := f.getStatusFilterResults(ctx, + requester, + status, + now, + ) + if err != nil { + if err == cache.SentinelError { + // Filter-out our temporary + // race-condition error. + return &cache.CachedStatusFilterResults{}, nil + } + + return nil, err + } + + // Convert to cacheable results type. + return &cache.CachedStatusFilterResults{ + StatusID: status.ID, + RequesterID: requesterID, + Results: results, + }, nil + }, requesterID, status.ID) + if err != nil { + return nil, now, err + } + + return results, now, err +} + +// getStatusFilterResults loads status filtering results for +// the given status, given the current time (checking expiries). +// this will load results for all possible filtering contexts. +func (f *Filter) getStatusFilterResults( + ctx context.Context, + requester *gtsmodel.Account, + status *gtsmodel.Status, + now time.Time, +) ( + [5][]cache.StatusFilterResult, + error, +) { + var results [5][]cache.StatusFilterResult + + if requester == nil { + // Without auth, there will be no possible + // filters to exists, return as 'unfiltered'. + return results, nil + } + + // Get the string fields status is + // filterable on for keyword matching. + fields := getFilterableFields(status) + + // Get all status filters owned by the requesting account. + filters, err := f.state.DB.GetFiltersByAccountID(ctx, requester.ID) + if err != nil { + return results, gtserror.Newf("error getting account filters: %w", err) + } + + // For proper status filtering we need all fields populated. + if err := f.state.DB.PopulateStatus(ctx, status); err != nil { + return results, gtserror.Newf("error populating status: %w", err) + } + + // Generate result for each filter. + for _, filter := range filters { + + // Skip already expired. + if filter.Expired(now) { + continue + } + + // Later stored API result, if any. + // (for the HIDE action, it is unset). + var apiResult *apimodel.FilterResult + + switch filter.Action { + case gtsmodel.FilterActionWarn: + // For filter action WARN get all possible filter matches against status. + keywordMatches, statusMatches := getFilterMatches(filter, status.ID, fields) + if len(keywordMatches) == 0 && len(statusMatches) == 0 { + continue + } + + // Wrap matches in frontend API model. + apiResult = &apimodel.FilterResult{ + Filter: toAPIFilterV2(filter), + + KeywordMatches: keywordMatches, + StatusMatches: statusMatches, + } + + // For filter action HIDE quickly + // look for first possible match + // against this status, or reloop. + case gtsmodel.FilterActionHide: + if !doesFilterMatch(filter, status.ID, fields) { + continue + } + } + + // Wrap the filter result in our cache model. + // This model simply existing implies this + // status has been filtered, defaulting to + // action HIDE, or WARN on a non-nil result. + result := cache.StatusFilterResult{ + Expiry: filter.ExpiresAt, + Result: apiResult, + } + + // Append generated result if + // applies in 'home' context. + if filter.Contexts.Home() { + const key = cache.KeyContextHome + results[key] = append(results[key], result) + } + + // Append generated result if + // applies in 'public' context. + if filter.Contexts.Public() { + const key = cache.KeyContextPublic + results[key] = append(results[key], result) + } + + // Append generated result if + // applies in 'notifs' context. + if filter.Contexts.Notifications() { + const key = cache.KeyContextNotifs + results[key] = append(results[key], result) + } + + // Append generated result if + // applies in 'thread' context. + if filter.Contexts.Thread() { + const key = cache.KeyContextThread + results[key] = append(results[key], result) + } + + // Append generated result if + // applies in 'account' context. + if filter.Contexts.Account() { + const key = cache.KeyContextAccount + results[key] = append(results[key], result) + } + } + + // Iterate all filter results. + for _, key := range [5]int{ + cache.KeyContextHome, + cache.KeyContextPublic, + cache.KeyContextNotifs, + cache.KeyContextThread, + cache.KeyContextAccount, + } { + // Sort the slice of filter results by their expiry, soonest coming first. + slices.SortFunc(results[key], func(a, b cache.StatusFilterResult) int { + const k = +1 + switch { + case a.Expiry.IsZero(): + if b.Expiry.IsZero() { + return 0 + } + return +k + case b.Expiry.IsZero(): + return -k + case a.Expiry.Before(b.Expiry): + return -k + case b.Expiry.Before(a.Expiry): + return +k + default: + return 0 + } + }) + } + + return results, nil +} + +// getFilterMatches returns *all* the keyword and status matches of status ID and fields on given filter. +func getFilterMatches(filter *gtsmodel.Filter, statusID string, fields []string) ([]string, []string) { + keywordMatches := make([]string, 0, len(filter.Keywords)) + for _, keyword := range filter.Keywords { + if doesKeywordMatch(keyword.Regexp, fields) { + keywordMatches = append(keywordMatches, keyword.Keyword) + } + } + statusMatches := make([]string, 0, 1) + for _, status := range filter.Statuses { + if status.StatusID == statusID { + statusMatches = append(statusMatches, statusID) + } + } + return keywordMatches, statusMatches +} + +// doesFilterMatch returns if any of fields or status ID match on the given filter. +func doesFilterMatch(filter *gtsmodel.Filter, statusID string, fields []string) bool { + for _, status := range filter.Statuses { + if status.StatusID == statusID { + return true + } + } + for _, keyword := range filter.Keywords { + if doesKeywordMatch(keyword.Regexp, fields) { + return true + } + } + return false +} + +// doesKeywordMatch returns if any of fields match given keyword regex. +func doesKeywordMatch(rgx *regexp.Regexp, fields []string) bool { + for _, field := range fields { + if rgx.MatchString(field) { + return true + } + } + return false +} diff --git a/internal/filter/status/text.go b/internal/filter/status/text.go new file mode 100644 index 000000000..347e1193c --- /dev/null +++ b/internal/filter/status/text.go @@ -0,0 +1,80 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +package status + +import ( + "code.superseriousbusiness.org/gotosocial/internal/gtsmodel" + "code.superseriousbusiness.org/gotosocial/internal/text" +) + +// getFilterableFields returns text fields from +// a status that we might want to filter on: +// +// - content warning +// - content (converted to plaintext from HTML) +// - media descriptions +// - poll options +// +// Each field should be filtered separately. This avoids +// scenarios where false-positive multiple-word matches +// can be made by matching the last word of one field +// combined with the first word of the next field together. +func getFilterableFields(status *gtsmodel.Status) []string { + + // Estimate expected no of status fields. + fieldCount := 2 + len(status.Attachments) + if status.Poll != nil { + fieldCount += len(status.Poll.Options) + } + fields := make([]string, 0, fieldCount) + + // Append content warning / title. + if status.ContentWarning != "" { + fields = append(fields, status.ContentWarning) + } + + // Status content. Though we have raw text + // available for statuses created on our + // instance, use the plaintext version to + // remove markdown-formatting characters + // and ensure more consistent filtering. + if status.Content != "" { + text := text.ParseHTMLToPlain(status.Content) + if text != "" { + fields = append(fields, text) + } + } + + // Media descriptions, only where they are set. + for _, attachment := range status.Attachments { + if attachment.Description != "" { + fields = append(fields, attachment.Description) + } + } + + // Non-empty poll options. + if status.Poll != nil { + for _, opt := range status.Poll.Options { + if opt != "" { + fields = append(fields, opt) + } + } + } + + return fields +} diff --git a/internal/filter/status/text_test.go b/internal/filter/status/text_test.go new file mode 100644 index 000000000..f9283f826 --- /dev/null +++ b/internal/filter/status/text_test.go @@ -0,0 +1,84 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +package status + +import ( + "testing" + + "code.superseriousbusiness.org/gotosocial/internal/gtsmodel" + "github.com/stretchr/testify/assert" +) + +func TestFilterableText(t *testing.T) { + type testcase struct { + status *gtsmodel.Status + expectedFields []string + } + + for _, testcase := range []testcase{ + { + status: >smodel.Status{ + ContentWarning: "This is a test status", + Content: `<p>Import / export of account data via CSV files will be coming in 0.17.0 :) No more having to run scripts + CLI tools to import a list of accounts you follow, after doing a migration to a <a href="https://gts.superseriousbusiness.org/tags/gotosocial" class="mention hashtag" rel="tag nofollow noreferrer noopener" target="_blank">#<span>GoToSocial</span></a> instance.</p>`, + }, + expectedFields: []string{ + "This is a test status", + "Import / export of account data via CSV files will be coming in 0.17.0 :) No more having to run scripts + CLI tools to import a list of accounts you follow, after doing a migration to a #GoToSocial <https://gts.superseriousbusiness.org/tags/gotosocial> instance.", + }, + }, + { + status: >smodel.Status{ + Content: `<p><span class="h-card"><a href="https://example.org/@zlatko" class="u-url mention" rel="nofollow noreferrer noopener" target="_blank">@<span>zlatko</span></a></span> currently we used modernc/sqlite3 for our sqlite driver, but we've been experimenting with wasm sqlite, and will likely move to that permanently in future; in the meantime, both options are available (the latter with a build tag)</p><p><a href="https://codeberg.org/superseriousbusiness/gotosocial/pulls/2863" rel="nofollow noreferrer noopener" target="_blank">https://codeberg.org/superseriousbusiness/gotosocial/pulls/2863</a></p>`, + }, + expectedFields: []string{ + "@zlatko <https://example.org/@zlatko> currently we used modernc/sqlite3 for our sqlite driver, but we've been experimenting with wasm sqlite, and will likely move to that permanently in future; in the meantime, both options are available (the latter with a build tag)\n\nhttps://codeberg.org/superseriousbusiness/gotosocial/pulls/2863 <https://codeberg.org/superseriousbusiness/gotosocial/pulls/2863>", + }, + }, + { + status: >smodel.Status{ + ContentWarning: "Nerd stuff", + Content: `<p>Latest graphs for <a href="https://gts.superseriousbusiness.org/tags/gotosocial" class="mention hashtag" rel="tag nofollow noreferrer noopener" target="_blank">#<span>GoToSocial</span></a> on <a href="https://github.com/ncruces/go-sqlite3" rel="nofollow noreferrer noopener" target="_blank">Wasm sqlite3</a> with <a href="https://codeberg.org/gruf/go-ffmpreg" rel="nofollow noreferrer noopener" target="_blank">embedded Wasm ffmpeg</a>, both running on <a href="https://wazero.io/" rel="nofollow noreferrer noopener" target="_blank">Wazero</a>, and configured with a <a href="https://codeberg.org/superseriousbusiness/gotosocial/src/commit/20fe430ef9ff3012a7a4dc2d01b68020c20e13bb/example/config.yaml#L259-L266" rel="nofollow noreferrer noopener" target="_blank">50MiB db cache target</a>. This is the version we'll be releasing soonish, now we're happy with how we've tamed everything.</p>`, + Attachments: []*gtsmodel.MediaAttachment{ + { + Description: `Graph showing GtS using between 150-300 MiB of memory, steadily, over a few days.`, + }, + { + Description: `Another media attachment`, + }, + }, + Poll: >smodel.Poll{ + Options: []string{ + "Poll option 1", + "Poll option 2", + }, + }, + }, + expectedFields: []string{ + "Nerd stuff", + "Latest graphs for #GoToSocial <https://gts.superseriousbusiness.org/tags/gotosocial> on Wasm sqlite3 <https://github.com/ncruces/go-sqlite3> with embedded Wasm ffmpeg <https://codeberg.org/gruf/go-ffmpreg>, both running on Wazero <https://wazero.io/>, and configured with a 50MiB db cache target <https://codeberg.org/superseriousbusiness/gotosocial/src/commit/20fe430ef9ff3012a7a4dc2d01b68020c20e13bb/example/config.yaml#L259-L266>. This is the version we'll be releasing soonish, now we're happy with how we've tamed everything.", + "Graph showing GtS using between 150-300 MiB of memory, steadily, over a few days.", + "Another media attachment", + "Poll option 1", + "Poll option 2", + }, + }, + } { + fields := getFilterableFields(testcase.status) + assert.Equal(t, testcase.expectedFields, fields) + } +} |
