summaryrefslogtreecommitdiff
path: root/internal/typeutils
diff options
context:
space:
mode:
Diffstat (limited to 'internal/typeutils')
-rw-r--r--internal/typeutils/internaltofrontend.go81
-rw-r--r--internal/typeutils/internaltofrontend_test.go24
-rw-r--r--internal/typeutils/util.go62
-rw-r--r--internal/typeutils/util_test.go60
4 files changed, 170 insertions, 57 deletions
diff --git a/internal/typeutils/internaltofrontend.go b/internal/typeutils/internaltofrontend.go
index 55af2c1f1..fe49766fa 100644
--- a/internal/typeutils/internaltofrontend.go
+++ b/internal/typeutils/internaltofrontend.go
@@ -21,6 +21,8 @@ import (
"context"
"errors"
"fmt"
+ "slices"
+ "strconv"
"strings"
"time"
@@ -35,7 +37,6 @@ import (
"github.com/superseriousbusiness/gotosocial/internal/language"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/media"
- "github.com/superseriousbusiness/gotosocial/internal/text"
"github.com/superseriousbusiness/gotosocial/internal/uris"
"github.com/superseriousbusiness/gotosocial/internal/util"
)
@@ -939,32 +940,48 @@ func (c *Converter) statusToAPIFilterResults(
return nil, nil
}
- // Extract text fields from the status that we will match filters against.
- fields := filterableTextFields(s)
+ // Key this status based on ID + last updated time,
+ // to ensure we always filter on latest version.
+ statusKey := s.ID + strconv.FormatInt(s.UpdatedAt.Unix(), 10)
+
+ // Check if we have filterable fields cached for this status.
+ cache := c.state.Caches.StatusesFilterableFields
+ fields, stored := cache.Get(statusKey)
+ if !stored {
+ // We don't have filterable fields
+ // cached, calculate + cache now.
+ fields = filterableFields(s)
+ cache.Set(statusKey, fields)
+ }
// Record all matching warn filters and the reasons they matched.
filterResults := make([]apimodel.FilterResult, 0, len(filters))
for _, filter := range filters {
if !filterAppliesInContext(filter, filterContext) {
- // Filter doesn't apply to this context.
+ // Filter doesn't apply
+ // to this context.
continue
}
+
if filter.Expired(now) {
+ // Filter doesn't
+ // apply anymore.
continue
}
- // List all matching keywords.
+ // Assemble matching keywords (if any) from this filter.
keywordMatches := make([]string, 0, len(filter.Keywords))
- for _, filterKeyword := range filter.Keywords {
- var isMatch bool
- for _, field := range fields {
- if filterKeyword.Regexp.MatchString(field) {
- isMatch = true
- break
- }
- }
- if isMatch {
- keywordMatches = append(keywordMatches, filterKeyword.Keyword)
+ for _, keyword := range filter.Keywords {
+ // Check if at least one filterable field
+ // in the status matches on this filter.
+ if slices.ContainsFunc(
+ fields,
+ func(field string) bool {
+ return keyword.Regexp.MatchString(field)
+ },
+ ) {
+ // At least one field matched on this filter.
+ keywordMatches = append(keywordMatches, keyword.Keyword)
}
}
@@ -1001,40 +1018,6 @@ func (c *Converter) statusToAPIFilterResults(
return filterResults, nil
}
-// filterableTextFields returns all text from a status that we might want to filter on:
-// - content
-// - content warning
-// - media descriptions
-// - poll options
-func filterableTextFields(s *gtsmodel.Status) []string {
- fieldCount := 2 + len(s.Attachments)
- if s.Poll != nil {
- fieldCount += len(s.Poll.Options)
- }
- fields := make([]string, 0, fieldCount)
-
- if s.Content != "" {
- fields = append(fields, text.SanitizeToPlaintext(s.Content))
- }
- if s.ContentWarning != "" {
- fields = append(fields, s.ContentWarning)
- }
- for _, attachment := range s.Attachments {
- if attachment.Description != "" {
- fields = append(fields, attachment.Description)
- }
- }
- if s.Poll != nil {
- for _, option := range s.Poll.Options {
- if option != "" {
- fields = append(fields, option)
- }
- }
- }
-
- return fields
-}
-
// filterAppliesInContext returns whether a given filter applies in a given context.
func filterAppliesInContext(filter *gtsmodel.Filter, filterContext statusfilter.FilterContext) bool {
switch filterContext {
diff --git a/internal/typeutils/internaltofrontend_test.go b/internal/typeutils/internaltofrontend_test.go
index 651ff867d..a44afe67e 100644
--- a/internal/typeutils/internaltofrontend_test.go
+++ b/internal/typeutils/internaltofrontend_test.go
@@ -1063,15 +1063,21 @@ func (suite *InternalToFrontendTestSuite) TestHideFilteredBoostToFrontend() {
// Test that a hashtag filter for a hashtag in Mastodon HTML content works the way most users would expect.
func (suite *InternalToFrontendTestSuite) testHashtagFilteredStatusToFrontend(wholeWord bool, boost bool) {
- testStatus := suite.testStatuses["admin_account_status_1"]
+ testStatus := new(gtsmodel.Status)
+ *testStatus = *suite.testStatuses["admin_account_status_1"]
testStatus.Content = `<p>doggo doggin' it</p><p><a href="https://example.test/tags/dogsofmastodon" class="mention hashtag" rel="tag nofollow noreferrer noopener" target="_blank">#<span>dogsofmastodon</span></a></p>`
if boost {
- // Modify a fixture boost into a boost of the above status.
- boostStatus := suite.testStatuses["admin_account_status_4"]
- boostStatus.BoostOf = testStatus
- boostStatus.BoostOfID = testStatus.ID
- testStatus = boostStatus
+ boost, err := suite.typeconverter.StatusToBoost(
+ context.Background(),
+ testStatus,
+ suite.testAccounts["admin_account"],
+ "",
+ )
+ if err != nil {
+ suite.FailNow(err.Error())
+ }
+ testStatus = boost
}
requestingAccount := suite.testAccounts["local_account_1"]
@@ -1103,9 +1109,11 @@ func (suite *InternalToFrontendTestSuite) testHashtagFilteredStatusToFrontend(wh
[]*gtsmodel.Filter{filter},
nil,
)
- if suite.NoError(err) {
- suite.NotEmpty(apiStatus.Filtered)
+ if err != nil {
+ suite.FailNow(err.Error())
}
+
+ suite.NotEmpty(apiStatus.Filtered)
}
func (suite *InternalToFrontendTestSuite) TestHashtagWholeWordFilteredStatusToFrontend() {
diff --git a/internal/typeutils/util.go b/internal/typeutils/util.go
index 3441e89a9..3a867ba35 100644
--- a/internal/typeutils/util.go
+++ b/internal/typeutils/util.go
@@ -27,6 +27,7 @@ import (
"strconv"
"strings"
+ "github.com/k3a/html2text"
apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
@@ -284,3 +285,64 @@ func ContentToContentLanguage(
return contentStr, langTagStr
}
+
+// filterableFields returns text fields from
+// a status that we might want to filter on:
+//
+// - content warning
+// - content (converted to plaintext from HTML)
+// - media descriptions
+// - poll options
+//
+// Each field should be filtered separately.
+// This avoids scenarios where false-positive
+// multiple-word matches can be made by matching
+// the last word of one field + the first word
+// of the next field together.
+func filterableFields(s *gtsmodel.Status) []string {
+ // Estimate length of fields.
+ fieldCount := 2 + len(s.Attachments)
+ if s.Poll != nil {
+ fieldCount += len(s.Poll.Options)
+ }
+ fields := make([]string, 0, fieldCount)
+
+ // Content warning / title.
+ if s.ContentWarning != "" {
+ fields = append(fields, s.ContentWarning)
+ }
+
+ // Status content. Though we have raw text
+ // available for statuses created on our
+ // instance, use the html2text version to
+ // remove markdown-formatting characters
+ // and ensure more consistent filtering.
+ if s.Content != "" {
+ text := html2text.HTML2TextWithOptions(
+ s.Content,
+ html2text.WithLinksInnerText(),
+ html2text.WithUnixLineBreaks(),
+ )
+ if text != "" {
+ fields = append(fields, text)
+ }
+ }
+
+ // Media descriptions.
+ for _, attachment := range s.Attachments {
+ if attachment.Description != "" {
+ fields = append(fields, attachment.Description)
+ }
+ }
+
+ // Poll options.
+ if s.Poll != nil {
+ for _, opt := range s.Poll.Options {
+ if opt != "" {
+ fields = append(fields, opt)
+ }
+ }
+ }
+
+ return fields
+}
diff --git a/internal/typeutils/util_test.go b/internal/typeutils/util_test.go
index 0f852d399..ea6667519 100644
--- a/internal/typeutils/util_test.go
+++ b/internal/typeutils/util_test.go
@@ -21,6 +21,7 @@ import (
"context"
"testing"
+ "github.com/stretchr/testify/assert"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/language"
@@ -158,3 +159,62 @@ func TestContentToContentLanguage(t *testing.T) {
}
}
}
+
+func TestFilterableText(t *testing.T) {
+ type testcase struct {
+ status *gtsmodel.Status
+ expectedFields []string
+ }
+
+ for _, testcase := range []testcase{
+ {
+ status: &gtsmodel.Status{
+ ContentWarning: "This is a test status",
+ Content: `<p>Import / export of account data via CSV files will be coming in 0.17.0 :) No more having to run scripts + CLI tools to import a list of accounts you follow, after doing a migration to a <a href="https://gts.superseriousbusiness.org/tags/gotosocial" class="mention hashtag" rel="tag nofollow noreferrer noopener" target="_blank">#<span>GoToSocial</span></a> instance.</p>`,
+ },
+ expectedFields: []string{
+ "This is a test status",
+ "Import / export of account data via CSV files will be coming in 0.17.0 :) No more having to run scripts + CLI tools to import a list of accounts you follow, after doing a migration to a #GoToSocial <https://gts.superseriousbusiness.org/tags/gotosocial> instance.",
+ },
+ },
+ {
+ status: &gtsmodel.Status{
+ Content: `<p><span class="h-card"><a href="https://example.org/@zlatko" class="u-url mention" rel="nofollow noreferrer noopener" target="_blank">@<span>zlatko</span></a></span> currently we used modernc/sqlite3 for our sqlite driver, but we've been experimenting with wasm sqlite, and will likely move to that permanently in future; in the meantime, both options are available (the latter with a build tag)</p><p><a href="https://github.com/superseriousbusiness/gotosocial/pull/2863" rel="nofollow noreferrer noopener" target="_blank">https://github.com/superseriousbusiness/gotosocial/pull/2863</a></p>`,
+ },
+ expectedFields: []string{
+ "@zlatko <https://example.org/@zlatko> currently we used modernc/sqlite3 for our sqlite driver, but we've been experimenting with wasm sqlite, and will likely move to that permanently in future; in the meantime, both options are available (the latter with a build tag)\n\nhttps://github.com/superseriousbusiness/gotosocial/pull/2863 <https://github.com/superseriousbusiness/gotosocial/pull/2863>",
+ },
+ },
+ {
+ status: &gtsmodel.Status{
+ ContentWarning: "Nerd stuff",
+ Content: `<p>Latest graphs for <a href="https://gts.superseriousbusiness.org/tags/gotosocial" class="mention hashtag" rel="tag nofollow noreferrer noopener" target="_blank">#<span>GoToSocial</span></a> on <a href="https://github.com/ncruces/go-sqlite3" rel="nofollow noreferrer noopener" target="_blank">Wasm sqlite3</a> with <a href="https://codeberg.org/gruf/go-ffmpreg" rel="nofollow noreferrer noopener" target="_blank">embedded Wasm ffmpeg</a>, both running on <a href="https://wazero.io/" rel="nofollow noreferrer noopener" target="_blank">Wazero</a>, and configured with a <a href="https://github.com/superseriousbusiness/gotosocial/blob/20fe430ef9ff3012a7a4dc2d01b68020c20e13bb/example/config.yaml#L259-L266" rel="nofollow noreferrer noopener" target="_blank">50MiB db cache target</a>. This is the version we'll be releasing soonish, now we're happy with how we've tamed everything.</p>`,
+ Attachments: []*gtsmodel.MediaAttachment{
+ {
+ Description: `Graph showing GtS using between 150-300 MiB of memory, steadily, over a few days.`,
+ },
+ {
+ Description: `Another media attachment`,
+ },
+ },
+ Poll: &gtsmodel.Poll{
+ Options: []string{
+ "Poll option 1",
+ "Poll option 2",
+ },
+ },
+ },
+ expectedFields: []string{
+ "Nerd stuff",
+ "Latest graphs for #GoToSocial <https://gts.superseriousbusiness.org/tags/gotosocial> on Wasm sqlite3 <https://github.com/ncruces/go-sqlite3> with embedded Wasm ffmpeg <https://codeberg.org/gruf/go-ffmpreg>, both running on Wazero <https://wazero.io/>, and configured with a 50MiB db cache target <https://github.com/superseriousbusiness/gotosocial/blob/20fe430ef9ff3012a7a4dc2d01b68020c20e13bb/example/config.yaml#L259-L266>. This is the version we'll be releasing soonish, now we're happy with how we've tamed everything.",
+ "Graph showing GtS using between 150-300 MiB of memory, steadily, over a few days.",
+ "Another media attachment",
+ "Poll option 1",
+ "Poll option 2",
+ },
+ },
+ } {
+ fields := filterableFields(testcase.status)
+ assert.Equal(t, testcase.expectedFields, fields)
+ }
+}