summaryrefslogtreecommitdiff
path: root/internal/typeutils/util.go
diff options
context:
space:
mode:
authorLibravatar tobi <31960611+tsmethurst@users.noreply.github.com>2024-09-16 14:00:23 +0200
committerLibravatar GitHub <noreply@github.com>2024-09-16 14:00:23 +0200
commitefd1a4f717afa83d3d3609f0d70e4da151a8dc9b (patch)
tree246ae4c12f86f8866e5299ae39ba5c1feba0bce4 /internal/typeutils/util.go
parent[bugfix/chore] Always set the status sensitive if media + content-warning pre... (diff)
downloadgotosocial-efd1a4f717afa83d3d3609f0d70e4da151a8dc9b.tar.xz
[bugfix] Use better plaintext representation of status for filtering (#3301)
* [bugfix] Use better plaintext representation of status for filtering
* add new deps to readme
* lint
* update tests
* update regexes
* address review comments
* remove now unused xxhash
* whoops, wrong logger
* Merge branch 'main' into status_filtering_bugfix
* put cache in caches struct
* pain
Diffstat (limited to 'internal/typeutils/util.go')
-rw-r--r--internal/typeutils/util.go62
1 files changed, 62 insertions, 0 deletions
diff --git a/internal/typeutils/util.go b/internal/typeutils/util.go
index 3441e89a9..3a867ba35 100644
--- a/internal/typeutils/util.go
+++ b/internal/typeutils/util.go
@@ -27,6 +27,7 @@ import (
"strconv"
"strings"
+ "github.com/k3a/html2text"
apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
@@ -284,3 +285,64 @@ func ContentToContentLanguage(
return contentStr, langTagStr
}
+
+// filterableFields returns text fields from
+// a status that we might want to filter on:
+//
+// - content warning
+// - content (converted to plaintext from HTML)
+// - media descriptions
+// - poll options
+//
+// Fields are appended in the order listed above.
+// Empty fields are skipped, so the returned slice
+// is non-nil but may have zero length.
+//
+// Each field should be filtered separately.
+// This avoids scenarios where false-positive
+// multiple-word matches can be made by matching
+// the last word of one field + the first word
+// of the next field together.
+//
+// s must not be nil: its fields are read
+// without a nil check.
+func filterableFields(s *gtsmodel.Status) []string {
+ // Estimate length of fields: at most one each
+ // for content warning and content (the 2), plus
+ // one per attachment and one per poll option.
+ // This is only used as the slice capacity, so
+ // overestimating (due to empty fields) is fine.
+ fieldCount := 2 + len(s.Attachments)
+ if s.Poll != nil {
+ fieldCount += len(s.Poll.Options)
+ }
+ fields := make([]string, 0, fieldCount)
+
+ // Content warning / title.
+ if s.ContentWarning != "" {
+ fields = append(fields, s.ContentWarning)
+ }
+
+ // Status content. Though we have raw text
+ // available for statuses created on our
+ // instance, use the html2text version to
+ // remove markdown-formatting characters
+ // and ensure more consistent filtering.
+ //
+ // NOTE(review): the exact effect of the two
+ // options is defined by the html2text package;
+ // presumably they emit link inner text rather
+ // than hrefs, and "\n" line breaks -- confirm
+ // against the library documentation.
+ if s.Content != "" {
+ text := html2text.HTML2TextWithOptions(
+ s.Content,
+ html2text.WithLinksInnerText(),
+ html2text.WithUnixLineBreaks(),
+ )
+ // Skip the field if conversion
+ // left no plaintext behind.
+ if text != "" {
+ fields = append(fields, text)
+ }
+ }
+
+ // Media descriptions, one
+ // field per described attachment.
+ for _, attachment := range s.Attachments {
+ if attachment.Description != "" {
+ fields = append(fields, attachment.Description)
+ }
+ }
+
+ // Poll options, one field per
+ // non-empty option (if poll is set).
+ if s.Poll != nil {
+ for _, opt := range s.Poll.Options {
+ if opt != "" {
+ fields = append(fields, opt)
+ }
+ }
+ }
+
+ return fields
+}