diff options
author | 2024-09-16 14:00:23 +0200 | |
---|---|---|
committer | 2024-09-16 14:00:23 +0200 | |
commit | efd1a4f717afa83d3d3609f0d70e4da151a8dc9b (patch) | |
tree | 246ae4c12f86f8866e5299ae39ba5c1feba0bce4 /internal/typeutils/util.go | |
parent | [bugfix/chore] Always set the status sensitive if media + content-warning pre... (diff) | |
download | gotosocial-efd1a4f717afa83d3d3609f0d70e4da151a8dc9b.tar.xz |
[bugfix] Use better plaintext representation of status for filtering (#3301)
* [bugfix] Use better plaintext representation of status for filtering
* add new deps to readme
* lint
* update tests
* update regexes
* address review comments
* remove now unused xxhash
* whoops, wrong logger
* Merge branch 'main' into status_filtering_bugfix
* put cache in caches struct
* pain
Diffstat (limited to 'internal/typeutils/util.go')
-rw-r--r-- | internal/typeutils/util.go | 62 |
1 files changed, 62 insertions, 0 deletions
```diff
diff --git a/internal/typeutils/util.go b/internal/typeutils/util.go
index 3441e89a9..3a867ba35 100644
--- a/internal/typeutils/util.go
+++ b/internal/typeutils/util.go
@@ -27,6 +27,7 @@ import (
 	"strconv"
 	"strings"
 
+	"github.com/k3a/html2text"
 	apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
 	"github.com/superseriousbusiness/gotosocial/internal/config"
 	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
@@ -284,3 +285,64 @@ func ContentToContentLanguage(
 
 	return contentStr, langTagStr
 }
+
+// filterableFields returns text fields from
+// a status that we might want to filter on:
+//
+// - content warning
+// - content (converted to plaintext from HTML)
+// - media descriptions
+// - poll options
+//
+// Each field should be filtered separately.
+// This avoids scenarios where false-positive
+// multiple-word matches can be made by matching
+// the last word of one field + the first word
+// of the next field together.
+func filterableFields(s *gtsmodel.Status) []string {
+	// Estimate length of fields.
+	fieldCount := 2 + len(s.Attachments)
+	if s.Poll != nil {
+		fieldCount += len(s.Poll.Options)
+	}
+	fields := make([]string, 0, fieldCount)
+
+	// Content warning / title.
+	if s.ContentWarning != "" {
+		fields = append(fields, s.ContentWarning)
+	}
+
+	// Status content. Though we have raw text
+	// available for statuses created on our
+	// instance, use the html2text version to
+	// remove markdown-formatting characters
+	// and ensure more consistent filtering.
+	if s.Content != "" {
+		text := html2text.HTML2TextWithOptions(
+			s.Content,
+			html2text.WithLinksInnerText(),
+			html2text.WithUnixLineBreaks(),
+		)
+		if text != "" {
+			fields = append(fields, text)
+		}
+	}
+
+	// Media descriptions.
+	for _, attachment := range s.Attachments {
+		if attachment.Description != "" {
+			fields = append(fields, attachment.Description)
+		}
+	}
+
+	// Poll options.
+	if s.Poll != nil {
+		for _, opt := range s.Poll.Options {
+			if opt != "" {
+				fields = append(fields, opt)
+			}
+		}
+	}
+
+	return fields
+}
```