summaryrefslogtreecommitdiff
path: root/internal/filter/spam/statusable.go
diff options
context:
space:
mode:
Diffstat (limited to 'internal/filter/spam/statusable.go')
-rw-r--r--internal/filter/spam/statusable.go472
1 files changed, 472 insertions, 0 deletions
diff --git a/internal/filter/spam/statusable.go b/internal/filter/spam/statusable.go
new file mode 100644
index 000000000..60598f920
--- /dev/null
+++ b/internal/filter/spam/statusable.go
@@ -0,0 +1,472 @@
+// GoToSocial
+// Copyright (C) GoToSocial Authors admin@gotosocial.org
+// SPDX-License-Identifier: AGPL-3.0-or-later
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package spam
+
+import (
+ "context"
+ "errors"
+ "net/url"
+ "slices"
+ "strings"
+
+ "github.com/miekg/dns"
+ "github.com/superseriousbusiness/gotosocial/internal/ap"
+ "github.com/superseriousbusiness/gotosocial/internal/config"
+ "github.com/superseriousbusiness/gotosocial/internal/db"
+ "github.com/superseriousbusiness/gotosocial/internal/gtscontext"
+ "github.com/superseriousbusiness/gotosocial/internal/gtserror"
+ "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+ "github.com/superseriousbusiness/gotosocial/internal/log"
+ "github.com/superseriousbusiness/gotosocial/internal/regexes"
+ "github.com/superseriousbusiness/gotosocial/internal/util"
+)
+
+// preppedMention represents a partially-parsed
+// mention, prepared for spam checking purposes.
+type preppedMention struct {
+ *gtsmodel.Mention
+ uri *url.URL
+ domain string
+ user string
+ local bool
+}
+
+// StatusableOK returns no error if the given statusable looks OK,
+// ie., relevant to the receiver, and not spam.
+//
+// This should only be used for Creates of statusables, NOT Announces!
+//
+// If the statusable does not pass relevancy or spam checks, either
+// a Spam or NotRelevant error will be returned. Callers should use
+// gtserror.IsSpam() and gtserror.IsNotRelevant() to check for this.
+//
+// If the returned error is not nil, but neither Spam or NotRelevant,
+// then it's an actual database error.
+//
+// The decision is made based on the following heuristics, in order:
+//
+// 1. Receiver follow requester. Return nil.
+// 2. Statusable doesn't mention receiver. Return NotRelevant.
+//
+// If instance-federation-spam-filter = false, then return nil now.
+// Otherwise check:
+//
+// 3. Receiver is locked and is followed by requester. Return nil.
+// 4. Five or more people are mentioned. Return Spam.
+// 5. Receiver follow (requests) a mentioned account. Return nil.
+// 6. Statusable has a media attachment. Return Spam.
+// 7. Statusable contains non-mention, non-hashtag links. Return Spam.
+func (f *Filter) StatusableOK(
+ ctx context.Context,
+ receiver *gtsmodel.Account,
+ requester *gtsmodel.Account,
+ statusable ap.Statusable,
+) error {
+ // HEURISTIC 1: Check whether receiving account follows the requesting account.
+ // If so, we know it's OK and don't need to do any other checks.
+ follows, err := f.state.DB.IsFollowing(ctx, receiver.ID, requester.ID)
+ if err != nil {
+ return gtserror.Newf("db error checking follow status: %w", err)
+ }
+
+ if follows {
+ // Looks fine.
+ return nil
+ }
+
+ // HEURISTIC 2: Check whether statusable mentions the
+ // receiver. If not, we don't want to process this message.
+ rawMentions, _ := ap.ExtractMentions(statusable)
+ mentions := prepMentions(ctx, rawMentions)
+ mentioned := f.isMentioned(ctx, receiver, mentions)
+ if !mentioned {
+ // This is a random message fired
+ // into our inbox, just drop it.
+ err := errors.New("receiver does not follow requester, and is not mentioned")
+ return gtserror.SetNotRelevant(err)
+ }
+
+ // Receiver is mentioned, but not by someone
+ // they follow. Check if we need to do more
+ // granular spam filtering.
+ if !config.GetInstanceFederationSpamFilter() {
+ // Filter is not enabled, allow it
+ // through without further checks.
+ return nil
+ }
+
+ // More granular spam filtering time!
+ //
+ // HEURISTIC 3: Does requester follow locked receiver?
+ followedBy, err := f.lockedFollowedBy(ctx, receiver, requester)
+ if err != nil {
+ return gtserror.Newf("db error checking follow status: %w", err)
+ }
+
+ // If receiver is locked, and is followed
+ // by requester, this likely means they're
+ // interested in the message. Allow it.
+ if followedBy {
+ return nil
+ }
+
+ // HEURISTIC 4: How many people are mentioned?
+ // If it's 5 or more we can assume this is spam.
+ mentionsLen := len(mentions)
+ if mentionsLen >= 5 {
+ err := errors.New("status mentions 5 or more people")
+ return gtserror.SetSpam(err)
+ }
+
+ // HEURISTIC 5: Four or fewer people are mentioned,
+ // do we follow (request) at least one of them?
+ // If so, we're probably interested in the message.
+ knowsOne := f.knowsOneMentioned(ctx, receiver, mentions)
+ if knowsOne {
+ return nil
+ }
+
+ // HEURISTIC 6: Are there any media attachments?
+ attachments, _ := ap.ExtractAttachments(statusable)
+ hasAttachments := len(attachments) != 0
+ if hasAttachments {
+ err := errors.New("status has attachment(s)")
+ return gtserror.SetSpam(err)
+ }
+
+ // HEURISTIC 7: Are there any links in the post
+ // aside from mentions and hashtags? Include the
+ // summary/content warning when checking.
+ hashtags, _ := ap.ExtractHashtags(statusable)
+ hasErrantLinks := f.errantLinks(ctx, statusable, mentions, hashtags)
+ if hasErrantLinks {
+ err := errors.New("status has one or more non-mention, non-hashtag links")
+ return gtserror.SetSpam(err)
+ }
+
+ // Looks OK.
+ return nil
+}
+
+// prepMentions prepares a slice of mentions
+// for spam checking by parsing out the namestring
+// and targetAccountURI values, if present.
+func prepMentions(
+ ctx context.Context,
+ mentions []*gtsmodel.Mention,
+) []preppedMention {
+ var (
+ host = config.GetHost()
+ accountDomain = config.GetAccountDomain()
+ )
+
+ parsedMentions := make([]preppedMention, 0, len(mentions))
+ for _, mention := range mentions {
+ // Start by just embedding
+ // the original mention.
+ parsedMention := preppedMention{
+ Mention: mention,
+ }
+
+ // Try to parse namestring if present.
+ if mention.NameString != "" {
+ user, domain, err := util.ExtractNamestringParts(mention.NameString)
+ if err != nil {
+ // Malformed mention,
+ // just log + ignore.
+ log.Debugf(ctx,
+ "malformed mention namestring: %v",
+ err,
+ )
+ continue
+ }
+
+ parsedMention.domain = domain
+ parsedMention.user = user
+ }
+
+ // Try to parse URI if present.
+ if mention.TargetAccountURI != "" {
+ targetURI, err := url.Parse(mention.TargetAccountURI)
+ if err != nil {
+ // Malformed mention,
+ // just log + ignore.
+ log.Debugf(ctx,
+ "malformed mention uri: %v",
+ err,
+ )
+ continue
+ }
+
+ parsedMention.uri = targetURI
+
+ // Set host from targetURI if
+ // it wasn't set by namestring.
+ if parsedMention.domain == "" {
+ parsedMention.domain = targetURI.Host
+ }
+ }
+
+ // It's a mention of a local account if the target host is us.
+ parsedMention.local = parsedMention.domain == host || parsedMention.domain == accountDomain
+
+ // Done with this one.
+ parsedMentions = append(parsedMentions, parsedMention)
+ }
+
+ return parsedMentions
+}
+
+// isMentioned returns true if the
+// receiver is targeted by at least
+// one of the given mentions.
+func (f *Filter) isMentioned(
+ ctx context.Context,
+ receiver *gtsmodel.Account,
+ mentions []preppedMention,
+) bool {
+ return slices.ContainsFunc(
+ mentions,
+ func(mention preppedMention) bool {
+ // Check if receiver mentioned by URI.
+ if accURI := mention.TargetAccountURI; accURI != "" &&
+ (accURI == receiver.URI || accURI == receiver.URL) {
+ return true
+ }
+
+ // Check if receiver mentioned by namestring.
+ if mention.local && strings.EqualFold(mention.user, receiver.Username) {
+ return true
+ }
+
+ // Mention doesn't
+ // target receiver.
+ return false
+ },
+ )
+}
+
+// lockedFollowedBy returns true
+// if receiver account is locked,
+// and requester follows receiver.
+func (f *Filter) lockedFollowedBy(
+ ctx context.Context,
+ receiver *gtsmodel.Account,
+ requester *gtsmodel.Account,
+) (bool, error) {
+ // If receiver is not locked,
+ // return early to avoid a db call.
+ if !*receiver.Locked {
+ return false, nil
+ }
+
+ return f.state.DB.IsFollowing(ctx, requester.ID, receiver.ID)
+}
+
+// knowsOneMentioned returns true if the
+// receiver follows or has follow requested
+// at least one of the mentioned accounts.
+func (f *Filter) knowsOneMentioned(
+ ctx context.Context,
+ receiver *gtsmodel.Account,
+ mentions []preppedMention,
+) bool {
+ return slices.ContainsFunc(
+ mentions,
+ func(mention preppedMention) bool {
+ var (
+ acc *gtsmodel.Account
+ err error
+ )
+
+ // Try to get target account without
+ // dereffing. After all, if they're not
+ // in our db we definitely don't know them.
+ if mention.TargetAccountURI != "" {
+ acc, err = f.state.DB.GetAccountByURI(
+ gtscontext.SetBarebones(ctx),
+ mention.TargetAccountURI,
+ )
+ } else if mention.user != "" {
+ acc, err = f.state.DB.GetAccountByUsernameDomain(
+ gtscontext.SetBarebones(ctx),
+ mention.user,
+ mention.domain,
+ )
+ }
+
+ if err != nil && !errors.Is(err, db.ErrNoEntries) {
+ // Proper error.
+ log.Errorf(ctx, "db error getting mentioned account: %v", err)
+ return false
+ }
+
+ if acc == nil {
+ // We don't know this nerd!
+ return false
+ }
+
+ if acc.ID == receiver.ID {
+ // This is us, doesn't count.
+ return false
+ }
+
+ follows, err := f.state.DB.IsFollowing(ctx, receiver.ID, acc.ID)
+ if err != nil {
+ // Proper error.
+ log.Errorf(ctx, "db error checking follow status: %v", err)
+ return false
+ }
+
+ if follows {
+ // We follow this nerd.
+ return true
+ }
+
+ // We don't follow this nerd, but
+ // have we requested to follow them?
+ followRequested, err := f.state.DB.IsFollowRequested(ctx, receiver.ID, acc.ID)
+ if err != nil {
+ // Proper error.
+ log.Errorf(ctx, "db error checking follow req status: %v", err)
+ return false
+ }
+
+ return followRequested
+ },
+ )
+}
+
+// errantLinks returns true if any http/https
+// link discovered in the statusable content + cw
+// is not either a mention link, or a hashtag link.
+func (f *Filter) errantLinks(
+ ctx context.Context,
+ statusable ap.Statusable,
+ mentions []preppedMention,
+ hashtags []*gtsmodel.Tag,
+) bool {
+ // Concatenate the cw with the
+ // content to check for links in both.
+ cw := ap.ExtractSummary(statusable)
+ content := ap.ExtractContent(statusable)
+ concat := cw + " " + content.Content
+
+ // Store link string alongside link
+ // URI to avoid stringifying twice.
+ type preppedLink struct {
+ *url.URL
+ str string
+ }
+
+ // Find + parse every http/https link in the status.
+ rawLinks := regexes.LinkScheme.FindAllString(concat, -1)
+ links := make([]preppedLink, 0, len(rawLinks))
+ for _, rawLink := range rawLinks {
+ linkURI, err := url.Parse(rawLink)
+ if err != nil {
+ log.Debugf(ctx,
+ "malformed link in status: %v",
+ err,
+ )
+ // Ignore bad links
+ // for spam checking.
+ continue
+ }
+
+ links = append(links, preppedLink{
+ URL: linkURI,
+ str: rawLink,
+ })
+ }
+
+ // For each link in the status, try to
+ // match it to a hashtag or a mention.
+ // If we can't, we have an errant link.
+ for _, link := range links {
+ hashtagLink := slices.ContainsFunc(
+ hashtags,
+ func(hashtag *gtsmodel.Tag) bool {
+ // If a link is to the href
+ // of a hashtag, it's fine.
+ return strings.EqualFold(
+ link.str,
+ hashtag.Href,
+ )
+ },
+ )
+
+ if hashtagLink {
+ // This link is accounted for.
+ // Move to the next one.
+ continue
+ }
+
+ mentionLink := slices.ContainsFunc(
+ mentions,
+ func(mention preppedMention) bool {
+ // If link is straight up to the URI
+ // of a mentioned account, it's fine.
+ if strings.EqualFold(
+ link.str,
+ mention.TargetAccountURI,
+ ) {
+ return true
+ }
+
+ // Link might be to an account URL rather
+ // than URI. This is a bit trickier because
+ // we can't predict the format of such URLs,
+ // and it's difficult to reconstruct them
+ // while also taking account of different
+ // host + account-domain values.
+ //
+ // So, just check if this link is on the same
+ // host as the mentioned account, or at least
+ // shares a host with it.
+ if link.Host == mention.domain {
+ // Same host.
+ return true
+ }
+
+ // Shares a host if it has at least two
+ // components from the right in common.
+ common := dns.CompareDomainName(
+ link.Host,
+ mention.domain,
+ )
+ return common >= 2
+ },
+ )
+
+ if mentionLink {
+ // This link is accounted for.
+ // Move to the next one.
+ continue
+ }
+
+ // Not a hashtag link
+ // or a mention link,
+ // so it's errant.
+ return true
+ }
+
+ // All links OK, or
+ // no links found.
+ return false
+}