diff options
author | 2023-06-21 18:26:40 +0200 | |
---|---|---|
committer | 2023-06-21 17:26:40 +0100 | |
commit | 831ae09f8bab04af854243421047371339c3e190 (patch) | |
tree | f7709d478cc363dc1899bdb658fe20e2dc7986f3 /internal/processing | |
parent | [docs] Disambiguate docker version, don't recommend opening localhost (#1913) (diff) | |
download | gotosocial-831ae09f8bab04af854243421047371339c3e190.tar.xz |
[feature] Add partial text search for accounts + statuses (#1836)
Diffstat (limited to 'internal/processing')
-rw-r--r-- | internal/processing/processor.go | 7 | ||||
-rw-r--r-- | internal/processing/search.go | 295 | ||||
-rw-r--r-- | internal/processing/search/accounts.go | 110 | ||||
-rw-r--r-- | internal/processing/search/get.go | 696 | ||||
-rw-r--r-- | internal/processing/search/lookup.go | 114 | ||||
-rw-r--r-- | internal/processing/search/search.go | 42 | ||||
-rw-r--r-- | internal/processing/search/util.go | 138 |
7 files changed, 1107 insertions, 295 deletions
diff --git a/internal/processing/processor.go b/internal/processing/processor.go index b67e5252e..377f176e5 100644 --- a/internal/processing/processor.go +++ b/internal/processing/processor.go @@ -32,6 +32,7 @@ import ( "github.com/superseriousbusiness/gotosocial/internal/processing/list" "github.com/superseriousbusiness/gotosocial/internal/processing/media" "github.com/superseriousbusiness/gotosocial/internal/processing/report" + "github.com/superseriousbusiness/gotosocial/internal/processing/search" "github.com/superseriousbusiness/gotosocial/internal/processing/status" "github.com/superseriousbusiness/gotosocial/internal/processing/stream" "github.com/superseriousbusiness/gotosocial/internal/processing/timeline" @@ -60,6 +61,7 @@ type Processor struct { list list.Processor media media.Processor report report.Processor + search search.Processor status status.Processor stream stream.Processor timeline timeline.Processor @@ -90,6 +92,10 @@ func (p *Processor) Report() *report.Processor { return &p.report } +func (p *Processor) Search() *search.Processor { + return &p.search +} + func (p *Processor) Status() *status.Processor { return &p.status } @@ -137,6 +143,7 @@ func NewProcessor( processor.media = media.New(state, tc, mediaManager, federator.TransportController()) processor.report = report.New(state, tc) processor.timeline = timeline.New(state, tc, filter) + processor.search = search.New(state, federator, tc, filter) processor.status = status.New(state, federator, tc, filter, parseMentionFunc) processor.stream = stream.New(state, oauthServer) processor.user = user.New(state, emailSender) diff --git a/internal/processing/search.go b/internal/processing/search.go deleted file mode 100644 index ef5da9ee7..000000000 --- a/internal/processing/search.go +++ /dev/null @@ -1,295 +0,0 @@ -// GoToSocial -// Copyright (C) GoToSocial Authors admin@gotosocial.org -// SPDX-License-Identifier: AGPL-3.0-or-later -// -// This program is free software: you can redistribute it 
and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see <http://www.gnu.org/licenses/>. - -package processing - -import ( - "context" - "errors" - "fmt" - "net/url" - "strings" - - "codeberg.org/gruf/go-kv" - "github.com/superseriousbusiness/gotosocial/internal/ap" - apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model" - "github.com/superseriousbusiness/gotosocial/internal/config" - "github.com/superseriousbusiness/gotosocial/internal/db" - "github.com/superseriousbusiness/gotosocial/internal/federation/dereferencing" - "github.com/superseriousbusiness/gotosocial/internal/gtscontext" - "github.com/superseriousbusiness/gotosocial/internal/gtserror" - "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" - "github.com/superseriousbusiness/gotosocial/internal/log" - "github.com/superseriousbusiness/gotosocial/internal/oauth" - "github.com/superseriousbusiness/gotosocial/internal/util" -) - -// Implementation note: in this function, we tend to log errors -// at debug level rather than return them. This is because the -// search has a sort of fallthrough logic: if we can't get a result -// with x search, we should try with y search rather than returning. -// -// If we get to the end and still haven't found anything, even then -// we shouldn't return an error, just return an empty search result. 
-// -// The only exception to this is when we get a malformed query, in -// which case we return a bad request error so the user knows they -// did something funky. -func (p *Processor) SearchGet(ctx context.Context, authed *oauth.Auth, search *apimodel.SearchQuery) (*apimodel.SearchResult, gtserror.WithCode) { - // tidy up the query and make sure it wasn't just spaces - query := strings.TrimSpace(search.Query) - if query == "" { - err := errors.New("search query was empty string after trimming space") - return nil, gtserror.NewErrorBadRequest(err, err.Error()) - } - - l := log.WithContext(ctx). - WithFields(kv.Fields{{"query", query}}...) - - searchResult := &apimodel.SearchResult{ - Accounts: []apimodel.Account{}, - Statuses: []apimodel.Status{}, - Hashtags: []apimodel.Tag{}, - } - - // currently the search will only ever return one result, - // so return nothing if the offset is greater than 0 - if search.Offset > 0 { - return searchResult, nil - } - - foundAccounts := []*gtsmodel.Account{} - foundStatuses := []*gtsmodel.Status{} - - var foundOne bool - - /* - SEARCH BY MENTION - check if the query is something like @whatever_username@example.org -- this means it's likely a remote account - */ - maybeNamestring := query - if maybeNamestring[0] != '@' { - maybeNamestring = "@" + maybeNamestring - } - - if username, domain, err := util.ExtractNamestringParts(maybeNamestring); err == nil { - l.Trace("search term is a mention, looking it up...") - blocked, err := p.state.DB.IsDomainBlocked(ctx, domain) - if err != nil { - return nil, gtserror.NewErrorInternalError(fmt.Errorf("error checking domain block: %w", err)) - } - if blocked { - l.Debug("domain is blocked") - return searchResult, nil - } - - foundAccount, err := p.searchAccountByUsernameDomain(ctx, authed, username, domain, search.Resolve) - if err != nil { - var errNotRetrievable *dereferencing.ErrNotRetrievable - if !errors.As(err, &errNotRetrievable) { - // return a proper error only if it wasn't just not 
retrievable - return nil, gtserror.NewErrorInternalError(fmt.Errorf("error looking up account: %w", err)) - } - return searchResult, nil - } - - foundAccounts = append(foundAccounts, foundAccount) - foundOne = true - l.Trace("got an account by searching by mention") - } - - /* - SEARCH BY URI - check if the query is a URI with a recognizable scheme and dereference it - */ - if !foundOne { - if uri, err := url.Parse(query); err == nil { - if uri.Scheme == "https" || uri.Scheme == "http" { - l.Trace("search term is a uri, looking it up...") - blocked, err := p.state.DB.IsURIBlocked(ctx, uri) - if err != nil { - return nil, gtserror.NewErrorInternalError(fmt.Errorf("error checking domain block: %w", err)) - } - if blocked { - l.Debug("domain is blocked") - return searchResult, nil - } - - // check if it's a status... - foundStatus, err := p.searchStatusByURI(ctx, authed, uri) - if err != nil { - // Check for semi-expected error types. - var ( - errNotRetrievable *dereferencing.ErrNotRetrievable - errWrongType *ap.ErrWrongType - ) - if !errors.As(err, &errNotRetrievable) && !errors.As(err, &errWrongType) { - return nil, gtserror.NewErrorInternalError(fmt.Errorf("error looking up status: %w", err)) - } - } else { - foundStatuses = append(foundStatuses, foundStatus) - foundOne = true - l.Trace("got a status by searching by URI") - } - - // ... or an account - if !foundOne { - foundAccount, err := p.searchAccountByURI(ctx, authed, uri, search.Resolve) - if err != nil { - // Check for semi-expected error types. 
- var ( - errNotRetrievable *dereferencing.ErrNotRetrievable - errWrongType *ap.ErrWrongType - ) - if !errors.As(err, &errNotRetrievable) && !errors.As(err, &errWrongType) { - return nil, gtserror.NewErrorInternalError(fmt.Errorf("error looking up account: %w", err)) - } - } else { - foundAccounts = append(foundAccounts, foundAccount) - foundOne = true - l.Trace("got an account by searching by URI") - } - } - } - } - } - - if !foundOne { - // we got nothing, we can return early - l.Trace("found nothing, returning") - return searchResult, nil - } - - /* - FROM HERE ON we have our search results, it's just a matter of filtering them according to what this user is allowed to see, - and then converting them into our frontend format. - */ - for _, foundAccount := range foundAccounts { - // make sure there's no block in either direction between the account and the requester - blocked, err := p.state.DB.IsEitherBlocked(ctx, authed.Account.ID, foundAccount.ID) - if err != nil { - err = fmt.Errorf("SearchGet: error checking block between %s and %s: %s", authed.Account.ID, foundAccount.ID, err) - return nil, gtserror.NewErrorInternalError(err) - } - - if blocked { - l.Tracef("block exists between %s and %s, skipping this result", authed.Account.ID, foundAccount.ID) - continue - } - - apiAcct, err := p.tc.AccountToAPIAccountPublic(ctx, foundAccount) - if err != nil { - err = fmt.Errorf("SearchGet: error converting account %s to api account: %s", foundAccount.ID, err) - return nil, gtserror.NewErrorInternalError(err) - } - - searchResult.Accounts = append(searchResult.Accounts, *apiAcct) - } - - for _, foundStatus := range foundStatuses { - // make sure each found status is visible to the requester - visible, err := p.filter.StatusVisible(ctx, authed.Account, foundStatus) - if err != nil { - err = fmt.Errorf("SearchGet: error checking visibility of status %s for account %s: %s", foundStatus.ID, authed.Account.ID, err) - return nil, gtserror.NewErrorInternalError(err) - } - - 
if !visible { - l.Tracef("status %s is not visible to account %s, skipping this result", foundStatus.ID, authed.Account.ID) - continue - } - - apiStatus, err := p.tc.StatusToAPIStatus(ctx, foundStatus, authed.Account) - if err != nil { - err = fmt.Errorf("SearchGet: error converting status %s to api status: %s", foundStatus.ID, err) - return nil, gtserror.NewErrorInternalError(err) - } - - searchResult.Statuses = append(searchResult.Statuses, *apiStatus) - } - - return searchResult, nil -} - -func (p *Processor) searchStatusByURI(ctx context.Context, authed *oauth.Auth, uri *url.URL) (*gtsmodel.Status, error) { - status, _, err := p.federator.GetStatusByURI(gtscontext.SetFastFail(ctx), authed.Account.Username, uri) - return status, err -} - -func (p *Processor) searchAccountByURI(ctx context.Context, authed *oauth.Auth, uri *url.URL, resolve bool) (*gtsmodel.Account, error) { - if !resolve { - var ( - account *gtsmodel.Account - err error - uriStr = uri.String() - ) - - // Search the database for existing account with ID URI. - account, err = p.state.DB.GetAccountByURI(ctx, uriStr) - if err != nil && !errors.Is(err, db.ErrNoEntries) { - return nil, fmt.Errorf("searchAccountByURI: error checking database for account %s: %w", uriStr, err) - } - - if account == nil { - // Else, search the database for existing by ID URL. 
- account, err = p.state.DB.GetAccountByURL(ctx, uriStr) - if err != nil { - if !errors.Is(err, db.ErrNoEntries) { - return nil, fmt.Errorf("searchAccountByURI: error checking database for account %s: %w", uriStr, err) - } - return nil, dereferencing.NewErrNotRetrievable(err) - } - } - - return account, nil - } - - account, _, err := p.federator.GetAccountByURI( - gtscontext.SetFastFail(ctx), - authed.Account.Username, - uri, - ) - return account, err -} - -func (p *Processor) searchAccountByUsernameDomain(ctx context.Context, authed *oauth.Auth, username string, domain string, resolve bool) (*gtsmodel.Account, error) { - if !resolve { - if domain == config.GetHost() || domain == config.GetAccountDomain() { - // We do local lookups using an empty domain, - // else it will fail the db search below. - domain = "" - } - - // Search the database for existing account with USERNAME@DOMAIN - account, err := p.state.DB.GetAccountByUsernameDomain(ctx, username, domain) - if err != nil { - if !errors.Is(err, db.ErrNoEntries) { - return nil, fmt.Errorf("searchAccountByUsernameDomain: error checking database for account %s@%s: %w", username, domain, err) - } - return nil, dereferencing.NewErrNotRetrievable(err) - } - - return account, nil - } - - account, _, err := p.federator.GetAccountByUsernameDomain( - gtscontext.SetFastFail(ctx), - authed.Account.Username, - username, domain, - ) - return account, err -} diff --git a/internal/processing/search/accounts.go b/internal/processing/search/accounts.go new file mode 100644 index 000000000..eb88647a3 --- /dev/null +++ b/internal/processing/search/accounts.go @@ -0,0 +1,110 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your 
option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +package search + +import ( + "context" + "errors" + "strings" + + "codeberg.org/gruf/go-kv" + apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model" + "github.com/superseriousbusiness/gotosocial/internal/db" + "github.com/superseriousbusiness/gotosocial/internal/gtserror" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/id" + "github.com/superseriousbusiness/gotosocial/internal/log" +) + +// Accounts does a partial search for accounts that +// match the given query. It expects input that looks +// like a namestring, and will normalize plaintext to look +// more like a namestring. For queries that include domain, +// it will only return one match at most. For namestrings +// that exclude domain, multiple matches may be returned. +// +// This behavior aligns more or less with Mastodon's API. +// See https://docs.joinmastodon.org/methods/accounts/#search. +func (p *Processor) Accounts( + ctx context.Context, + requestingAccount *gtsmodel.Account, + query string, + limit int, + offset int, + resolve bool, + following bool, +) ([]*apimodel.Account, gtserror.WithCode) { + var ( + foundAccounts = make([]*gtsmodel.Account, 0, limit) + appendAccount = func(foundAccount *gtsmodel.Account) { foundAccounts = append(foundAccounts, foundAccount) } + ) + + // Validate query. 
+ query = strings.TrimSpace(query) + if query == "" { + err := gtserror.New("search query was empty string after trimming space") + return nil, gtserror.NewErrorBadRequest(err, err.Error()) + } + + // Be nice and normalize query by prepending '@'. + // This will make it easier for accountsByNamestring + // to pick this up as a valid namestring. + if query[0] != '@' { + query = "@" + query + } + + log. + WithContext(ctx). + WithFields(kv.Fields{ + {"limit", limit}, + {"offset", offset}, + {"query", query}, + {"resolve", resolve}, + {"following", following}, + }...). + Debugf("beginning search") + + // todo: Currently we don't support offset for paging; + // if caller supplied an offset greater than 0, return + // nothing as though there were no additional results. + if offset > 0 { + return p.packageAccounts(ctx, requestingAccount, foundAccounts) + } + + // Return all accounts we can find that match the + // provided query. If it's not a namestring, this + // won't return an error, it'll just return 0 results. + if _, err := p.accountsByNamestring( + ctx, + requestingAccount, + id.Highest, + id.Lowest, + limit, + offset, + query, + resolve, + following, + appendAccount, + ); err != nil && !errors.Is(err, db.ErrNoEntries) { + err = gtserror.Newf("error searching by namestring: %w", err) + return nil, gtserror.NewErrorInternalError(err) + } + + // Return whatever we got (if anything). 
+ return p.packageAccounts(ctx, requestingAccount, foundAccounts) +} diff --git a/internal/processing/search/get.go b/internal/processing/search/get.go new file mode 100644 index 000000000..936e8acfa --- /dev/null +++ b/internal/processing/search/get.go @@ -0,0 +1,696 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ +package search + +import ( + "context" + "errors" + "fmt" + "net/mail" + "net/url" + "strings" + + "codeberg.org/gruf/go-kv" + "github.com/superseriousbusiness/gotosocial/internal/ap" + apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model" + "github.com/superseriousbusiness/gotosocial/internal/config" + "github.com/superseriousbusiness/gotosocial/internal/db" + "github.com/superseriousbusiness/gotosocial/internal/federation/dereferencing" + "github.com/superseriousbusiness/gotosocial/internal/gtscontext" + "github.com/superseriousbusiness/gotosocial/internal/gtserror" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/log" + "github.com/superseriousbusiness/gotosocial/internal/util" +) + +const ( + queryTypeAny = "" + queryTypeAccounts = "accounts" + queryTypeStatuses = "statuses" + queryTypeHashtags = "hashtags" +) + +// Get performs a search for accounts and/or statuses using the +// provided request parameters. +// +// Implementation note: in this function, we try to only return +// an error to the caller when they've submitted a bad request, or when +// a serious error has occurred. This is because the search has a +// sort of fallthrough logic: if we can't get a result with one +// type of search, we should proceed with another type of search rather than +// returning an early error. +// +// If we get to the end and still haven't found anything, even +// then we shouldn't return an error, just return an empty result. +func (p *Processor) Get( + ctx context.Context, + account *gtsmodel.Account, + req *apimodel.SearchRequest, +) (*apimodel.SearchResult, gtserror.WithCode) { + + var ( + maxID = req.MaxID + minID = req.MinID + limit = req.Limit + offset = req.Offset + query = strings.TrimSpace(req.Query) // Trim trailing/leading whitespace. + queryType = strings.TrimSpace(strings.ToLower(req.QueryType)) // Trim trailing/leading whitespace; convert to lowercase. 
+ resolve = req.Resolve + following = req.Following + ) + + // Validate query. + if query == "" { + err := errors.New("search query was empty string after trimming space") + return nil, gtserror.NewErrorBadRequest(err, err.Error()) + } + + // Validate query type. + switch queryType { + case queryTypeAny, queryTypeAccounts, queryTypeStatuses, queryTypeHashtags: + // No problem. + default: + err := fmt.Errorf( + "search query type %s was not recognized, valid options are ['%s', '%s', '%s', '%s']", + queryType, queryTypeAny, queryTypeAccounts, queryTypeStatuses, queryTypeHashtags, + ) + return nil, gtserror.NewErrorBadRequest(err, err.Error()) + } + + log. + WithContext(ctx). + WithFields(kv.Fields{ + {"maxID", maxID}, + {"minID", minID}, + {"limit", limit}, + {"offset", offset}, + {"query", query}, + {"queryType", queryType}, + {"resolve", resolve}, + {"following", following}, + }...). + Debugf("beginning search") + + // todo: Currently we don't support offset for paging; + // a caller can page using maxID or minID, but if they + // supply an offset greater than 0, return nothing as + // though there were no additional results. + if req.Offset > 0 { + return p.packageSearchResult(ctx, account, nil, nil) + } + + var ( + foundStatuses = make([]*gtsmodel.Status, 0, limit) + foundAccounts = make([]*gtsmodel.Account, 0, limit) + appendStatus = func(foundStatus *gtsmodel.Status) { foundStatuses = append(foundStatuses, foundStatus) } + appendAccount = func(foundAccount *gtsmodel.Account) { foundAccounts = append(foundAccounts, foundAccount) } + keepLooking bool + err error + ) + + // Only try to search by namestring if search type includes + // accounts, since this is all namestring search can return. + if includeAccounts(queryType) { + // Copy query to avoid altering original. + var queryC = query + + // If query looks vaguely like an email address, ie. it doesn't + // start with '@' but it has '@' in it somewhere, it's probably + // a poorly-formed namestring. 
Be generous and correct for this. + if strings.Contains(queryC, "@") && queryC[0] != '@' { + if _, err := mail.ParseAddress(queryC); err == nil { + // Yep, really does look like + // an email address! Be nice. + queryC = "@" + queryC + } + } + + // Search using what may or may not be a namestring. + keepLooking, err = p.accountsByNamestring( + ctx, + account, + maxID, + minID, + limit, + offset, + queryC, + resolve, + following, + appendAccount, + ) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + err = gtserror.Newf("error searching by namestring: %w", err) + return nil, gtserror.NewErrorInternalError(err) + } + + if !keepLooking { + // Return whatever we have. + return p.packageSearchResult( + ctx, + account, + foundAccounts, + foundStatuses, + ) + } + } + + // Check if the query is a URI with a recognizable + // scheme and use it to look for accounts or statuses. + keepLooking, err = p.byURI( + ctx, + account, + query, + queryType, + resolve, + appendAccount, + appendStatus, + ) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + err = gtserror.Newf("error searching by URI: %w", err) + return nil, gtserror.NewErrorInternalError(err) + } + + if !keepLooking { + // Return whatever we have. + return p.packageSearchResult( + ctx, + account, + foundAccounts, + foundStatuses, + ) + } + + // As a last resort, search for accounts and + // statuses using the query as arbitrary text. + if err := p.byText( + ctx, + account, + maxID, + minID, + limit, + offset, + query, + queryType, + following, + appendAccount, + appendStatus, + ); err != nil && !errors.Is(err, db.ErrNoEntries) { + err = gtserror.Newf("error searching by text: %w", err) + return nil, gtserror.NewErrorInternalError(err) + } + + // Return whatever we ended + // up with (could be nothing). + return p.packageSearchResult( + ctx, + account, + foundAccounts, + foundStatuses, + ) +} + +// accountsByNamestring searches for accounts using the +// provided namestring query. 
If domain is not set in +// the namestring, it may return more than one result +// by doing a text search in the database for accounts +// matching the query. Otherwise, it tries to return an +// exact match. +func (p *Processor) accountsByNamestring( + ctx context.Context, + requestingAccount *gtsmodel.Account, + maxID string, + minID string, + limit int, + offset int, + query string, + resolve bool, + following bool, + appendAccount func(*gtsmodel.Account), +) (bool, error) { + // See if we have something that looks like a namestring. + username, domain, err := util.ExtractNamestringParts(query) + if err != nil { + // No need to return error; just not a namestring + // we can search with. Caller should keep looking + // with another search method. + return true, nil //nolint:nilerr + } + + if domain == "" { + // No error, but no domain set. That means the query + // looked like '@someone' which is not an exact search. + // Try to search for any accounts that match the query + // string, and let the caller know they should stop. + return false, p.accountsByText( + ctx, + requestingAccount.ID, + maxID, + minID, + limit, + offset, + // OK to assume username is set now. Use + // it instead of query to omit leading '@'. + username, + following, + appendAccount, + ) + } + + // No error, and domain and username were both set. + // Caller is likely trying to search for an exact + // match, from either a remote instance or local. + foundAccount, err := p.accountByUsernameDomain( + ctx, + requestingAccount, + username, + domain, + resolve, + ) + if err != nil { + // Check for semi-expected error types. + // On one of these, we can continue. + var ( + errNotRetrievable = new(*dereferencing.ErrNotRetrievable) // Item can't be dereferenced. + errWrongType = new(*ap.ErrWrongType) // Item was dereferenced, but wasn't an account. 
+ ) + + if !errors.As(err, errNotRetrievable) && !errors.As(err, errWrongType) { + err = gtserror.Newf("error looking up %s as account: %w", query, err) + return false, gtserror.NewErrorInternalError(err) + } + } else { + appendAccount(foundAccount) + } + + // Regardless of whether we have a hit at this point, + // return false to indicate caller should stop looking; + // namestrings are a very specific format so it's unlikely + // the caller was looking for something other than an account. + return false, nil +} + +// accountByUsernameDomain looks for one account with the given +// username and domain. If domain is empty, or equal to our domain, +// search will be confined to local accounts. +// +// Will return either a hit, an ErrNotRetrievable, an ErrWrongType, +// or a real error that the caller should handle. +func (p *Processor) accountByUsernameDomain( + ctx context.Context, + requestingAccount *gtsmodel.Account, + username string, + domain string, + resolve bool, +) (*gtsmodel.Account, error) { + var usernameDomain string + if domain == "" || domain == config.GetHost() || domain == config.GetAccountDomain() { + // Local lookup, normalize domain. + domain = "" + usernameDomain = username + } else { + // Remote lookup. + usernameDomain = username + "@" + domain + + // Ensure domain not blocked. + blocked, err := p.state.DB.IsDomainBlocked(ctx, domain) + if err != nil { + err = gtserror.Newf("error checking domain block: %w", err) + return nil, gtserror.NewErrorInternalError(err) + } + + if blocked { + // Don't search on blocked domain. + return nil, dereferencing.NewErrNotRetrievable(err) + } + } + + if resolve { + // We're allowed to resolve, leave the + // rest up to the dereferencer functions. + account, _, err := p.federator.GetAccountByUsernameDomain( + gtscontext.SetFastFail(ctx), + requestingAccount.Username, + username, domain, + ) + + return account, err + } + + // We're not allowed to resolve. 
Search the database + // for existing account with given username + domain. + account, err := p.state.DB.GetAccountByUsernameDomain(ctx, username, domain) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + err = gtserror.Newf("error checking database for account %s: %w", usernameDomain, err) + return nil, err + } + + if account != nil { + // We got a hit! No need to continue. + return account, nil + } + + err = fmt.Errorf("account %s could not be retrieved locally and we cannot resolve", usernameDomain) + return nil, dereferencing.NewErrNotRetrievable(err) +} + +// byURI looks for account(s) or a status with the given URI +// set as either its URL or ActivityPub URI. If it gets hits, it +// will call the provided append functions to return results. +// +// The boolean return value indicates to the caller whether the +// search should continue (true) or stop (false). False will be +// returned in cases where a hit has been found, the domain of the +// searched URI is blocked, or an unrecoverable error has occurred. +func (p *Processor) byURI( + ctx context.Context, + requestingAccount *gtsmodel.Account, + query string, + queryType string, + resolve bool, + appendAccount func(*gtsmodel.Account), + appendStatus func(*gtsmodel.Status), +) (bool, error) { + uri, err := url.Parse(query) + if err != nil { + // No need to return error; just not a URI + // we can search with. Caller should keep + // looking with another search method. + return true, nil //nolint:nilerr + } + + if !(uri.Scheme == "https" || uri.Scheme == "http") { + // This might just be a weirdly-parsed URI, + // since Go's url package tends to be a bit + // trigger-happy when deciding things are URIs. + // Indicate caller should keep looking. 
+ return true, nil + } + + blocked, err := p.state.DB.IsURIBlocked(ctx, uri) + if err != nil { + err = gtserror.Newf("error checking domain block: %w", err) + return false, gtserror.NewErrorInternalError(err) + } + + if blocked { + // Don't search for blocked domains. + // Caller should stop looking. + return false, nil + } + + if includeAccounts(queryType) { + // Check if URI points to an account. + foundAccount, err := p.accountByURI(ctx, requestingAccount, uri, resolve) + if err != nil { + // Check for semi-expected error types. + // On one of these, we can continue. + var ( + errNotRetrievable = new(*dereferencing.ErrNotRetrievable) // Item can't be dereferenced. + errWrongType = new(*ap.ErrWrongType) // Item was dereferenced, but wasn't an account. + ) + + if !errors.As(err, errNotRetrievable) && !errors.As(err, errWrongType) { + err = gtserror.Newf("error looking up %s as account: %w", uri, err) + return false, gtserror.NewErrorInternalError(err) + } + } else { + // Hit; return false to indicate caller should + // stop looking, since it's extremely unlikely + // a status and an account will have the same URL. + appendAccount(foundAccount) + return false, nil + } + } + + if includeStatuses(queryType) { + // Check if URI points to a status. + foundStatus, err := p.statusByURI(ctx, requestingAccount, uri, resolve) + if err != nil { + // Check for semi-expected error types. + // On one of these, we can continue. + var ( + errNotRetrievable = new(*dereferencing.ErrNotRetrievable) // Item can't be dereferenced. + errWrongType = new(*ap.ErrWrongType) // Item was dereferenced, but wasn't a status. + ) + + if !errors.As(err, errNotRetrievable) && !errors.As(err, errWrongType) { + err = gtserror.Newf("error looking up %s as status: %w", uri, err) + return false, gtserror.NewErrorInternalError(err) + } + } else { + // Hit; return false to indicate caller should + // stop looking, since it's extremely unlikely + // a status and an account will have the same URL. 
+			appendStatus(foundStatus)
+			return false, nil
+		}
+	}
+
+	// No errors, but no hits either; since this
+	// was a URI, caller should stop looking.
+	return false, nil
+}
+
+// accountByURI looks for one account with the given URI.
+// If resolve is false, it will only look in the database.
+// If resolve is true, it will try to resolve the account
+// from remote using the URI, if necessary.
+//
+// Will return either a hit, ErrNotRetrievable, ErrWrongType,
+// or a real error that the caller should handle.
+//
+// requestingAccount is only read on the resolve path (its
+// Username is handed to the federator), so it must be non-nil
+// when resolve is true. On the database-only path a miss on
+// both the URI and the URL lookup is reported by wrapping a
+// descriptive error with dereferencing.NewErrNotRetrievable.
+func (p *Processor) accountByURI(
+	ctx context.Context,
+	requestingAccount *gtsmodel.Account,
+	uri *url.URL,
+	resolve bool,
+) (*gtsmodel.Account, error) {
+	if resolve {
+		// We're allowed to resolve, leave the
+		// rest up to the dereferencer functions.
+		// The second return value of GetAccountByURI
+		// is not needed here and is discarded.
+		account, _, err := p.federator.GetAccountByURI(
+			gtscontext.SetFastFail(ctx),
+			requestingAccount.Username,
+			uri,
+		)
+
+		return account, err
+	}
+
+	// We're not allowed to resolve; search database only.
+	uriStr := uri.String() // stringify uri just once
+
+	// Search by ActivityPub URI.
+	// db.ErrNoEntries just means "no hit" and is not
+	// treated as a failure; anything else is returned.
+	account, err := p.state.DB.GetAccountByURI(ctx, uriStr)
+	if err != nil && !errors.Is(err, db.ErrNoEntries) {
+		err = gtserror.Newf("error checking database for account using URI %s: %w", uriStr, err)
+		return nil, err
+	}
+
+	if account != nil {
+		// We got a hit! No need to continue.
+		return account, nil
+	}
+
+	// No hit yet. Fallback to try by URL.
+	account, err = p.state.DB.GetAccountByURL(ctx, uriStr)
+	if err != nil && !errors.Is(err, db.ErrNoEntries) {
+		err = gtserror.Newf("error checking database for account using URL %s: %w", uriStr, err)
+		return nil, err
+	}
+
+	if account != nil {
+		// We got a hit! No need to continue.
+		return account, nil
+	}
+
+	// Neither lookup produced an account and we may not go
+	// remote: signal this with the semi-expected error type.
+	err = fmt.Errorf("account %s could not be retrieved locally and we cannot resolve", uriStr)
+	return nil, dereferencing.NewErrNotRetrievable(err)
+}
+
+// statusByURI looks for one status with the given URI.
+// If resolve is false, it will only look in the database.
+// If resolve is true, it will try to resolve the status
+// from remote using the URI, if necessary.
+//
+// Will return either a hit, ErrNotRetrievable, ErrWrongType,
+// or a real error that the caller should handle.
+//
+// requestingAccount is only read on the resolve path (its
+// Username is handed to the federator), so it must be non-nil
+// when resolve is true. On the database-only path a miss on
+// both the URI and the URL lookup is reported by wrapping a
+// descriptive error with dereferencing.NewErrNotRetrievable.
+func (p *Processor) statusByURI(
+	ctx context.Context,
+	requestingAccount *gtsmodel.Account,
+	uri *url.URL,
+	resolve bool,
+) (*gtsmodel.Status, error) {
+	if resolve {
+		// We're allowed to resolve, leave the
+		// rest up to the dereferencer functions.
+		// The second return value of GetStatusByURI
+		// is not needed here and is discarded.
+		status, _, err := p.federator.GetStatusByURI(
+			gtscontext.SetFastFail(ctx),
+			requestingAccount.Username,
+			uri,
+		)
+
+		return status, err
+	}
+
+	// We're not allowed to resolve; search database only.
+	uriStr := uri.String() // stringify uri just once
+
+	// Search by ActivityPub URI.
+	// db.ErrNoEntries just means "no hit" and is not
+	// treated as a failure; anything else is returned.
+	status, err := p.state.DB.GetStatusByURI(ctx, uriStr)
+	if err != nil && !errors.Is(err, db.ErrNoEntries) {
+		err = gtserror.Newf("error checking database for status using URI %s: %w", uriStr, err)
+		return nil, err
+	}
+
+	if status != nil {
+		// We got a hit! No need to continue.
+		return status, nil
+	}
+
+	// No hit yet. Fallback to try by URL.
+	status, err = p.state.DB.GetStatusByURL(ctx, uriStr)
+	if err != nil && !errors.Is(err, db.ErrNoEntries) {
+		err = gtserror.Newf("error checking database for status using URL %s: %w", uriStr, err)
+		return nil, err
+	}
+
+	if status != nil {
+		// We got a hit! No need to continue.
+		return status, nil
+	}
+
+	// Neither lookup produced a status and we may not go
+	// remote: signal this with the semi-expected error type.
+	err = fmt.Errorf("status %s could not be retrieved locally and we cannot resolve", uriStr)
+	return nil, dereferencing.NewErrNotRetrievable(err)
+}
+
+// byText searches in the database for accounts and/or
+// statuses containing the given query string, using
+// the provided parameters.
+//
+// If queryType is any (empty string), both accounts
+// and statuses will be searched, else only the given
+// queryType of item will be returned.
+func (p *Processor) byText(
+	ctx context.Context,
+	requestingAccount *gtsmodel.Account,
+	maxID string,
+	minID string,
+	limit int,
+	offset int,
+	query string,
+	queryType string,
+	following bool,
+	appendAccount func(*gtsmodel.Account),
+	appendStatus func(*gtsmodel.Status),
+) error {
+	// Paging by ID can't work across two different
+	// item types at once, so an "any" search drops
+	// both paging boundaries before querying.
+	if queryType == queryTypeAny {
+		maxID, minID = "", ""
+	}
+
+	// Accounts first, if the query type asks for them.
+	if includeAccounts(queryType) {
+		err := p.accountsByText(
+			ctx, requestingAccount.ID,
+			maxID, minID, limit, offset,
+			query, following, appendAccount,
+		)
+		if err != nil {
+			return err
+		}
+	}
+
+	// Then statuses, if the query type asks for them.
+	if includeStatuses(queryType) {
+		err := p.statusesByText(
+			ctx, requestingAccount.ID,
+			maxID, minID, limit, offset,
+			query, appendStatus,
+		)
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// accountsByText runs a text search for accounts in the
+// database on behalf of requestingAccountID and hands each
+// result to appendAccount. An empty result (db.ErrNoEntries)
+// is not an error.
+func (p *Processor) accountsByText(
+	ctx context.Context,
+	requestingAccountID string,
+	maxID string,
+	minID string,
+	limit int,
+	offset int,
+	query string,
+	following bool,
+	appendAccount func(*gtsmodel.Account),
+) error {
+	found, err := p.state.DB.SearchForAccounts(ctx, requestingAccountID, query, maxID, minID, limit, following, offset)
+	if err != nil {
+		if !errors.Is(err, db.ErrNoEntries) {
+			return gtserror.Newf("error checking database for accounts using text %s: %w", query, err)
+		}
+	}
+
+	for i := range found {
+		appendAccount(found[i])
+	}
+
+	return nil
+}
+
+// statusesByText searches in the database for limit
+// number of statuses using the given query text.
+func (p *Processor) statusesByText(
+	ctx context.Context,
+	requestingAccountID string,
+	maxID string,
+	minID string,
+	limit int,
+	offset int,
+	query string,
+	appendStatus func(*gtsmodel.Status),
+) error {
+	found, err := p.state.DB.SearchForStatuses(ctx, requestingAccountID, query, maxID, minID, limit, offset)
+
+	// Having no entries is a normal outcome for a text
+	// search; only other database errors are reported.
+	if !(err == nil || errors.Is(err, db.ErrNoEntries)) {
+		return gtserror.Newf("error checking database for statuses using text %s: %w", query, err)
+	}
+
+	// Pass every hit on to the caller's collector.
+	for i := range found {
+		appendStatus(found[i])
+	}
+
+	return nil
+}
diff --git a/internal/processing/search/lookup.go b/internal/processing/search/lookup.go
new file mode 100644
index 000000000..0f2a4191b
--- /dev/null
+++ b/internal/processing/search/lookup.go
@@ -0,0 +1,114 @@
+// GoToSocial
+// Copyright (C) GoToSocial Authors admin@gotosocial.org
+// SPDX-License-Identifier: AGPL-3.0-or-later
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package search
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"strings"
+
+	errorsv2 "codeberg.org/gruf/go-errors/v2"
+	"codeberg.org/gruf/go-kv"
+	apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
+	"github.com/superseriousbusiness/gotosocial/internal/federation/dereferencing"
+	"github.com/superseriousbusiness/gotosocial/internal/gtserror"
+	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+	"github.com/superseriousbusiness/gotosocial/internal/log"
+	"github.com/superseriousbusiness/gotosocial/internal/util"
+)
+
+// Lookup does a quick, non-resolving search for accounts that
+// match the given query. It expects input that looks like a
+// namestring, and will normalize plaintext to look more like
+// a namestring. Will only ever return one account, and only on
+// an exact match.
+//
+// This behavior aligns more or less with Mastodon's API.
+// See https://docs.joinmastodon.org/methods/accounts/#lookup
+//
+// A bad-request error is returned when the query is empty
+// after trimming or cannot be parsed as a namestring; a
+// not-found error when no account matches or the match is
+// filtered out by packageAccounts; an internal error for
+// any other lookup failure.
+func (p *Processor) Lookup(
+	ctx context.Context,
+	requestingAccount *gtsmodel.Account,
+	query string,
+) (*apimodel.Account, gtserror.WithCode) {
+	// Validate query.
+	query = strings.TrimSpace(query)
+	if query == "" {
+		err := errors.New("search query was empty string after trimming space")
+		return nil, gtserror.NewErrorBadRequest(err, err.Error())
+	}
+
+	// Be nice and normalize query by prepending '@'.
+	// This will make it easier for accountsByNamestring
+	// to pick this up as a valid namestring.
+	// (query is known to be non-empty here, so
+	// indexing the first byte is safe.)
+	if query[0] != '@' {
+		query = "@" + query
+	}
+
+	log.
+		WithContext(ctx).
+		WithFields(kv.Fields{
+			{"query", query},
+		}...).
+		Debugf("beginning search")
+
+	// See if we have something that looks like a namestring.
+	username, domain, err := util.ExtractNamestringParts(query)
+	if err != nil {
+		// The parse error is deliberately replaced by a
+		// caller-friendly description of the accepted forms.
+		err := errors.New("bad search query, must be in the form '[username]' or '[username]@[domain]'")
+		return nil, gtserror.NewErrorBadRequest(err, err.Error())
+	}
+
+	account, err := p.accountByUsernameDomain(
+		ctx,
+		requestingAccount,
+		username,
+		domain,
+		false, // never resolve!
+	)
+	if err != nil {
+		if errorsv2.Assignable(err, (*dereferencing.ErrNotRetrievable)(nil)) {
+			// ErrNotRetrievable is fine, just wrap it in
+			// a 404 to indicate we couldn't find anything.
+			err := fmt.Errorf("%s not found", query)
+			return nil, gtserror.NewErrorNotFound(err, err.Error())
+		}
+
+		// Real error has occurred.
+		err = gtserror.Newf("error looking up %s as account: %w", query, err)
+		return nil, gtserror.NewErrorInternalError(err)
+	}
+
+	// If we reach this point, we found an account. Shortcut
+	// using the packageAccounts function to return it. This
+	// may cause the account to be filtered out if it's not
+	// visible to the caller, so anticipate this.
+	accounts, errWithCode := p.packageAccounts(ctx, requestingAccount, []*gtsmodel.Account{account})
+	if errWithCode != nil {
+		return nil, errWithCode
+	}
+
+	if len(accounts) == 0 {
+		// Account was not visible to the requesting account.
+		err := fmt.Errorf("%s not found", query)
+		return nil, gtserror.NewErrorNotFound(err, err.Error())
+	}
+
+	// We got a hit!
+	return accounts[0], nil
+}
diff --git a/internal/processing/search/search.go b/internal/processing/search/search.go
new file mode 100644
index 000000000..907877789
--- /dev/null
+++ b/internal/processing/search/search.go
@@ -0,0 +1,42 @@
+// GoToSocial
+// Copyright (C) GoToSocial Authors admin@gotosocial.org
+// SPDX-License-Identifier: AGPL-3.0-or-later
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package search
+
+import (
+	"github.com/superseriousbusiness/gotosocial/internal/federation"
+	"github.com/superseriousbusiness/gotosocial/internal/state"
+	"github.com/superseriousbusiness/gotosocial/internal/typeutils"
+	"github.com/superseriousbusiness/gotosocial/internal/visibility"
+)
+
+// Processor bundles the dependencies used by the search
+// functions in this package: database state, the federator
+// for remote resolution, the type converter for building
+// API models, and the visibility filter.
+type Processor struct {
+	state     *state.State
+	federator federation.Federator
+	tc        typeutils.TypeConverter
+	filter    *visibility.Filter
+}
+
+// New returns a new search processor.
+func New(state *state.State, federator federation.Federator, tc typeutils.TypeConverter, filter *visibility.Filter) Processor {
+	// Start from the zero value and wire
+	// in each dependency one at a time.
+	var processor Processor
+	processor.state = state
+	processor.federator = federator
+	processor.tc = tc
+	processor.filter = filter
+	return processor
+}
diff --git a/internal/processing/search/util.go b/internal/processing/search/util.go
new file mode 100644
index 000000000..4172e4e1a
--- /dev/null
+++ b/internal/processing/search/util.go
@@ -0,0 +1,138 @@
+// GoToSocial
+// Copyright (C) GoToSocial Authors admin@gotosocial.org
+// SPDX-License-Identifier: AGPL-3.0-or-later
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package search
+
+import (
+	"context"
+
+	apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
+	"github.com/superseriousbusiness/gotosocial/internal/gtserror"
+	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+	"github.com/superseriousbusiness/gotosocial/internal/log"
+)
+
+// includeAccounts reports whether a search of the
+// given queryType should return accounts, i.e. the
+// type is either "any" or specifically accounts.
+func includeAccounts(queryType string) bool {
+	if queryType == queryTypeAny {
+		return true
+	}
+	return queryType == queryTypeAccounts
+}
+
+// return true if given queryType should include statuses.
+func includeStatuses(queryType string) bool {
+	if queryType == queryTypeAny {
+		return true
+	}
+	return queryType == queryTypeStatuses
+}
+
+// packageAccounts turns the given database accounts into
+// their API representation for requestingAccount. Instance
+// accounts, accounts the requester may not see, and accounts
+// that fail conversion are left out; only a failing
+// visibility check aborts with an internal error.
+func (p *Processor) packageAccounts(
+	ctx context.Context,
+	requestingAccount *gtsmodel.Account,
+	accounts []*gtsmodel.Account,
+) ([]*apimodel.Account, gtserror.WithCode) {
+	packaged := make([]*apimodel.Account, 0, len(accounts))
+
+	for _, target := range accounts {
+		// Instance accounts are never shown in results.
+		if target.IsInstance() {
+			continue
+		}
+
+		// The requester must be allowed to see the target.
+		canSee, err := p.filter.AccountVisible(ctx, requestingAccount, target)
+		switch {
+		case err != nil:
+			err = gtserror.Newf("error checking visibility of account %s for account %s: %w", target.ID, requestingAccount.ID, err)
+			return nil, gtserror.NewErrorInternalError(err)
+
+		case !canSee:
+			log.Debugf(ctx, "account %s is not visible to account %s, skipping this result", target.ID, requestingAccount.ID)
+			continue
+		}
+
+		// Conversion failures only cost us this one result.
+		converted, err := p.tc.AccountToAPIAccountPublic(ctx, target)
+		if err != nil {
+			log.Debugf(ctx, "skipping account %s because it couldn't be converted to its api representation: %s", target.ID, err)
+			continue
+		}
+
+		packaged = append(packaged, converted)
+	}
+
+	return packaged, nil
+}
+
+// packageStatuses turns the given database statuses into
+// their API representation for requestingAccount. Statuses
+// the requester may not see and statuses that fail
+// conversion are left out; only a failing visibility
+// check aborts with an internal error.
+func (p *Processor) packageStatuses(
+	ctx context.Context,
+	requestingAccount *gtsmodel.Account,
+	statuses []*gtsmodel.Status,
+) ([]*apimodel.Status, gtserror.WithCode) {
+	packaged := make([]*apimodel.Status, 0, len(statuses))
+
+	for _, target := range statuses {
+		// The requester must be allowed to see the target.
+		canSee, err := p.filter.StatusVisible(ctx, requestingAccount, target)
+		switch {
+		case err != nil:
+			err = gtserror.Newf("error checking visibility of status %s for account %s: %w", target.ID, requestingAccount.ID, err)
+			return nil, gtserror.NewErrorInternalError(err)
+
+		case !canSee:
+			log.Debugf(ctx, "status %s is not visible to account %s, skipping this result", target.ID, requestingAccount.ID)
+			continue
+		}
+
+		// Conversion failures only cost us this one result.
+		converted, err := p.tc.StatusToAPIStatus(ctx, target, requestingAccount)
+		if err != nil {
+			log.Debugf(ctx, "skipping status %s because it couldn't be converted to its api representation: %s", target.ID, err)
+			continue
+		}
+
+		packaged = append(packaged, converted)
+	}
+
+	return packaged, nil
+}
+
+// packageSearchResult converts the given accounts and
+// statuses with packageAccounts and packageStatuses and
+// combines them into one apimodel SearchResult. Hashtags
+// is always set to an empty, non-nil slice.
+func (p *Processor) packageSearchResult(
+	ctx context.Context,
+	requestingAccount *gtsmodel.Account,
+	accounts []*gtsmodel.Account,
+	statuses []*gtsmodel.Status,
+) (*apimodel.SearchResult, gtserror.WithCode) {
+	apiAccounts, errWithCode := p.packageAccounts(ctx, requestingAccount, accounts)
+	if errWithCode != nil {
+		return nil, errWithCode
+	}
+
+	apiStatuses, errWithCode := p.packageStatuses(ctx, requestingAccount, statuses)
+	if errWithCode != nil {
+		return nil, errWithCode
+	}
+
+	result := &apimodel.SearchResult{
+		Accounts: apiAccounts,
+		Statuses: apiStatuses,
+		Hashtags: make([]*apimodel.Tag, 0),
+	}
+
+	return result, nil
+}
|