diff options
author | 2023-06-21 18:26:40 +0200 | |
---|---|---|
committer | 2023-06-21 17:26:40 +0100 | |
commit | 831ae09f8bab04af854243421047371339c3e190 (patch) | |
tree | f7709d478cc363dc1899bdb658fe20e2dc7986f3 /internal/db | |
parent | [docs] Disambiguate docker version, don't recommend opening localhost (#1913) (diff) | |
download | gotosocial-831ae09f8bab04af854243421047371339c3e190.tar.xz |
[feature] Add partial text search for accounts + statuses (#1836)
Diffstat (limited to 'internal/db')
-rw-r--r-- | internal/db/bundb/bundb.go | 5 | ||||
-rw-r--r-- | internal/db/bundb/migrations/20230620103932_search_updates.go | 64 | ||||
-rw-r--r-- | internal/db/bundb/search.go | 422 | ||||
-rw-r--r-- | internal/db/bundb/search_test.go | 82 | ||||
-rw-r--r-- | internal/db/db.go | 1 | ||||
-rw-r--r-- | internal/db/search.go | 32 |
6 files changed, 606 insertions, 0 deletions
diff --git a/internal/db/bundb/bundb.go b/internal/db/bundb/bundb.go index f0329e898..9d616954a 100644 --- a/internal/db/bundb/bundb.go +++ b/internal/db/bundb/bundb.go @@ -71,6 +71,7 @@ type DBService struct { db.Notification db.Relationship db.Report + db.Search db.Session db.Status db.StatusBookmark @@ -204,6 +205,10 @@ func NewBunDBService(ctx context.Context, state *state.State) (db.DB, error) { conn: conn, state: state, }, + Search: &searchDB{ + conn: conn, + state: state, + }, Session: &sessionDB{ conn: conn, }, diff --git a/internal/db/bundb/migrations/20230620103932_search_updates.go b/internal/db/bundb/migrations/20230620103932_search_updates.go new file mode 100644 index 000000000..0e26069a8 --- /dev/null +++ b/internal/db/bundb/migrations/20230620103932_search_updates.go @@ -0,0 +1,64 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +package migrations + +import ( + "context" + + "github.com/superseriousbusiness/gotosocial/internal/log" + "github.com/uptrace/bun" +) + +func init() { + up := func(ctx context.Context, db *bun.DB) error { + return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { + // Drop previous in_reply_to_account_id index. + log.Info(ctx, "dropping previous statuses index, please wait and don't interrupt it (this may take a while)") + if _, err := tx. + NewDropIndex(). + Index("statuses_in_reply_to_account_id_idx"). + Exec(ctx); err != nil { + return err + } + + // Create new index to replace it, which also includes id DESC. + log.Info(ctx, "creating new statuses index, please wait and don't interrupt it (this may take a while)") + if _, err := tx. + NewCreateIndex(). + Table("statuses"). + Index("statuses_in_reply_to_account_id_id_idx"). + Column("in_reply_to_account_id"). + ColumnExpr("id DESC"). + Exec(ctx); err != nil { + return err + } + + return nil + }) + } + + down := func(ctx context.Context, db *bun.DB) error { + return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { + return nil + }) + } + + if err := Migrations.Register(up, down); err != nil { + panic(err) + } +} diff --git a/internal/db/bundb/search.go b/internal/db/bundb/search.go new file mode 100644 index 000000000..c05ebb8b1 --- /dev/null +++ b/internal/db/bundb/search.go @@ -0,0 +1,422 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +package bundb + +import ( + "context" + "strings" + + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/id" + "github.com/superseriousbusiness/gotosocial/internal/log" + "github.com/superseriousbusiness/gotosocial/internal/state" + "github.com/uptrace/bun" + "github.com/uptrace/bun/dialect" +) + +// todo: currently we pass an 'offset' parameter into functions owned by this struct, +// which is ignored. +// +// The idea of 'offset' is to allow callers to page through results without supplying +// maxID or minID params; they simply use the offset as more or less a 'page number'. +// This works fine when you're dealing with something like Elasticsearch, but for +// SQLite or Postgres 'LIKE' queries it doesn't really, because for each higher offset +// you have to calculate the value of all the previous offsets as well *within the +// execution time of the query*. It's MUCH more efficient to page using maxID and +// minID for queries like this. For now, then, we just ignore the offset and hope that +// the caller will page using maxID and minID instead. +// +// In future, however, it would be good to support offset in a way that doesn't totally +// destroy database queries. One option would be to cache previous offsets when paging +// down (which is the most common use case). +// +// For example, say a caller makes a call with offset 0: we run the query as normal, +// and in a 10 minute cache or something, store the next maxID value as it would be for +// offset 1, for the supplied query, limit, following, etc. Then when they call for +// offset 1, instead of supplying 'offset' in the query and causing slowdown, we check +// the cache to see if we have the next maxID value stored for that query, and use that +// instead. If a caller out of the blue requests offset 4 or something, on an empty cache, +// we could run the previous 4 queries and store the offsets for those before making the +// 5th call for page 4. +// +// This isn't ideal, of course, but at least we could cover the most common use case of +// a caller paging down through results. +type searchDB struct { + conn *DBConn + state *state.State +} + +// replacer is a thread-safe string replacer which escapes +// common SQLite + Postgres `LIKE` wildcard chars using the +// escape character `\`. Initialized as a var in this package +// so it can be reused. +var replacer = strings.NewReplacer( + `\`, `\\`, // Escape char. + `%`, `\%`, // Zero or more char. + `_`, `\_`, // Exactly one char. +) + +// whereSubqueryLike appends a WHERE clause to the +// given SelectQuery q, which searches for matches +// of searchQuery in the given subQuery using LIKE. +func whereSubqueryLike( + q *bun.SelectQuery, + subQuery *bun.SelectQuery, + searchQuery string, +) *bun.SelectQuery { + // Escape existing wildcard + escape + // chars in the search query string. + searchQuery = replacer.Replace(searchQuery) + + // Add our own wildcards back in; search + // zero or more chars around the query. + searchQuery = `%` + searchQuery + `%` + + // Append resulting WHERE + // clause to the main query. + return q.Where( + "(?) LIKE ? ESCAPE ?", + subQuery, searchQuery, `\`, + ) +} + +// Query example (SQLite): +// +// SELECT "account"."id" FROM "accounts" AS "account" +// WHERE (("account"."domain" IS NULL) OR ("account"."domain" != "account"."username")) +// AND ("account"."id" < 'ZZZZZZZZZZZZZZZZZZZZZZZZZZ') +// AND ("account"."id" IN (SELECT "target_account_id" FROM "follows" WHERE ("account_id" = '016T5Q3SQKBT337DAKVSKNXXW1'))) +// AND ((SELECT LOWER("account"."username" || COALESCE("account"."display_name", '') || COALESCE("account"."note", '')) AS "account_text") LIKE '%turtle%' ESCAPE '\') +// ORDER BY "account"."id" DESC LIMIT 10 +func (s *searchDB) SearchForAccounts( + ctx context.Context, + accountID string, + query string, + maxID string, + minID string, + limit int, + following bool, + offset int, +) ([]*gtsmodel.Account, error) { + // Ensure reasonable + if limit < 0 { + limit = 0 + } + + // Make educated guess for slice size + var ( + accountIDs = make([]string, 0, limit) + frontToBack = true + ) + + q := s.conn. + NewSelect(). + TableExpr("? AS ?", bun.Ident("accounts"), bun.Ident("account")). + // Select only IDs from table. + Column("account.id"). + // Try to ignore instance accounts. Account domain must + // be either nil or, if set, not equal to the account's + // username (which is commonly used to indicate it's an + // instance service account). + WhereGroup(" AND ", func(q *bun.SelectQuery) *bun.SelectQuery { + return q. + Where("? IS NULL", bun.Ident("account.domain")). + WhereOr("? != ?", bun.Ident("account.domain"), bun.Ident("account.username")) + }) + + // Return only items with a LOWER id than maxID. + if maxID == "" { + maxID = id.Highest + } + q = q.Where("? < ?", bun.Ident("account.id"), maxID) + + if minID != "" { + // Return only items with a HIGHER id than minID. + q = q.Where("? > ?", bun.Ident("account.id"), minID) + + // page up + frontToBack = false + } + + if following { + // Select only from accounts followed by accountID. + q = q.Where( + "? IN (?)", + bun.Ident("account.id"), + s.followedAccounts(accountID), + ) + } + + // Select account text as subquery. + accountTextSubq := s.accountText(following) + + // Search using LIKE for matches of query + // string within accountText subquery. + q = whereSubqueryLike(q, accountTextSubq, query) + + if limit > 0 { + // Limit amount of accounts returned. + q = q.Limit(limit) + } + + if frontToBack { + // Page down. + q = q.Order("account.id DESC") + } else { + // Page up. + q = q.Order("account.id ASC") + } + + if err := q.Scan(ctx, &accountIDs); err != nil { + return nil, s.conn.ProcessError(err) + } + + if len(accountIDs) == 0 { + return nil, nil + } + + // If we're paging up, we still want accounts + // to be sorted by ID desc, so reverse ids slice. + // https://zchee.github.io/golang-wiki/SliceTricks/#reversing + if !frontToBack { + for l, r := 0, len(accountIDs)-1; l < r; l, r = l+1, r-1 { + accountIDs[l], accountIDs[r] = accountIDs[r], accountIDs[l] + } + } + + accounts := make([]*gtsmodel.Account, 0, len(accountIDs)) + for _, id := range accountIDs { + // Fetch account from db for ID + account, err := s.state.DB.GetAccountByID(ctx, id) + if err != nil { + log.Errorf(ctx, "error fetching account %q: %v", id, err) + continue + } + + // Append account to slice + accounts = append(accounts, account) + } + + return accounts, nil +} + +// followedAccounts returns a subquery that selects only IDs +// of accounts that are followed by the given accountID. +func (s *searchDB) followedAccounts(accountID string) *bun.SelectQuery { + return s.conn. + NewSelect(). + TableExpr("? AS ?", bun.Ident("follows"), bun.Ident("follow")). + Column("follow.target_account_id"). + Where("? = ?", bun.Ident("follow.account_id"), accountID) +} + +// statusText returns a subquery that selects a concatenation +// of account username and display name as "account_text". If +// `following` is true, then account note will also be included +// in the concatenation. +func (s *searchDB) accountText(following bool) *bun.SelectQuery { + var ( + accountText = s.conn.NewSelect() + query string + args []interface{} + ) + + if following { + // If querying for accounts we follow, + // include note in text search params. + args = []interface{}{ + bun.Ident("account.username"), + bun.Ident("account.display_name"), "", + bun.Ident("account.note"), "", + bun.Ident("account_text"), + } + } else { + // If querying for accounts we're not following, + // don't include note in text search params. + args = []interface{}{ + bun.Ident("account.username"), + bun.Ident("account.display_name"), "", + bun.Ident("account_text"), + } + } + + // SQLite and Postgres use different syntaxes for + // concatenation, and we also need to use a + // different number of placeholders depending on + // following/not following. COALESCE calls ensure + // that we're not trying to concatenate null values. + d := s.conn.Dialect().Name() + switch { + + case d == dialect.SQLite && following: + query = "LOWER(? || COALESCE(?, ?) || COALESCE(?, ?)) AS ?" + + case d == dialect.SQLite && !following: + query = "LOWER(? || COALESCE(?, ?)) AS ?" + + case d == dialect.PG && following: + query = "LOWER(CONCAT(?, COALESCE(?, ?), COALESCE(?, ?))) AS ?" + + case d == dialect.PG && !following: + query = "LOWER(CONCAT(?, COALESCE(?, ?))) AS ?" + + default: + panic("db conn was neither pg not sqlite") + } + + return accountText.ColumnExpr(query, args...) +} + +// Query example (SQLite): +// +// SELECT "status"."id" +// FROM "statuses" AS "status" +// WHERE ("status"."boost_of_id" IS NULL) +// AND (("status"."account_id" = '01F8MH1H7YV1Z7D2C8K2730QBF') OR ("status"."in_reply_to_account_id" = '01F8MH1H7YV1Z7D2C8K2730QBF')) +// AND ("status"."id" < 'ZZZZZZZZZZZZZZZZZZZZZZZZZZ') +// AND ((SELECT LOWER("status"."content" || COALESCE("status"."content_warning", '')) AS "status_text") LIKE '%hello%' ESCAPE '\') +// ORDER BY "status"."id" DESC LIMIT 10 +func (s *searchDB) SearchForStatuses( + ctx context.Context, + accountID string, + query string, + maxID string, + minID string, + limit int, + offset int, +) ([]*gtsmodel.Status, error) { + // Ensure reasonable + if limit < 0 { + limit = 0 + } + + // Make educated guess for slice size + var ( + statusIDs = make([]string, 0, limit) + frontToBack = true + ) + + q := s.conn. + NewSelect(). + TableExpr("? AS ?", bun.Ident("statuses"), bun.Ident("status")). + // Select only IDs from table + Column("status.id"). + // Ignore boosts. + Where("? IS NULL", bun.Ident("status.boost_of_id")). + // Select only statuses created by + // accountID or replying to accountID. + WhereGroup(" AND ", func(q *bun.SelectQuery) *bun.SelectQuery { + return q. + Where("? = ?", bun.Ident("status.account_id"), accountID). + WhereOr("? = ?", bun.Ident("status.in_reply_to_account_id"), accountID) + }) + + // Return only items with a LOWER id than maxID. + if maxID == "" { + maxID = id.Highest + } + q = q.Where("? < ?", bun.Ident("status.id"), maxID) + + if minID != "" { + // return only statuses HIGHER (ie., newer) than minID + q = q.Where("? > ?", bun.Ident("status.id"), minID) + + // page up + frontToBack = false + } + + // Select status text as subquery. + statusTextSubq := s.statusText() + + // Search using LIKE for matches of query + // string within statusText subquery. + q = whereSubqueryLike(q, statusTextSubq, query) + + if limit > 0 { + // Limit amount of statuses returned. + q = q.Limit(limit) + } + + if frontToBack { + // Page down. + q = q.Order("status.id DESC") + } else { + // Page up. + q = q.Order("status.id ASC") + } + + if err := q.Scan(ctx, &statusIDs); err != nil { + return nil, s.conn.ProcessError(err) + } + + if len(statusIDs) == 0 { + return nil, nil + } + + // If we're paging up, we still want statuses + // to be sorted by ID desc, so reverse ids slice. + // https://zchee.github.io/golang-wiki/SliceTricks/#reversing + if !frontToBack { + for l, r := 0, len(statusIDs)-1; l < r; l, r = l+1, r-1 { + statusIDs[l], statusIDs[r] = statusIDs[r], statusIDs[l] + } + } + + statuses := make([]*gtsmodel.Status, 0, len(statusIDs)) + for _, id := range statusIDs { + // Fetch status from db for ID + status, err := s.state.DB.GetStatusByID(ctx, id) + if err != nil { + log.Errorf(ctx, "error fetching status %q: %v", id, err) + continue + } + + // Append status to slice + statuses = append(statuses, status) + } + + return statuses, nil +} + +// statusText returns a subquery that selects a concatenation +// of status content and content warning as "status_text". +func (s *searchDB) statusText() *bun.SelectQuery { + statusText := s.conn.NewSelect() + + // SQLite and Postgres use different + // syntaxes for concatenation. + switch s.conn.Dialect().Name() { + + case dialect.SQLite: + statusText = statusText.ColumnExpr( + "LOWER(? || COALESCE(?, ?)) AS ?", + bun.Ident("status.content"), bun.Ident("status.content_warning"), "", + bun.Ident("status_text")) + + case dialect.PG: + statusText = statusText.ColumnExpr( + "LOWER(CONCAT(?, COALESCE(?, ?))) AS ?", + bun.Ident("status.content"), bun.Ident("status.content_warning"), "", + bun.Ident("status_text")) + + default: + panic("db conn was neither pg not sqlite") + } + + return statusText +} diff --git a/internal/db/bundb/search_test.go b/internal/db/bundb/search_test.go new file mode 100644 index 000000000..d670c90d6 --- /dev/null +++ b/internal/db/bundb/search_test.go @@ -0,0 +1,82 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +package bundb_test + +import ( + "context" + "testing" + + "github.com/stretchr/testify/suite" + "github.com/superseriousbusiness/gotosocial/internal/db" +) + +type SearchTestSuite struct { + BunDBStandardTestSuite +} + +func (suite *SearchTestSuite) TestSearchAccountsTurtleAny() { + testAccount := suite.testAccounts["local_account_1"] + + accounts, err := suite.db.SearchForAccounts(context.Background(), testAccount.ID, "turtle", "", "", 10, false, 0) + suite.NoError(err) + suite.Len(accounts, 1) +} + +func (suite *SearchTestSuite) TestSearchAccountsTurtleFollowing() { + testAccount := suite.testAccounts["local_account_1"] + + accounts, err := suite.db.SearchForAccounts(context.Background(), testAccount.ID, "turtle", "", "", 10, true, 0) + suite.NoError(err) + suite.Len(accounts, 1) +} + +func (suite *SearchTestSuite) TestSearchAccountsPostFollowing() { + testAccount := suite.testAccounts["local_account_1"] + + accounts, err := suite.db.SearchForAccounts(context.Background(), testAccount.ID, "post", "", "", 10, true, 0) + suite.NoError(err) + suite.Len(accounts, 1) +} + +func (suite *SearchTestSuite) TestSearchAccountsPostAny() { + testAccount := suite.testAccounts["local_account_1"] + + accounts, err := suite.db.SearchForAccounts(context.Background(), testAccount.ID, "post", "", "", 10, false, 0) + suite.NoError(err, db.ErrNoEntries) + suite.Empty(accounts) +} + +func (suite *SearchTestSuite) TestSearchAccountsFossAny() { + testAccount := suite.testAccounts["local_account_1"] + + accounts, err := suite.db.SearchForAccounts(context.Background(), testAccount.ID, "foss", "", "", 10, false, 0) + suite.NoError(err) + suite.Len(accounts, 1) +} + +func (suite *SearchTestSuite) TestSearchStatuses() { + testAccount := suite.testAccounts["local_account_1"] + + statuses, err := suite.db.SearchForStatuses(context.Background(), testAccount.ID, "hello", "", "", 10, 0) + suite.NoError(err) + suite.Len(statuses, 1) +} + +func TestSearchTestSuite(t *testing.T) { + suite.Run(t, new(SearchTestSuite)) +} diff --git a/internal/db/db.go b/internal/db/db.go index f47a35bb3..f99bd212e 100644 --- a/internal/db/db.go +++ b/internal/db/db.go @@ -42,6 +42,7 @@ type DB interface { Notification Relationship Report + Search Session Status StatusBookmark diff --git a/internal/db/search.go b/internal/db/search.go new file mode 100644 index 000000000..b2ade0cfe --- /dev/null +++ b/internal/db/search.go @@ -0,0 +1,32 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +package db + +import ( + "context" + + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" +) + +type Search interface { + // SearchForAccounts uses the given query text to search for accounts that accountID follows. + SearchForAccounts(ctx context.Context, accountID string, query string, maxID string, minID string, limit int, following bool, offset int) ([]*gtsmodel.Account, error) + + // SearchForStatuses uses the given query text to search for statuses created by accountID, or in reply to accountID. + SearchForStatuses(ctx context.Context, accountID string, query string, maxID string, minID string, limit int, offset int) ([]*gtsmodel.Status, error) +} |