summaryrefslogtreecommitdiff
path: root/internal/db
diff options
context:
space:
mode:
authorLibravatar tobi <31960611+tsmethurst@users.noreply.github.com>2023-06-21 18:26:40 +0200
committerLibravatar GitHub <noreply@github.com>2023-06-21 17:26:40 +0100
commit831ae09f8bab04af854243421047371339c3e190 (patch)
treef7709d478cc363dc1899bdb658fe20e2dc7986f3 /internal/db
parent[docs] Disambiguate docker version, don't recommend opening localhost (#1913) (diff)
downloadgotosocial-831ae09f8bab04af854243421047371339c3e190.tar.xz
[feature] Add partial text search for accounts + statuses (#1836)
Diffstat (limited to 'internal/db')
-rw-r--r--internal/db/bundb/bundb.go5
-rw-r--r--internal/db/bundb/migrations/20230620103932_search_updates.go64
-rw-r--r--internal/db/bundb/search.go422
-rw-r--r--internal/db/bundb/search_test.go82
-rw-r--r--internal/db/db.go1
-rw-r--r--internal/db/search.go32
6 files changed, 606 insertions, 0 deletions
diff --git a/internal/db/bundb/bundb.go b/internal/db/bundb/bundb.go
index f0329e898..9d616954a 100644
--- a/internal/db/bundb/bundb.go
+++ b/internal/db/bundb/bundb.go
@@ -71,6 +71,7 @@ type DBService struct {
db.Notification
db.Relationship
db.Report
+ db.Search
db.Session
db.Status
db.StatusBookmark
@@ -204,6 +205,10 @@ func NewBunDBService(ctx context.Context, state *state.State) (db.DB, error) {
conn: conn,
state: state,
},
+ Search: &searchDB{
+ conn: conn,
+ state: state,
+ },
Session: &sessionDB{
conn: conn,
},
diff --git a/internal/db/bundb/migrations/20230620103932_search_updates.go b/internal/db/bundb/migrations/20230620103932_search_updates.go
new file mode 100644
index 000000000..0e26069a8
--- /dev/null
+++ b/internal/db/bundb/migrations/20230620103932_search_updates.go
@@ -0,0 +1,64 @@
+// GoToSocial
+// Copyright (C) GoToSocial Authors admin@gotosocial.org
+// SPDX-License-Identifier: AGPL-3.0-or-later
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package migrations
+
+import (
+ "context"
+
+ "github.com/superseriousbusiness/gotosocial/internal/log"
+ "github.com/uptrace/bun"
+)
+
+func init() {
+ up := func(ctx context.Context, db *bun.DB) error {
+ return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
+ // Drop previous in_reply_to_account_id index.
+ log.Info(ctx, "dropping previous statuses index, please wait and don't interrupt it (this may take a while)")
+ if _, err := tx.
+ NewDropIndex().
+ Index("statuses_in_reply_to_account_id_idx").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ // Create new index to replace it, which also includes id DESC.
+ log.Info(ctx, "creating new statuses index, please wait and don't interrupt it (this may take a while)")
+ if _, err := tx.
+ NewCreateIndex().
+ Table("statuses").
+ Index("statuses_in_reply_to_account_id_id_idx").
+ Column("in_reply_to_account_id").
+ ColumnExpr("id DESC").
+ Exec(ctx); err != nil {
+ return err
+ }
+
+ return nil
+ })
+ }
+
+ down := func(ctx context.Context, db *bun.DB) error {
+ return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error {
+ return nil
+ })
+ }
+
+ if err := Migrations.Register(up, down); err != nil {
+ panic(err)
+ }
+}
diff --git a/internal/db/bundb/search.go b/internal/db/bundb/search.go
new file mode 100644
index 000000000..c05ebb8b1
--- /dev/null
+++ b/internal/db/bundb/search.go
@@ -0,0 +1,422 @@
+// GoToSocial
+// Copyright (C) GoToSocial Authors admin@gotosocial.org
+// SPDX-License-Identifier: AGPL-3.0-or-later
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package bundb
+
+import (
+ "context"
+ "strings"
+
+ "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+ "github.com/superseriousbusiness/gotosocial/internal/id"
+ "github.com/superseriousbusiness/gotosocial/internal/log"
+ "github.com/superseriousbusiness/gotosocial/internal/state"
+ "github.com/uptrace/bun"
+ "github.com/uptrace/bun/dialect"
+)
+
+// todo: currently we pass an 'offset' parameter into functions owned by this struct,
+// which is ignored.
+//
+// The idea of 'offset' is to allow callers to page through results without supplying
+// maxID or minID params; they simply use the offset as more or less a 'page number'.
+// This works fine when you're dealing with something like Elasticsearch, but for
+// SQLite or Postgres 'LIKE' queries it doesn't really, because for each higher offset
+// you have to calculate the value of all the previous offsets as well *within the
+// execution time of the query*. It's MUCH more efficient to page using maxID and
+// minID for queries like this. For now, then, we just ignore the offset and hope that
+// the caller will page using maxID and minID instead.
+//
+// In future, however, it would be good to support offset in a way that doesn't totally
+// destroy database queries. One option would be to cache previous offsets when paging
+// down (which is the most common use case).
+//
+// For example, say a caller makes a call with offset 0: we run the query as normal,
+// and in a 10 minute cache or something, store the next maxID value as it would be for
+// offset 1, for the supplied query, limit, following, etc. Then when they call for
+// offset 1, instead of supplying 'offset' in the query and causing slowdown, we check
+// the cache to see if we have the next maxID value stored for that query, and use that
+// instead. If a caller out of the blue requests offset 4 or something, on an empty cache,
+// we could run the previous 4 queries and store the offsets for those before making the
+// 5th call for page 4.
+//
+// This isn't ideal, of course, but at least we could cover the most common use case of
+// a caller paging down through results.
+type searchDB struct {
+ conn *DBConn
+ state *state.State
+}
+
+// replacer is a thread-safe string replacer which escapes
+// common SQLite + Postgres `LIKE` wildcard chars using the
+// escape character `\`. Initialized as a var in this package
+// so it can be reused.
+var replacer = strings.NewReplacer(
+ `\`, `\\`, // Escape char.
+ `%`, `\%`, // Zero or more char.
+ `_`, `\_`, // Exactly one char.
+)
+
+// whereSubqueryLike appends a WHERE clause to the
+// given SelectQuery q, which searches for matches
+// of searchQuery in the given subQuery using LIKE.
+func whereSubqueryLike(
+ q *bun.SelectQuery,
+ subQuery *bun.SelectQuery,
+ searchQuery string,
+) *bun.SelectQuery {
+ // Escape existing wildcard + escape
+ // chars in the search query string.
+ searchQuery = replacer.Replace(searchQuery)
+
+ // Add our own wildcards back in; search
+ // zero or more chars around the query.
+ searchQuery = `%` + searchQuery + `%`
+
+ // Append resulting WHERE
+ // clause to the main query.
+ return q.Where(
+ "(?) LIKE ? ESCAPE ?",
+ subQuery, searchQuery, `\`,
+ )
+}
+
+// Query example (SQLite):
+//
+// SELECT "account"."id" FROM "accounts" AS "account"
+// WHERE (("account"."domain" IS NULL) OR ("account"."domain" != "account"."username"))
+// AND ("account"."id" < 'ZZZZZZZZZZZZZZZZZZZZZZZZZZ')
+// AND ("account"."id" IN (SELECT "target_account_id" FROM "follows" WHERE ("account_id" = '016T5Q3SQKBT337DAKVSKNXXW1')))
+// AND ((SELECT LOWER("account"."username" || COALESCE("account"."display_name", '') || COALESCE("account"."note", '')) AS "account_text") LIKE '%turtle%' ESCAPE '\')
+// ORDER BY "account"."id" DESC LIMIT 10
+func (s *searchDB) SearchForAccounts(
+ ctx context.Context,
+ accountID string,
+ query string,
+ maxID string,
+ minID string,
+ limit int,
+ following bool,
+ offset int,
+) ([]*gtsmodel.Account, error) {
+ // Ensure reasonable
+ if limit < 0 {
+ limit = 0
+ }
+
+ // Make educated guess for slice size
+ var (
+ accountIDs = make([]string, 0, limit)
+ frontToBack = true
+ )
+
+ q := s.conn.
+ NewSelect().
+ TableExpr("? AS ?", bun.Ident("accounts"), bun.Ident("account")).
+ // Select only IDs from table.
+ Column("account.id").
+ // Try to ignore instance accounts. Account domain must
+ // be either nil or, if set, not equal to the account's
+ // username (which is commonly used to indicate it's an
+ // instance service account).
+ WhereGroup(" AND ", func(q *bun.SelectQuery) *bun.SelectQuery {
+ return q.
+ Where("? IS NULL", bun.Ident("account.domain")).
+ WhereOr("? != ?", bun.Ident("account.domain"), bun.Ident("account.username"))
+ })
+
+ // Return only items with a LOWER id than maxID.
+ if maxID == "" {
+ maxID = id.Highest
+ }
+ q = q.Where("? < ?", bun.Ident("account.id"), maxID)
+
+ if minID != "" {
+ // Return only items with a HIGHER id than minID.
+ q = q.Where("? > ?", bun.Ident("account.id"), minID)
+
+ // page up
+ frontToBack = false
+ }
+
+ if following {
+ // Select only from accounts followed by accountID.
+ q = q.Where(
+ "? IN (?)",
+ bun.Ident("account.id"),
+ s.followedAccounts(accountID),
+ )
+ }
+
+ // Select account text as subquery.
+ accountTextSubq := s.accountText(following)
+
+ // Search using LIKE for matches of query
+ // string within accountText subquery.
+ q = whereSubqueryLike(q, accountTextSubq, query)
+
+ if limit > 0 {
+ // Limit amount of accounts returned.
+ q = q.Limit(limit)
+ }
+
+ if frontToBack {
+ // Page down.
+ q = q.Order("account.id DESC")
+ } else {
+ // Page up.
+ q = q.Order("account.id ASC")
+ }
+
+ if err := q.Scan(ctx, &accountIDs); err != nil {
+ return nil, s.conn.ProcessError(err)
+ }
+
+ if len(accountIDs) == 0 {
+ return nil, nil
+ }
+
+ // If we're paging up, we still want accounts
+ // to be sorted by ID desc, so reverse ids slice.
+ // https://zchee.github.io/golang-wiki/SliceTricks/#reversing
+ if !frontToBack {
+ for l, r := 0, len(accountIDs)-1; l < r; l, r = l+1, r-1 {
+ accountIDs[l], accountIDs[r] = accountIDs[r], accountIDs[l]
+ }
+ }
+
+ accounts := make([]*gtsmodel.Account, 0, len(accountIDs))
+ for _, id := range accountIDs {
+ // Fetch account from db for ID
+ account, err := s.state.DB.GetAccountByID(ctx, id)
+ if err != nil {
+ log.Errorf(ctx, "error fetching account %q: %v", id, err)
+ continue
+ }
+
+ // Append account to slice
+ accounts = append(accounts, account)
+ }
+
+ return accounts, nil
+}
+
+// followedAccounts returns a subquery that selects only IDs
+// of accounts that are followed by the given accountID.
+func (s *searchDB) followedAccounts(accountID string) *bun.SelectQuery {
+ return s.conn.
+ NewSelect().
+ TableExpr("? AS ?", bun.Ident("follows"), bun.Ident("follow")).
+ Column("follow.target_account_id").
+ Where("? = ?", bun.Ident("follow.account_id"), accountID)
+}
+
+// statusText returns a subquery that selects a concatenation
+// of account username and display name as "account_text". If
+// `following` is true, then account note will also be included
+// in the concatenation.
+func (s *searchDB) accountText(following bool) *bun.SelectQuery {
+ var (
+ accountText = s.conn.NewSelect()
+ query string
+ args []interface{}
+ )
+
+ if following {
+ // If querying for accounts we follow,
+ // include note in text search params.
+ args = []interface{}{
+ bun.Ident("account.username"),
+ bun.Ident("account.display_name"), "",
+ bun.Ident("account.note"), "",
+ bun.Ident("account_text"),
+ }
+ } else {
+ // If querying for accounts we're not following,
+ // don't include note in text search params.
+ args = []interface{}{
+ bun.Ident("account.username"),
+ bun.Ident("account.display_name"), "",
+ bun.Ident("account_text"),
+ }
+ }
+
+ // SQLite and Postgres use different syntaxes for
+ // concatenation, and we also need to use a
+ // different number of placeholders depending on
+ // following/not following. COALESCE calls ensure
+ // that we're not trying to concatenate null values.
+ d := s.conn.Dialect().Name()
+ switch {
+
+ case d == dialect.SQLite && following:
+ query = "LOWER(? || COALESCE(?, ?) || COALESCE(?, ?)) AS ?"
+
+ case d == dialect.SQLite && !following:
+ query = "LOWER(? || COALESCE(?, ?)) AS ?"
+
+ case d == dialect.PG && following:
+ query = "LOWER(CONCAT(?, COALESCE(?, ?), COALESCE(?, ?))) AS ?"
+
+ case d == dialect.PG && !following:
+ query = "LOWER(CONCAT(?, COALESCE(?, ?))) AS ?"
+
+ default:
+ panic("db conn was neither pg not sqlite")
+ }
+
+ return accountText.ColumnExpr(query, args...)
+}
+
+// Query example (SQLite):
+//
+// SELECT "status"."id"
+// FROM "statuses" AS "status"
+// WHERE ("status"."boost_of_id" IS NULL)
+// AND (("status"."account_id" = '01F8MH1H7YV1Z7D2C8K2730QBF') OR ("status"."in_reply_to_account_id" = '01F8MH1H7YV1Z7D2C8K2730QBF'))
+// AND ("status"."id" < 'ZZZZZZZZZZZZZZZZZZZZZZZZZZ')
+// AND ((SELECT LOWER("status"."content" || COALESCE("status"."content_warning", '')) AS "status_text") LIKE '%hello%' ESCAPE '\')
+// ORDER BY "status"."id" DESC LIMIT 10
+func (s *searchDB) SearchForStatuses(
+ ctx context.Context,
+ accountID string,
+ query string,
+ maxID string,
+ minID string,
+ limit int,
+ offset int,
+) ([]*gtsmodel.Status, error) {
+ // Ensure reasonable
+ if limit < 0 {
+ limit = 0
+ }
+
+ // Make educated guess for slice size
+ var (
+ statusIDs = make([]string, 0, limit)
+ frontToBack = true
+ )
+
+ q := s.conn.
+ NewSelect().
+ TableExpr("? AS ?", bun.Ident("statuses"), bun.Ident("status")).
+ // Select only IDs from table
+ Column("status.id").
+ // Ignore boosts.
+ Where("? IS NULL", bun.Ident("status.boost_of_id")).
+ // Select only statuses created by
+ // accountID or replying to accountID.
+ WhereGroup(" AND ", func(q *bun.SelectQuery) *bun.SelectQuery {
+ return q.
+ Where("? = ?", bun.Ident("status.account_id"), accountID).
+ WhereOr("? = ?", bun.Ident("status.in_reply_to_account_id"), accountID)
+ })
+
+ // Return only items with a LOWER id than maxID.
+ if maxID == "" {
+ maxID = id.Highest
+ }
+ q = q.Where("? < ?", bun.Ident("status.id"), maxID)
+
+ if minID != "" {
+ // return only statuses HIGHER (ie., newer) than minID
+ q = q.Where("? > ?", bun.Ident("status.id"), minID)
+
+ // page up
+ frontToBack = false
+ }
+
+ // Select status text as subquery.
+ statusTextSubq := s.statusText()
+
+ // Search using LIKE for matches of query
+ // string within statusText subquery.
+ q = whereSubqueryLike(q, statusTextSubq, query)
+
+ if limit > 0 {
+ // Limit amount of statuses returned.
+ q = q.Limit(limit)
+ }
+
+ if frontToBack {
+ // Page down.
+ q = q.Order("status.id DESC")
+ } else {
+ // Page up.
+ q = q.Order("status.id ASC")
+ }
+
+ if err := q.Scan(ctx, &statusIDs); err != nil {
+ return nil, s.conn.ProcessError(err)
+ }
+
+ if len(statusIDs) == 0 {
+ return nil, nil
+ }
+
+ // If we're paging up, we still want statuses
+ // to be sorted by ID desc, so reverse ids slice.
+ // https://zchee.github.io/golang-wiki/SliceTricks/#reversing
+ if !frontToBack {
+ for l, r := 0, len(statusIDs)-1; l < r; l, r = l+1, r-1 {
+ statusIDs[l], statusIDs[r] = statusIDs[r], statusIDs[l]
+ }
+ }
+
+ statuses := make([]*gtsmodel.Status, 0, len(statusIDs))
+ for _, id := range statusIDs {
+ // Fetch status from db for ID
+ status, err := s.state.DB.GetStatusByID(ctx, id)
+ if err != nil {
+ log.Errorf(ctx, "error fetching status %q: %v", id, err)
+ continue
+ }
+
+ // Append status to slice
+ statuses = append(statuses, status)
+ }
+
+ return statuses, nil
+}
+
+// statusText returns a subquery that selects a concatenation
+// of status content and content warning as "status_text".
+func (s *searchDB) statusText() *bun.SelectQuery {
+ statusText := s.conn.NewSelect()
+
+ // SQLite and Postgres use different
+ // syntaxes for concatenation.
+ switch s.conn.Dialect().Name() {
+
+ case dialect.SQLite:
+ statusText = statusText.ColumnExpr(
+ "LOWER(? || COALESCE(?, ?)) AS ?",
+ bun.Ident("status.content"), bun.Ident("status.content_warning"), "",
+ bun.Ident("status_text"))
+
+ case dialect.PG:
+ statusText = statusText.ColumnExpr(
+ "LOWER(CONCAT(?, COALESCE(?, ?))) AS ?",
+ bun.Ident("status.content"), bun.Ident("status.content_warning"), "",
+ bun.Ident("status_text"))
+
+ default:
+ panic("db conn was neither pg not sqlite")
+ }
+
+ return statusText
+}
diff --git a/internal/db/bundb/search_test.go b/internal/db/bundb/search_test.go
new file mode 100644
index 000000000..d670c90d6
--- /dev/null
+++ b/internal/db/bundb/search_test.go
@@ -0,0 +1,82 @@
+// GoToSocial
+// Copyright (C) GoToSocial Authors admin@gotosocial.org
+// SPDX-License-Identifier: AGPL-3.0-or-later
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package bundb_test
+
+import (
+ "context"
+ "testing"
+
+ "github.com/stretchr/testify/suite"
+ "github.com/superseriousbusiness/gotosocial/internal/db"
+)
+
+type SearchTestSuite struct {
+ BunDBStandardTestSuite
+}
+
+func (suite *SearchTestSuite) TestSearchAccountsTurtleAny() {
+ testAccount := suite.testAccounts["local_account_1"]
+
+ accounts, err := suite.db.SearchForAccounts(context.Background(), testAccount.ID, "turtle", "", "", 10, false, 0)
+ suite.NoError(err)
+ suite.Len(accounts, 1)
+}
+
+func (suite *SearchTestSuite) TestSearchAccountsTurtleFollowing() {
+ testAccount := suite.testAccounts["local_account_1"]
+
+ accounts, err := suite.db.SearchForAccounts(context.Background(), testAccount.ID, "turtle", "", "", 10, true, 0)
+ suite.NoError(err)
+ suite.Len(accounts, 1)
+}
+
+func (suite *SearchTestSuite) TestSearchAccountsPostFollowing() {
+ testAccount := suite.testAccounts["local_account_1"]
+
+ accounts, err := suite.db.SearchForAccounts(context.Background(), testAccount.ID, "post", "", "", 10, true, 0)
+ suite.NoError(err)
+ suite.Len(accounts, 1)
+}
+
+func (suite *SearchTestSuite) TestSearchAccountsPostAny() {
+ testAccount := suite.testAccounts["local_account_1"]
+
+ accounts, err := suite.db.SearchForAccounts(context.Background(), testAccount.ID, "post", "", "", 10, false, 0)
+ suite.NoError(err, db.ErrNoEntries)
+ suite.Empty(accounts)
+}
+
+func (suite *SearchTestSuite) TestSearchAccountsFossAny() {
+ testAccount := suite.testAccounts["local_account_1"]
+
+ accounts, err := suite.db.SearchForAccounts(context.Background(), testAccount.ID, "foss", "", "", 10, false, 0)
+ suite.NoError(err)
+ suite.Len(accounts, 1)
+}
+
+func (suite *SearchTestSuite) TestSearchStatuses() {
+ testAccount := suite.testAccounts["local_account_1"]
+
+ statuses, err := suite.db.SearchForStatuses(context.Background(), testAccount.ID, "hello", "", "", 10, 0)
+ suite.NoError(err)
+ suite.Len(statuses, 1)
+}
+
+func TestSearchTestSuite(t *testing.T) {
+ suite.Run(t, new(SearchTestSuite))
+}
diff --git a/internal/db/db.go b/internal/db/db.go
index f47a35bb3..f99bd212e 100644
--- a/internal/db/db.go
+++ b/internal/db/db.go
@@ -42,6 +42,7 @@ type DB interface {
Notification
Relationship
Report
+ Search
Session
Status
StatusBookmark
diff --git a/internal/db/search.go b/internal/db/search.go
new file mode 100644
index 000000000..b2ade0cfe
--- /dev/null
+++ b/internal/db/search.go
@@ -0,0 +1,32 @@
+// GoToSocial
+// Copyright (C) GoToSocial Authors admin@gotosocial.org
+// SPDX-License-Identifier: AGPL-3.0-or-later
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package db
+
+import (
+ "context"
+
+ "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+)
+
+type Search interface {
+ // SearchForAccounts uses the given query text to search for accounts that accountID follows.
+ SearchForAccounts(ctx context.Context, accountID string, query string, maxID string, minID string, limit int, following bool, offset int) ([]*gtsmodel.Account, error)
+
+ // SearchForStatuses uses the given query text to search for statuses created by accountID, or in reply to accountID.
+ SearchForStatuses(ctx context.Context, accountID string, query string, maxID string, minID string, limit int, offset int) ([]*gtsmodel.Status, error)
+}