diff options
Diffstat (limited to 'internal/util')
-rw-r--r-- | internal/util/parse.go | 98 | ||||
-rw-r--r-- | internal/util/regexes.go | 36 | ||||
-rw-r--r-- | internal/util/status.go | 96 | ||||
-rw-r--r-- | internal/util/status_test.go | 105 | ||||
-rw-r--r-- | internal/util/validation.go | 10 |
5 files changed, 328 insertions, 17 deletions
diff --git a/internal/util/parse.go b/internal/util/parse.go index 375ab97f2..f0bcff5dc 100644 --- a/internal/util/parse.go +++ b/internal/util/parse.go @@ -1,32 +1,96 @@ +/* + GoToSocial + Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + package util -import "fmt" +import ( + "fmt" + "github.com/superseriousbusiness/gotosocial/internal/db/gtsmodel" + mastotypes "github.com/superseriousbusiness/gotosocial/internal/mastotypes/mastomodel" +) + +// URIs contains a bunch of URIs and URLs for a user, host, account, etc. type URIs struct { - HostURL string - UserURL string + HostURL string + UserURL string + StatusesURL string + UserURI string - InboxURL string - OutboxURL string - FollowersURL string - CollectionURL string + StatusesURI string + InboxURI string + OutboxURI string + FollowersURI string + CollectionURI string } +// GenerateURIs throws together a bunch of URIs for the given username, with the given protocol and host. 
func GenerateURIs(username string, protocol string, host string) *URIs { hostURL := fmt.Sprintf("%s://%s", protocol, host) userURL := fmt.Sprintf("%s/@%s", hostURL, username) + statusesURL := fmt.Sprintf("%s/statuses", userURL) + userURI := fmt.Sprintf("%s/users/%s", hostURL, username) - inboxURL := fmt.Sprintf("%s/inbox", userURI) - outboxURL := fmt.Sprintf("%s/outbox", userURI) - followersURL := fmt.Sprintf("%s/followers", userURI) - collectionURL := fmt.Sprintf("%s/collections/featured", userURI) + statusesURI := fmt.Sprintf("%s/statuses", userURI) + inboxURI := fmt.Sprintf("%s/inbox", userURI) + outboxURI := fmt.Sprintf("%s/outbox", userURI) + followersURI := fmt.Sprintf("%s/followers", userURI) + collectionURI := fmt.Sprintf("%s/collections/featured", userURI) return &URIs{ - HostURL: hostURL, - UserURL: userURL, + HostURL: hostURL, + UserURL: userURL, + StatusesURL: statusesURL, + UserURI: userURI, - InboxURL: inboxURL, - OutboxURL: outboxURL, - FollowersURL: followersURL, - CollectionURL: collectionURL, + StatusesURI: statusesURI, + InboxURI: inboxURI, + OutboxURI: outboxURI, + FollowersURI: followersURI, + CollectionURI: collectionURI, + } +} + +// ParseGTSVisFromMastoVis converts a mastodon visibility into its gts equivalent. 
+func ParseGTSVisFromMastoVis(m mastotypes.Visibility) gtsmodel.Visibility { + switch m { + case mastotypes.VisibilityPublic: + return gtsmodel.VisibilityPublic + case mastotypes.VisibilityUnlisted: + return gtsmodel.VisibilityUnlocked + case mastotypes.VisibilityPrivate: + return gtsmodel.VisibilityFollowersOnly + case mastotypes.VisibilityDirect: + return gtsmodel.VisibilityDirect + } + return "" +} + +// ParseMastoVisFromGTSVis converts a gts visibility into its mastodon equivalent +func ParseMastoVisFromGTSVis(m gtsmodel.Visibility) mastotypes.Visibility { + switch m { + case gtsmodel.VisibilityPublic: + return mastotypes.VisibilityPublic + case gtsmodel.VisibilityUnlocked: + return mastotypes.VisibilityUnlisted + case gtsmodel.VisibilityFollowersOnly, gtsmodel.VisibilityMutualsOnly: + return mastotypes.VisibilityPrivate + case gtsmodel.VisibilityDirect: + return mastotypes.VisibilityDirect } + return "" } diff --git a/internal/util/regexes.go b/internal/util/regexes.go new file mode 100644 index 000000000..60b397d86 --- /dev/null +++ b/internal/util/regexes.go @@ -0,0 +1,36 @@ +/* + GoToSocial + Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +package util + +import "regexp" + +var ( + // mention regex can be played around with here: https://regex101.com/r/qwM9D3/1 + mentionRegexString = `(?: |^|\W)(@[a-zA-Z0-9_]+(?:@[a-zA-Z0-9_\-\.]+)?)(?: |\n)` + mentionRegex = regexp.MustCompile(mentionRegexString) + // hashtag regex can be played with here: https://regex101.com/r/Vhy8pg/1 + hashtagRegexString = `(?: |^|\W)?#([a-zA-Z0-9]{1,30})(?:\b|\r)` + hashtagRegex = regexp.MustCompile(hashtagRegexString) + // emoji regex can be played with here: https://regex101.com/r/478XGM/1 + emojiRegexString = `(?: |^|\W)?:([a-zA-Z0-9_]{2,30}):(?:\b|\r)?` + emojiRegex = regexp.MustCompile(emojiRegexString) + // emoji shortcode regex can be played with here: https://regex101.com/r/zMDRaG/1 + emojiShortcodeString = `^[a-z0-9_]{2,30}$` + emojiShortcodeRegex = regexp.MustCompile(emojiShortcodeString) +) diff --git a/internal/util/status.go b/internal/util/status.go new file mode 100644 index 000000000..e4b3ec6a5 --- /dev/null +++ b/internal/util/status.go @@ -0,0 +1,96 @@ +/* + GoToSocial + Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +package util + +import ( + "strings" +) + +// DeriveMentions takes a plaintext (i.e., not html-formatted) status, +// and applies a regex to it to return a deduplicated list of accounts +// mentioned in that status. 
+// +// It will look for fully-qualified account names in the form "@user@example.org", +// or the form "@username" for local users. +// The case of the returned mentions will be lowered, for consistency. +func DeriveMentions(status string) []string { + mentionedAccounts := []string{} + for _, m := range mentionRegex.FindAllStringSubmatch(status, -1) { + mentionedAccounts = append(mentionedAccounts, m[1]) + } + return Lower(Unique(mentionedAccounts)) +} + +// DeriveHashtags takes a plaintext (i.e., not html-formatted) status, +// and applies a regex to it to return a deduplicated list of hashtags +// used in that status, without the leading #. The case of the returned +// tags will be lowered, for consistency. +func DeriveHashtags(status string) []string { + tags := []string{} + for _, m := range hashtagRegex.FindAllStringSubmatch(status, -1) { + tags = append(tags, m[1]) + } + return Lower(Unique(tags)) +} + +// DeriveEmojis takes a plaintext (i.e., not html-formatted) status, +// and applies a regex to it to return a deduplicated list of emojis +// used in that status, without the surrounding ::. The case of the returned +// emojis will be lowered, for consistency. +func DeriveEmojis(status string) []string { + emojis := []string{} + for _, m := range emojiRegex.FindAllStringSubmatch(status, -1) { + emojis = append(emojis, m[1]) + } + return Lower(Unique(emojis)) +} + +// Unique returns a deduplicated version of a given string slice. +func Unique(s []string) []string { + keys := make(map[string]bool) + list := []string{} + for _, entry := range s { + if _, value := keys[entry]; !value { + keys[entry] = true + list = append(list, entry) + } + } + return list +} + +// Lower lowercases all strings in a given string slice. +func Lower(s []string) []string { + new := []string{} + for _, i := range s { + new = append(new, strings.ToLower(i)) + } + return new +} + +// HTMLFormat takes a plaintext formatted status string, and converts it into +// a nice HTML-formatted string. 
+// +// This includes: +// - Replacing line-breaks with <p> +// - Replacing URLs with hrefs. +// - Replacing mentions with links to that account's URL as stored in the database. +func HTMLFormat(status string) string { + // TODO: write proper HTML formatting logic for a status + return status +} diff --git a/internal/util/status_test.go b/internal/util/status_test.go new file mode 100644 index 000000000..72bd3e885 --- /dev/null +++ b/internal/util/status_test.go @@ -0,0 +1,105 @@ +/* + GoToSocial + Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +package util + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" +) + +type StatusTestSuite struct { + suite.Suite +} + +func (suite *StatusTestSuite) TestDeriveMentionsOK() { + statusText := `@dumpsterqueer@example.org testing testing + + is this thing on? + + @someone_else@testing.best-horse.com can you confirm? @hello@test.lgbt + + @thisisalocaluser ! @NORWILL@THIS.one!! 
+ + here is a duplicate mention: @hello@test.lgbt + ` + + menchies := DeriveMentions(statusText) + assert.Len(suite.T(), menchies, 4) + assert.Equal(suite.T(), "@dumpsterqueer@example.org", menchies[0]) + assert.Equal(suite.T(), "@someone_else@testing.best-horse.com", menchies[1]) + assert.Equal(suite.T(), "@hello@test.lgbt", menchies[2]) + assert.Equal(suite.T(), "@thisisalocaluser", menchies[3]) +} + +func (suite *StatusTestSuite) TestDeriveMentionsEmpty() { + statusText := `` + menchies := DeriveMentions(statusText) + assert.Len(suite.T(), menchies, 0) +} + +func (suite *StatusTestSuite) TestDeriveHashtagsOK() { + statusText := `#testing123 #also testing + +# testing this one shouldn't work + + #thisshouldwork + +#ThisShouldAlsoWork #not_this_though + +#111111 thisalsoshouldn'twork#### ##` + + tags := DeriveHashtags(statusText) + assert.Len(suite.T(), tags, 5) + assert.Equal(suite.T(), "testing123", tags[0]) + assert.Equal(suite.T(), "also", tags[1]) + assert.Equal(suite.T(), "thisshouldwork", tags[2]) + assert.Equal(suite.T(), "thisshouldalsowork", tags[3]) + assert.Equal(suite.T(), "111111", tags[4]) +} + +func (suite *StatusTestSuite) TestDeriveEmojiOK() { + statusText := `:test: :another: + +Here's some normal text with an :emoji: at the end + +:spaces shouldnt work: + +:emoji1::emoji2: + +:anotheremoji:emoji2: +:anotheremoji::anotheremoji::anotheremoji::anotheremoji: +:underscores_ok_too: +` + + tags := DeriveEmojis(statusText) + assert.Len(suite.T(), tags, 7) + assert.Equal(suite.T(), "test", tags[0]) + assert.Equal(suite.T(), "another", tags[1]) + assert.Equal(suite.T(), "emoji", tags[2]) + assert.Equal(suite.T(), "emoji1", tags[3]) + assert.Equal(suite.T(), "emoji2", tags[4]) + assert.Equal(suite.T(), "anotheremoji", tags[5]) + assert.Equal(suite.T(), "underscores_ok_too", tags[6]) +} + +func TestStatusTestSuite(t *testing.T) { + suite.Run(t, new(StatusTestSuite)) +} diff --git a/internal/util/validation.go b/internal/util/validation.go index 
88a56875c..8102bc35d 100644 --- a/internal/util/validation.go +++ b/internal/util/validation.go @@ -142,3 +142,13 @@ func ValidatePrivacy(privacy string) error { // TODO: add some validation logic here -- length, characters, etc return nil } + +// ValidateEmojiShortcode just runs the given shortcode through the regular expression +// for emoji shortcodes, to figure out whether it's a valid shortcode, i.e., 2-30 characters, +// lowercase a-z, numbers, and underscores. +func ValidateEmojiShortcode(shortcode string) error { + if !emojiShortcodeRegex.MatchString(shortcode) { + return fmt.Errorf("shortcode %s did not pass validation, must be between 2 and 30 characters, lowercase letters, numbers, and underscores only", shortcode) + } + return nil +} |