summary refs log tree commit diff
path: root/internal/util
diff options
context:
space:
mode:
Diffstat (limited to 'internal/util')
-rw-r--r-- internal/util/regexes.go 17
-rw-r--r-- internal/util/statustools.go 24
-rw-r--r-- internal/util/statustools_test.go 15
-rw-r--r-- internal/util/validation_test.go 18
4 files changed, 44 insertions, 30 deletions
diff --git a/internal/util/regexes.go b/internal/util/regexes.go
index 1ca34708f..c03fd878c 100644
--- a/internal/util/regexes.go
+++ b/internal/util/regexes.go
@@ -30,25 +30,26 @@ const (
)
var (
- mentionNameRegexString = `^@([a-zA-Z0-9_]+)(?:@([a-zA-Z0-9_\-\.]+)?)$`
+ mentionNameRegexString = `^@(\w+)(?:@([a-zA-Z0-9_\-\.]+)?)$`
// mention name regex captures the username and domain part from a mention string
// such as @whatever_user@example.org, returning whatever_user and example.org (without the @ symbols)
mentionNameRegex = regexp.MustCompile(mentionNameRegexString)
// mention regex can be played around with here: https://regex101.com/r/qwM9D3/1
- mentionFinderRegexString = `(?: |^|\W)(@[a-zA-Z0-9_]+(?:@[a-zA-Z0-9_\-\.]+)?)(?:[^a-zA-Z0-9]|\W|$)?`
+ mentionFinderRegexString = `(?:\B)(@\w+(?:@[a-zA-Z0-9_\-\.]+)?)(?:\B)?`
mentionFinderRegex = regexp.MustCompile(mentionFinderRegexString)
- // hashtag regex can be played with here: https://regex101.com/r/Vhy8pg/1
- hashtagFinderRegexString = fmt.Sprintf(`(?:\b)?#(\w{1,%d})(?:\b)`, maximumHashtagLength)
- hashtagFinderRegex = regexp.MustCompile(hashtagFinderRegexString)
+ // hashtag regex can be played with here: https://regex101.com/r/bPxeca/1
+ hashtagFinderRegexString = fmt.Sprintf(`(?:^|\n|\s)(#[a-zA-Z0-9]{1,%d})(?:\b)`, maximumHashtagLength)
+ // HashtagFinderRegex finds possible hashtags in a string.
+ // Its first capture group is the hashtag including the leading # symbol; callers strip the # themselves.
+ HashtagFinderRegex = regexp.MustCompile(hashtagFinderRegexString)
- // emoji shortcode regex can be played with here: https://regex101.com/r/zMDRaG/1
- emojiShortcodeRegexString = fmt.Sprintf(`[a-z0-9_]{2,%d}`, maximumEmojiShortcodeLength)
+ emojiShortcodeRegexString = fmt.Sprintf(`\w{2,%d}`, maximumEmojiShortcodeLength)
emojiShortcodeValidationRegex = regexp.MustCompile(fmt.Sprintf("^%s$", emojiShortcodeRegexString))
// emoji regex can be played with here: https://regex101.com/r/478XGM/1
- emojiFinderRegexString = fmt.Sprintf(`(?: |^|\W)?:(%s):(?:\b|\r)?`, emojiShortcodeRegexString)
+ emojiFinderRegexString = fmt.Sprintf(`(?:\B)?:(%s):(?:\B)?`, emojiShortcodeRegexString)
emojiFinderRegex = regexp.MustCompile(emojiFinderRegexString)
// usernameRegexString defines an acceptable username on this instance
diff --git a/internal/util/statustools.go b/internal/util/statustools.go
index b51f2c80c..93294da68 100644
--- a/internal/util/statustools.go
+++ b/internal/util/statustools.go
@@ -29,7 +29,6 @@ import (
//
// It will look for fully-qualified account names in the form "@user@example.org",
// or the form "@username" for local users.
-// The case of the returned mentions will be lowered, for consistency.
func DeriveMentionsFromStatus(status string) []string {
mentionedAccounts := []string{}
for _, m := range mentionFinderRegex.FindAllStringSubmatch(status, -1) {
@@ -44,16 +43,15 @@ func DeriveMentionsFromStatus(status string) []string {
// tags will be lowered, for consistency.
func DeriveHashtagsFromStatus(status string) []string {
tags := []string{}
- for _, m := range hashtagFinderRegex.FindAllStringSubmatch(status, -1) {
- tags = append(tags, m[1])
+ for _, m := range HashtagFinderRegex.FindAllStringSubmatch(status, -1) {
+ tags = append(tags, strings.TrimPrefix(m[1], "#"))
}
- return unique(tags)
+ return uniqueLower(tags)
}
// DeriveEmojisFromStatus takes a plaintext (ie., not html-formatted) status,
// and applies a regex to it to return a deduplicated list of emojis
-// used in that status, without the surround ::. The case of the returned
-// emojis will be lowered, for consistency.
+// used in that status, without the surrounding ::.
func DeriveEmojisFromStatus(status string) []string {
emojis := []string{}
for _, m := range emojiFinderRegex.FindAllStringSubmatch(status, -1) {
@@ -94,3 +92,17 @@ func unique(s []string) []string {
}
return list
}
+
+// uniqueLower returns a deduplicated version of a given string slice, with all entries converted to lowercase. Entries are compared after lowercasing, first-seen order is kept, and the result is a non-nil (possibly empty) slice.
+func uniqueLower(s []string) []string {
+ keys := make(map[string]bool) // lowercased entries that have already been appended to list
+ list := []string{}
+ for _, entry := range s {
+ eLower := strings.ToLower(entry)
+ if _, value := keys[eLower]; !value { // value is the map's presence flag, not the stored bool
+ keys[eLower] = true
+ list = append(list, eLower)
+ }
+ }
+ return list
+}
diff --git a/internal/util/statustools_test.go b/internal/util/statustools_test.go
index 2a12c7690..5bdce2d5a 100644
--- a/internal/util/statustools_test.go
+++ b/internal/util/statustools_test.go
@@ -37,17 +37,22 @@ func (suite *StatusTestSuite) TestDeriveMentionsOK() {
@someone_else@testing.best-horse.com can you confirm? @hello@test.lgbt
- @thisisalocaluser ! @NORWILL@THIS.one!!
+ @thisisalocaluser!
+
+ here is a duplicate mention: @hello@test.lgbt @hello@test.lgbt
+
+ @account1@whatever.com @account2@whatever.com
- here is a duplicate mention: @hello@test.lgbt
`
menchies := util.DeriveMentionsFromStatus(statusText)
- assert.Len(suite.T(), menchies, 4)
+ assert.Len(suite.T(), menchies, 6)
assert.Equal(suite.T(), "@dumpsterqueer@example.org", menchies[0])
assert.Equal(suite.T(), "@someone_else@testing.best-horse.com", menchies[1])
assert.Equal(suite.T(), "@hello@test.lgbt", menchies[2])
assert.Equal(suite.T(), "@thisisalocaluser", menchies[3])
+ assert.Equal(suite.T(), "@account1@whatever.com", menchies[4])
+ assert.Equal(suite.T(), "@account2@whatever.com", menchies[5])
}
func (suite *StatusTestSuite) TestDeriveMentionsEmpty() {
@@ -57,12 +62,14 @@ func (suite *StatusTestSuite) TestDeriveMentionsEmpty() {
}
func (suite *StatusTestSuite) TestDeriveHashtagsOK() {
- statusText := `#testing123 #also testing
+ statusText := `weeeeeeee #testing123 #also testing
# testing this one shouldn't work
#thisshouldwork
+ here's a link with a fragment: https://example.org/whatever#ahhh
+
#ThisShouldAlsoWork #not_this_though
#111111 thisalsoshouldn'twork#### ##`
diff --git a/internal/util/validation_test.go b/internal/util/validation_test.go
index 73f5cb977..639a89bbd 100644
--- a/internal/util/validation_test.go
+++ b/internal/util/validation_test.go
@@ -102,32 +102,32 @@ func (suite *ValidationTestSuite) TestValidateUsername() {
err = util.ValidateUsername(tooLong)
if assert.Error(suite.T(), err) {
- assert.Equal(suite.T(), fmt.Errorf("username should be no more than 64 chars but '%s' was 66", tooLong), err)
+ assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores, max 64 characters", tooLong), err)
}
err = util.ValidateUsername(withSpaces)
if assert.Error(suite.T(), err) {
- assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores", withSpaces), err)
+ assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores, max 64 characters", withSpaces), err)
}
err = util.ValidateUsername(weirdChars)
if assert.Error(suite.T(), err) {
- assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores", weirdChars), err)
+ assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores, max 64 characters", weirdChars), err)
}
err = util.ValidateUsername(leadingSpace)
if assert.Error(suite.T(), err) {
- assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores", leadingSpace), err)
+ assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores, max 64 characters", leadingSpace), err)
}
err = util.ValidateUsername(trailingSpace)
if assert.Error(suite.T(), err) {
- assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores", trailingSpace), err)
+ assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores, max 64 characters", trailingSpace), err)
}
err = util.ValidateUsername(newlines)
if assert.Error(suite.T(), err) {
- assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores", newlines), err)
+ assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores, max 64 characters", newlines), err)
}
err = util.ValidateUsername(goodUsername)
@@ -141,7 +141,6 @@ func (suite *ValidationTestSuite) TestValidateEmail() {
notAnEmailAddress := "this-is-no-email-address!"
almostAnEmailAddress := "@thisisalmostan@email.address"
aWebsite := "https://thisisawebsite.com"
- tooLong := "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaahhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhh@gmail.com"
emailAddress := "thisis.actually@anemail.address"
var err error
@@ -165,11 +164,6 @@ func (suite *ValidationTestSuite) TestValidateEmail() {
assert.Equal(suite.T(), errors.New("mail: missing '@' or angle-addr"), err)
}
- err = util.ValidateEmail(tooLong)
- if assert.Error(suite.T(), err) {
- assert.Equal(suite.T(), fmt.Errorf("email address should be no more than 256 chars but '%s' was 286", tooLong), err)
- }
-
err = util.ValidateEmail(emailAddress)
if assert.NoError(suite.T(), err) {
assert.Equal(suite.T(), nil, err)