diff options
Diffstat (limited to 'internal/util')
-rw-r--r-- | internal/util/regexes.go | 17 | ||||
-rw-r--r-- | internal/util/statustools.go | 24 | ||||
-rw-r--r-- | internal/util/statustools_test.go | 15 | ||||
-rw-r--r-- | internal/util/validation_test.go | 18 |
4 files changed, 44 insertions, 30 deletions
diff --git a/internal/util/regexes.go b/internal/util/regexes.go index 1ca34708f..c03fd878c 100644 --- a/internal/util/regexes.go +++ b/internal/util/regexes.go @@ -30,25 +30,26 @@ const ( ) var ( - mentionNameRegexString = `^@([a-zA-Z0-9_]+)(?:@([a-zA-Z0-9_\-\.]+)?)$` + mentionNameRegexString = `^@(\w+)(?:@([a-zA-Z0-9_\-\.]+)?)$` // mention name regex captures the username and domain part from a mention string // such as @whatever_user@example.org, returning whatever_user and example.org (without the @ symbols) mentionNameRegex = regexp.MustCompile(mentionNameRegexString) // mention regex can be played around with here: https://regex101.com/r/qwM9D3/1 - mentionFinderRegexString = `(?: |^|\W)(@[a-zA-Z0-9_]+(?:@[a-zA-Z0-9_\-\.]+)?)(?:[^a-zA-Z0-9]|\W|$)?` + mentionFinderRegexString = `(?:\B)(@\w+(?:@[a-zA-Z0-9_\-\.]+)?)(?:\B)?` mentionFinderRegex = regexp.MustCompile(mentionFinderRegexString) - // hashtag regex can be played with here: https://regex101.com/r/Vhy8pg/1 - hashtagFinderRegexString = fmt.Sprintf(`(?:\b)?#(\w{1,%d})(?:\b)`, maximumHashtagLength) - hashtagFinderRegex = regexp.MustCompile(hashtagFinderRegexString) + // hashtag regex can be played with here: https://regex101.com/r/bPxeca/1 + hashtagFinderRegexString = fmt.Sprintf(`(?:^|\n|\s)(#[a-zA-Z0-9]{1,%d})(?:\b)`, maximumHashtagLength) + // HashtagFinderRegex finds possible hashtags in a string. + // It returns just the string part of the hashtag, not the # symbol. + HashtagFinderRegex = regexp.MustCompile(hashtagFinderRegexString) - // emoji shortcode regex can be played with here: https://regex101.com/r/zMDRaG/1 - emojiShortcodeRegexString = fmt.Sprintf(`[a-z0-9_]{2,%d}`, maximumEmojiShortcodeLength) + emojiShortcodeRegexString = fmt.Sprintf(`\w{2,%d}`, maximumEmojiShortcodeLength) emojiShortcodeValidationRegex = regexp.MustCompile(fmt.Sprintf("^%s$", emojiShortcodeRegexString)) // emoji regex can be played with here: https://regex101.com/r/478XGM/1 - emojiFinderRegexString = fmt.Sprintf(`(?: |^|\W)?:(%s):(?:\b|\r)?`, emojiShortcodeRegexString) + emojiFinderRegexString = fmt.Sprintf(`(?:\B)?:(%s):(?:\B)?`, emojiShortcodeRegexString) emojiFinderRegex = regexp.MustCompile(emojiFinderRegexString) // usernameRegexString defines an acceptable username on this instance diff --git a/internal/util/statustools.go b/internal/util/statustools.go index b51f2c80c..93294da68 100644 --- a/internal/util/statustools.go +++ b/internal/util/statustools.go @@ -29,7 +29,6 @@ import ( // // It will look for fully-qualified account names in the form "@user@example.org". // or the form "@username" for local users. -// The case of the returned mentions will be lowered, for consistency. func DeriveMentionsFromStatus(status string) []string { mentionedAccounts := []string{} for _, m := range mentionFinderRegex.FindAllStringSubmatch(status, -1) { @@ -44,16 +43,15 @@ func DeriveMentionsFromStatus(status string) []string { // tags will be lowered, for consistency. func DeriveHashtagsFromStatus(status string) []string { tags := []string{} - for _, m := range hashtagFinderRegex.FindAllStringSubmatch(status, -1) { - tags = append(tags, m[1]) + for _, m := range HashtagFinderRegex.FindAllStringSubmatch(status, -1) { + tags = append(tags, strings.TrimPrefix(m[1], "#")) } - return unique(tags) + return uniqueLower(tags) } // DeriveEmojisFromStatus takes a plaintext (ie., not html-formatted) status, // and applies a regex to it to return a deduplicated list of emojis -// used in that status, without the surround ::. The case of the returned -// emojis will be lowered, for consistency. +// used in that status, without the surround ::. func DeriveEmojisFromStatus(status string) []string { emojis := []string{} for _, m := range emojiFinderRegex.FindAllStringSubmatch(status, -1) { @@ -94,3 +92,17 @@ func unique(s []string) []string { } return list } + +// uniqueLower returns a deduplicated version of a given string slice, with all entries converted to lowercase +func uniqueLower(s []string) []string { + keys := make(map[string]bool) + list := []string{} + for _, entry := range s { + eLower := strings.ToLower(entry) + if _, value := keys[eLower]; !value { + keys[eLower] = true + list = append(list, eLower) + } + } + return list +} diff --git a/internal/util/statustools_test.go b/internal/util/statustools_test.go index 2a12c7690..5bdce2d5a 100644 --- a/internal/util/statustools_test.go +++ b/internal/util/statustools_test.go @@ -37,17 +37,22 @@ func (suite *StatusTestSuite) TestDeriveMentionsOK() { @someone_else@testing.best-horse.com can you confirm? @hello@test.lgbt - @thisisalocaluser ! @NORWILL@THIS.one!! + @thisisalocaluser! + + here is a duplicate mention: @hello@test.lgbt @hello@test.lgbt + + @account1@whatever.com @account2@whatever.com - here is a duplicate mention: @hello@test.lgbt ` menchies := util.DeriveMentionsFromStatus(statusText) - assert.Len(suite.T(), menchies, 4) + assert.Len(suite.T(), menchies, 6) assert.Equal(suite.T(), "@dumpsterqueer@example.org", menchies[0]) assert.Equal(suite.T(), "@someone_else@testing.best-horse.com", menchies[1]) assert.Equal(suite.T(), "@hello@test.lgbt", menchies[2]) assert.Equal(suite.T(), "@thisisalocaluser", menchies[3]) + assert.Equal(suite.T(), "@account1@whatever.com", menchies[4]) + assert.Equal(suite.T(), "@account2@whatever.com", menchies[5]) } func (suite *StatusTestSuite) TestDeriveMentionsEmpty() { @@ -57,12 +62,14 @@ func (suite *StatusTestSuite) TestDeriveMentionsEmpty() { } func (suite *StatusTestSuite) TestDeriveHashtagsOK() { - statusText := `#testing123 #also testing + statusText := `weeeeeeee #testing123 #also testing # testing this one shouldn't work #thisshouldwork + here's a link with a fragment: https://example.org/whatever#ahhh + #ThisShouldAlsoWork #not_this_though #111111 thisalsoshouldn'twork#### ##` diff --git a/internal/util/validation_test.go b/internal/util/validation_test.go index 73f5cb977..639a89bbd 100644 --- a/internal/util/validation_test.go +++ b/internal/util/validation_test.go @@ -102,32 +102,32 @@ func (suite *ValidationTestSuite) TestValidateUsername() { err = util.ValidateUsername(tooLong) if assert.Error(suite.T(), err) { - assert.Equal(suite.T(), fmt.Errorf("username should be no more than 64 chars but '%s' was 66", tooLong), err) + assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores, max 64 characters", tooLong), err) } err = util.ValidateUsername(withSpaces) if assert.Error(suite.T(), err) { - assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores", withSpaces), err) + assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores, max 64 characters", withSpaces), err) } err = util.ValidateUsername(weirdChars) if assert.Error(suite.T(), err) { - assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores", weirdChars), err) + assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores, max 64 characters", weirdChars), err) } err = util.ValidateUsername(leadingSpace) if assert.Error(suite.T(), err) { - assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores", leadingSpace), err) + assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores, max 64 characters", leadingSpace), err) } err = util.ValidateUsername(trailingSpace) if assert.Error(suite.T(), err) { - assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores", trailingSpace), err) + assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores, max 64 characters", trailingSpace), err) } err = util.ValidateUsername(newlines) if assert.Error(suite.T(), err) { - assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores", newlines), err) + assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores, max 64 characters", newlines), err) } err = util.ValidateUsername(goodUsername) @@ -141,7 +141,6 @@ func (suite *ValidationTestSuite) TestValidateEmail() { notAnEmailAddress := "this-is-no-email-address!" almostAnEmailAddress := "@thisisalmostan@email.address" aWebsite := "https://thisisawebsite.com" - tooLong := "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaahhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhh@gmail.com" emailAddress := "thisis.actually@anemail.address" var err error @@ -165,11 +164,6 @@ func (suite *ValidationTestSuite) TestValidateEmail() { assert.Equal(suite.T(), errors.New("mail: missing '@' or angle-addr"), err) } - err = util.ValidateEmail(tooLong) - if assert.Error(suite.T(), err) { - assert.Equal(suite.T(), fmt.Errorf("email address should be no more than 256 chars but '%s' was 286", tooLong), err) - } - err = util.ValidateEmail(emailAddress) if assert.NoError(suite.T(), err) { assert.Equal(suite.T(), nil, err) |