diff options
author | 2022-11-15 16:05:34 +0100 | |
---|---|---|
committer | 2022-11-15 16:05:34 +0100 | |
commit | 52109776f63ac59b2fef5cd7417becd9f0007acb (patch) | |
tree | 8ee4fd138806357257c975621bc20ba9141ddbe9 /internal/regexes | |
parent | [chore] fix profile spacing on very small screens (#1050) (diff) | |
download | gotosocial-52109776f63ac59b2fef5cd7417becd9f0007acb.tar.xz |
[bugfix] Fix unicode-unaware word boundary check in hashtags (#1049)
* [bugfix] Fix unicode-unaware word boundary check in hashtag regex
Go's `\b` is not Unicode-aware, and without lookahead support the workarounds
became very ugly, so I replaced the regex with a parser.
The parser runs in O(n) time and performance should not be affected.
* [bugfix] Add back hashtag max length and add tests for it
Diffstat (limited to 'internal/regexes')
-rw-r--r-- | internal/regexes/regexes.go | 9 |
1 file changed, 1 insertion, 8 deletions
```diff
diff --git a/internal/regexes/regexes.go b/internal/regexes/regexes.go
index dd3d9ce40..c9286611e 100644
--- a/internal/regexes/regexes.go
+++ b/internal/regexes/regexes.go
@@ -47,7 +47,6 @@ const (
 const (
 	maximumUsernameLength = 64
 	maximumEmojiShortcodeLength = 30
-	maximumHashtagLength = 30
 )
 
 var (
@@ -66,17 +65,11 @@ var (
 	// such as @whatever_user@example.org, returning whatever_user and example.org (without the @ symbols)
 	MentionName = regexp.MustCompile(mentionName)
 
-	// mention regex can be played around with here: https://regex101.com/r/G1oGR0/1
+	// mention regex can be played around with here: https://regex101.com/r/P0vpYG/1
 	mentionFinder = `(?:^|\s)(@\w+(?:@[a-zA-Z0-9_\-\.]+)?)`
 	// MentionFinder extracts mentions from a piece of text.
 	MentionFinder = regexp.MustCompile(mentionFinder)
 
-	// hashtag regex can be played with here: https://regex101.com/r/bpyGlj/1
-	hashtagFinder = fmt.Sprintf(`(?:^|\s)(?:#*)(#[\p{L}\p{N}]{1,%d})(?:#|\b)`, maximumHashtagLength)
-	// HashtagFinder finds possible hashtags in a string.
-	// It returns just the string part of the hashtag, not the # symbol.
-	HashtagFinder = regexp.MustCompile(hashtagFinder)
-
 	emojiShortcode = fmt.Sprintf(`\w{2,%d}`, maximumEmojiShortcodeLength)
 	// EmojiShortcode validates an emoji name.
 	EmojiShortcode = regexp.MustCompile(fmt.Sprintf("^%s$", emojiShortcode))
```