summary | refs | log | tree | commit | diff
path: root/internal/regexes
diff options
context:
space:
mode:
author: ugla <ugla@u8.is> 2022-11-15 16:05:34 +0100
committer: GitHub <noreply@github.com> 2022-11-15 16:05:34 +0100
commit: 52109776f63ac59b2fef5cd7417becd9f0007acb (patch)
tree: 8ee4fd138806357257c975621bc20ba9141ddbe9 /internal/regexes
parent: [chore] fix profile spacing on very small screens (#1050) (diff)
download: gotosocial-52109776f63ac59b2fef5cd7417becd9f0007acb.tar.xz
[bugfix] Fix unicode-unaware word boundary check in hashtags (#1049)
* [bugfix] Fix unicode-unaware word boundary check in hashtag regex. Go's `\b` only matches ASCII word boundaries and is not Unicode-aware, and without lookahead the workarounds got very ugly, so the regex was replaced with a parser. The parser runs in O(n) time, so performance should not be affected.
* [bugfix] Add back hashtag max length and add tests for it.
Diffstat (limited to 'internal/regexes')
-rw-r--r-- internal/regexes/regexes.go 9
1 file changed, 1 insertion, 8 deletions
diff --git a/internal/regexes/regexes.go b/internal/regexes/regexes.go
index dd3d9ce40..c9286611e 100644
--- a/internal/regexes/regexes.go
+++ b/internal/regexes/regexes.go
@@ -47,7 +47,6 @@ const (
const (
maximumUsernameLength = 64
maximumEmojiShortcodeLength = 30
- maximumHashtagLength = 30
)
var (
@@ -66,17 +65,11 @@ var (
// such as @whatever_user@example.org, returning whatever_user and example.org (without the @ symbols)
MentionName = regexp.MustCompile(mentionName)
- // mention regex can be played around with here: https://regex101.com/r/G1oGR0/1
+ // mention regex can be played around with here: https://regex101.com/r/P0vpYG/1
mentionFinder = `(?:^|\s)(@\w+(?:@[a-zA-Z0-9_\-\.]+)?)`
// MentionFinder extracts mentions from a piece of text.
MentionFinder = regexp.MustCompile(mentionFinder)
- // hashtag regex can be played with here: https://regex101.com/r/bpyGlj/1
- hashtagFinder = fmt.Sprintf(`(?:^|\s)(?:#*)(#[\p{L}\p{N}]{1,%d})(?:#|\b)`, maximumHashtagLength)
- // HashtagFinder finds possible hashtags in a string.
- // It returns just the string part of the hashtag, not the # symbol.
- HashtagFinder = regexp.MustCompile(hashtagFinder)
-
emojiShortcode = fmt.Sprintf(`\w{2,%d}`, maximumEmojiShortcodeLength)
// EmojiShortcode validates an emoji name.
EmojiShortcode = regexp.MustCompile(fmt.Sprintf("^%s$", emojiShortcode))