From 664713ddd4f7236fde0759cf7a0e04a434417876 Mon Sep 17 00:00:00 2001 From: tobi <31960611+tsmethurst@users.noreply.github.com> Date: Sun, 3 Jul 2022 11:03:03 +0200 Subject: [bugfix] Make hashtag regex work with non-ascii characters (#682) --- internal/regexes/regexes.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'internal/regexes/regexes.go') diff --git a/internal/regexes/regexes.go b/internal/regexes/regexes.go index 58635b6b4..dd3d9ce40 100644 --- a/internal/regexes/regexes.go +++ b/internal/regexes/regexes.go @@ -71,8 +71,8 @@ var ( // MentionFinder extracts mentions from a piece of text. MentionFinder = regexp.MustCompile(mentionFinder) - // hashtag regex can be played with here: https://regex101.com/r/bPxeca/1 - hashtagFinder = fmt.Sprintf(`(?:^|\s)(?:#*)(#[a-zA-Z0-9]{1,%d})(?:#|\b)`, maximumHashtagLength) + // hashtag regex can be played with here: https://regex101.com/r/bpyGlj/1 + hashtagFinder = fmt.Sprintf(`(?:^|\s)(?:#*)(#[\p{L}\p{N}]{1,%d})(?:#|\b)`, maximumHashtagLength) // HashtagFinder finds possible hashtags in a string. // It returns just the string part of the hashtag, not the # symbol. HashtagFinder = regexp.MustCompile(hashtagFinder) -- cgit v1.2.3