summary | refs | log | tree | commit | diff
path: root/internal/text/util.go
diff options
context:
space:
mode:
author: Vyr Cossont <VyrCossont@users.noreply.github.com> 2025-01-31 02:42:55 -0800
committer: GitHub <noreply@github.com> 2025-01-31 11:42:55 +0100
commit: b9e0689359f347edc47487a8043c9004ead0770a (patch)
tree: 514077f83214533ec359a79e0033dcd9015d4ff2 /internal/text/util.go
parent: [feature] Add system message wrappers for pending replies and placeholder att... (diff)
download: gotosocial-b9e0689359f347edc47487a8043c9004ead0770a.tar.xz
[bugfix] Extend parser to handle more non-Latin hashtags (#3700)
* Allow marks after NFC normalization

  Includes regression test for the Tamil example from #3618

* Disallow just numbers + marks + underscore as hashtag
Diffstat (limited to 'internal/text/util.go')
-rw-r--r-- internal/text/util.go 17
1 files changed, 6 insertions, 11 deletions
diff --git a/internal/text/util.go b/internal/text/util.go
index af45cfaf0..47b2416dd 100644
--- a/internal/text/util.go
+++ b/internal/text/util.go
@@ -19,19 +19,14 @@ package text
import "unicode"
-func isPlausiblyInHashtag(r rune) bool {
- // Marks are allowed during parsing
- // prior to normalization, but not after,
- // since they may be combined into letters
- // during normalization.
- return unicode.IsMark(r) ||
- isPermittedInHashtag(r)
+func isPermittedInHashtag(r rune) bool {
+ return unicode.IsLetter(r) || isPermittedIfNotEntireHashtag(r)
}
-func isPermittedInHashtag(r rune) bool {
- return unicode.IsLetter(r) ||
- unicode.IsNumber(r) ||
- r == '_'
+// isPermittedIfNotEntireHashtag is true for characters that may be in a hashtag
+// but are not allowed to be the only characters making up the hashtag.
+func isPermittedIfNotEntireHashtag(r rune) bool {
+ return unicode.IsNumber(r) || unicode.IsMark(r) || r == '_'
}
// isHashtagBoundary returns true if rune r