summary | refs | log | tree | commit | diff
path: root/internal/text/normalize.go
diff options
context:
space:
mode:
author: Vyr Cossont <VyrCossont@users.noreply.github.com> 2025-01-31 02:42:55 -0800
committer: GitHub <noreply@github.com> 2025-01-31 11:42:55 +0100
commit: b9e0689359f347edc47487a8043c9004ead0770a (patch)
tree: 514077f83214533ec359a79e0033dcd9015d4ff2 /internal/text/normalize.go
parent: [feature] Add system message wrappers for pending replies and placeholder att... (diff)
download: gotosocial-b9e0689359f347edc47487a8043c9004ead0770a.tar.xz
[bugfix] Extend parser to handle more non-Latin hashtags (#3700)
* Allow marks after NFC normalization

  Includes regression test for the Tamil example from #3618

* Disallow just numbers + marks + underscore as hashtag
Diffstat (limited to 'internal/text/normalize.go')
-rw-r--r-- internal/text/normalize.go 17
1 files changed, 8 insertions, 9 deletions
diff --git a/internal/text/normalize.go b/internal/text/normalize.go
index d2e633d1e..ea266fb33 100644
--- a/internal/text/normalize.go
+++ b/internal/text/normalize.go
@@ -50,17 +50,16 @@ func NormalizeHashtag(text string) (string, bool) {
// Validate normalized result.
var (
- notJustUnderscores = false
- onlyPermittedChars = true
- lengthOK = true
+ atLeastOneRequiredChar = false
+ onlyPermittedChars = true
+ lengthOK = true
)
for i, r := range normalized {
- if r != '_' {
- // This isn't an underscore,
- // so the whole hashtag isn't
- // just underscores.
- notJustUnderscores = true
+ if !isPermittedIfNotEntireHashtag(r) {
+ // This isn't an underscore, mark, etc,
+ // so the hashtag contains at least one required character.
+ atLeastOneRequiredChar = true
}
if i >= maximumHashtagLength {
@@ -74,5 +73,5 @@ func NormalizeHashtag(text string) (string, bool) {
}
}
- return normalized, (lengthOK && onlyPermittedChars && notJustUnderscores)
+ return normalized, lengthOK && onlyPermittedChars && atLeastOneRequiredChar
}