From b9e0689359f347edc47487a8043c9004ead0770a Mon Sep 17 00:00:00 2001
From: Vyr Cossont
Date: Fri, 31 Jan 2025 02:42:55 -0800
Subject: [bugfix] Extend parser to handle more non-Latin hashtags (#3700)

* Allow marks after NFC normalization

  Includes regression test for the Tamil example from #3618

* Disallow just numbers + marks + underscore as hashtag
---
 internal/text/goldmark_parsers.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'internal/text/goldmark_parsers.go')

diff --git a/internal/text/goldmark_parsers.go b/internal/text/goldmark_parsers.go
index b7cf4f9e9..e2c87e057 100644
--- a/internal/text/goldmark_parsers.go
+++ b/internal/text/goldmark_parsers.go
@@ -177,7 +177,7 @@ func (p *hashtagParser) Parse(
 			// Ignore initial '#'.
 			continue
 
-		case !isPlausiblyInHashtag(r) &&
+		case !isPermittedInHashtag(r) &&
 			!isHashtagBoundary(r):
 			// Weird non-boundary character
 			// in the hashtag. Don't trust it.
-- 
cgit v1.2.3