From 9477fd7eba9bda6813b65c6c54380904892ca35e Mon Sep 17 00:00:00 2001
From: tobi <31960611+tsmethurst@users.noreply.github.com>
Date: Sun, 8 Dec 2024 16:03:00 +0100
Subject: [feature] Allow partial-word hashtags using non-breaking spaces (#3606)

* [feature] Allow partial-word hashtags using non-breaking spaces

* update docs
---
 internal/text/plain_test.go | 13 +++++++++++++
 internal/text/util.go       | 30 ++++++++++++++++++++++++++++--
 2 files changed, 41 insertions(+), 2 deletions(-)

(limited to 'internal/text')

diff --git a/internal/text/plain_test.go b/internal/text/plain_test.go
index 48280bb44..fac54a38e 100644
--- a/internal/text/plain_test.go
+++ b/internal/text/plain_test.go
@@ -36,6 +36,8 @@ const (
 	moreComplexExpected = "
Another test @foss_satan
#Hashtag
Text
:rainbow:
here's a link with utf-8 characters in it: https://example.org/söme_url
" + withFunkyTags = "#hashtag1 pee #hashtag2\u200Bpee #hashtag3|poo #hashtag4\uFEFFpoo" + withFunkyTagsExpected = "#hashtag1 pee #hashtag2\u200bpee #hashtag3|poo #hashtag4\ufeffpoo
"
 )
 
 type PlainTestSuite struct {
@@ -136,6 +138,17 @@ func (suite *PlainTestSuite) TestDeriveHashtagsOK() {
 	suite.Equal("올빼미", tags[0].Name)
 }
 
+func (suite *PlainTestSuite) TestFunkyTags() {
+	formatted := suite.FromPlain(withFunkyTags)
+	suite.Equal(withFunkyTagsExpected, formatted.HTML)
+
+	tags := formatted.Tags
+	suite.Equal("hashtag1", tags[0].Name)
+	suite.Equal("hashtag2", tags[1].Name)
+	suite.Equal("hashtag3", tags[2].Name)
+	suite.Equal("hashtag4", tags[3].Name)
+}
+
 func (suite *PlainTestSuite) TestDeriveMultiple() {
 	statusText := `Another test @foss_satan@fossbros-anonymous.io
 
diff --git a/internal/text/util.go b/internal/text/util.go
index 204c64838..af45cfaf0 100644
--- a/internal/text/util.go
+++ b/internal/text/util.go
@@ -38,8 +38,34 @@ func isPermittedInHashtag(r rune) bool {
 // is a recognized break character for before
 // or after a #hashtag.
 func isHashtagBoundary(r rune) bool {
-	return unicode.IsSpace(r) ||
-		(unicode.IsPunct(r) && r != '_')
+	switch {
+
+	// Zero width space.
+	case r == '\u200B':
+		return true
+
+	// Zero width no-break space.
+	case r == '\uFEFF':
+		return true
+
+	// Pipe character sometimes
+	// used as workaround.
+	case r == '|':
+		return true
+
+	// Standard Unicode white space.
+	case unicode.IsSpace(r):
+		return true
+
+	// Non-underscore punctuation.
+	case unicode.IsPunct(r) && r != '_':
+		return true
+
+	// Not recognized
+	// hashtag boundary.
+	default:
+		return false
+	}
 }
 
 // isMentionBoundary returns true if rune r
--
cgit v1.3