summary | refs | log | tree | commit | diff
path: root/internal/util/statustools_test.go
diff options
context:
space:
mode:
author: ugla <ugla@u8.is> 2022-11-15 16:05:34 +0100
committer: GitHub <noreply@github.com> 2022-11-15 16:05:34 +0100
commit: 52109776f63ac59b2fef5cd7417becd9f0007acb (patch)
tree: 8ee4fd138806357257c975621bc20ba9141ddbe9 /internal/util/statustools_test.go
parent: [chore] fix profile spacing on very small screens (#1050) (diff)
download: gotosocial-52109776f63ac59b2fef5cd7417becd9f0007acb.tar.xz
[bugfix] Fix unicode-unaware word boundary check in hashtags (#1049)
* [bugfix] Fix unicode-unaware word boundary check in hashtag regex

  Go `\b` does not care for Unicode, and without lookahead, the workarounds got very ugly. So I replaced the regex with a parser. The parser runs in O(n) time and performance should not be affected.

* [bugfix] Add back hashtag max length and add tests for it
Diffstat (limited to 'internal/util/statustools_test.go')
-rw-r--r-- internal/util/statustools_test.go 44
1 files changed, 34 insertions, 10 deletions
diff --git a/internal/util/statustools_test.go b/internal/util/statustools_test.go
index d9f344e4b..214fab553 100644
--- a/internal/util/statustools_test.go
+++ b/internal/util/statustools_test.go
@@ -77,26 +77,50 @@ func (suite *StatusTestSuite) TestDeriveHashtagsOK() {
# testing this one shouldn't work
- #thisshouldwork
+ #thisshouldwork #dupe #dupe!! #dupe
here's a link with a fragment: https://example.org/whatever#ahhh
+ here's another link with a fragment: https://example.org/whatever/#ahhh
-#ThisShouldAlsoWork #not_this_though
+(#ThisShouldAlsoWork) #not_this_though
#111111 thisalsoshouldn'twork#### ##
-#alimentación, #saúde
+#alimentación, #saúde, #lävistää, #ö, #네
+#ThisOneIsThirtyOneCharactersLon... ...ng
+#ThisOneIsThirteyCharactersLong
`
tags := util.DeriveHashtagsFromText(statusText)
- assert.Len(suite.T(), tags, 7)
+ assert.Len(suite.T(), tags, 12)
assert.Equal(suite.T(), "testing123", tags[0])
assert.Equal(suite.T(), "also", tags[1])
assert.Equal(suite.T(), "thisshouldwork", tags[2])
- assert.Equal(suite.T(), "ThisShouldAlsoWork", tags[3])
- assert.Equal(suite.T(), "111111", tags[4])
- assert.Equal(suite.T(), "alimentación", tags[5])
- assert.Equal(suite.T(), "saúde", tags[6])
+ assert.Equal(suite.T(), "dupe", tags[3])
+ assert.Equal(suite.T(), "ThisShouldAlsoWork", tags[4])
+ assert.Equal(suite.T(), "111111", tags[5])
+ assert.Equal(suite.T(), "alimentación", tags[6])
+ assert.Equal(suite.T(), "saúde", tags[7])
+ assert.Equal(suite.T(), "lävistää", tags[8])
+ assert.Equal(suite.T(), "ö", tags[9])
+ assert.Equal(suite.T(), "네", tags[10])
+ assert.Equal(suite.T(), "ThisOneIsThirteyCharactersLong", tags[11])
+
+ statusText = `#올빼미 hej`
+ tags = util.DeriveHashtagsFromText(statusText)
+ assert.Equal(suite.T(), "올빼미", tags[0])
+}
+
+func (suite *StatusTestSuite) TestHashtagSpansOK() {
+ statusText := `#0 #3   #8aa`
+
+ spans := util.FindHashtagSpansInText(statusText)
+ assert.Equal(suite.T(), 0, spans[0].First)
+ assert.Equal(suite.T(), 2, spans[0].Second)
+ assert.Equal(suite.T(), 3, spans[1].First)
+ assert.Equal(suite.T(), 5, spans[1].Second)
+ assert.Equal(suite.T(), 8, spans[2].First)
+ assert.Equal(suite.T(), 12, spans[2].Second)
}
func (suite *StatusTestSuite) TestDeriveEmojiOK() {
@@ -127,7 +151,7 @@ Here's some normal text with an :emoji: at the end
func (suite *StatusTestSuite) TestDeriveMultiple() {
statusText := `Another test @foss_satan@fossbros-anonymous.io
- #Hashtag
+ #HashTag
Text`
@@ -139,7 +163,7 @@ func (suite *StatusTestSuite) TestDeriveMultiple() {
assert.Equal(suite.T(), "@foss_satan@fossbros-anonymous.io", ms[0])
assert.Len(suite.T(), hs, 1)
- assert.Equal(suite.T(), "Hashtag", hs[0])
+ assert.Contains(suite.T(), hs, "HashTag")
assert.Len(suite.T(), es, 0)
}