diff options
author | 2022-11-15 16:05:34 +0100 | |
---|---|---|
committer | 2022-11-15 16:05:34 +0100 | |
commit | 52109776f63ac59b2fef5cd7417becd9f0007acb (patch) | |
tree | 8ee4fd138806357257c975621bc20ba9141ddbe9 /internal/text | |
parent | [chore] fix profile spacing on very small screens (#1050) (diff) | |
download | gotosocial-52109776f63ac59b2fef5cd7417becd9f0007acb.tar.xz |
[bugfix] Fix unicode-unaware word boundary check in hashtags (#1049)
* [bugfix] Fix unicode-unaware word boundary check in hashtag regex
Go's regexp `\b` only matches ASCII word boundaries and is not Unicode-aware, and because Go regexps have no lookahead, the workarounds got
very ugly. So I replaced the regex with a parser.
The parser runs in O(n) time and performance should not be affected.
* [bugfix] Add back hashtag max length and add tests for it
Diffstat (limited to 'internal/text')
-rw-r--r-- | internal/text/common.go | 48 |
1 file changed, 29 insertions, 19 deletions
diff --git a/internal/text/common.go b/internal/text/common.go index 005f9dfe1..ca4b97465 100644 --- a/internal/text/common.go +++ b/internal/text/common.go @@ -27,36 +27,46 @@ import ( "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/log" "github.com/superseriousbusiness/gotosocial/internal/regexes" + "github.com/superseriousbusiness/gotosocial/internal/util" ) func (f *formatter) ReplaceTags(ctx context.Context, in string, tags []*gtsmodel.Tag) string { - return regexes.ReplaceAllStringFunc(regexes.HashtagFinder, in, func(match string, buf *bytes.Buffer) string { - // we have a match - matchTrimmed := strings.TrimSpace(match) - tagAsEntered := matchTrimmed[1:] + spans := util.FindHashtagSpansInText(in) + + if len(spans) == 0 { + return in + } + + var b strings.Builder + i := 0 + +spans: + for _, t := range spans { + b.WriteString(in[i:t.First]) + i = t.Second + tagAsEntered := in[t.First+1 : t.Second] - // check through the tags to find what we're matching for _, tag := range tags { if strings.EqualFold(tagAsEntered, tag.Name) { - // Add any dropped space from match - if unicode.IsSpace(rune(match[0])) { - buf.WriteByte(match[0]) - } - // replace the #tag with the formatted tag content // `<a href="tag.URL" class="mention hashtag" rel="tag">#<span>tagAsEntered</span></a> - buf.WriteString(`<a href="`) - buf.WriteString(tag.URL) - buf.WriteString(`" class="mention hashtag" rel="tag">#<span>`) - buf.WriteString(tagAsEntered) - buf.WriteString(`</span></a>`) - return buf.String() + b.WriteString(`<a href="`) + b.WriteString(tag.URL) + b.WriteString(`" class="mention hashtag" rel="tag">#<span>`) + b.WriteString(tagAsEntered) + b.WriteString(`</span></a>`) + continue spans } } - // the match wasn't in the list of tags for whatever reason, so just return the match as we found it so nothing changes - return match - }) + b.WriteString(in[t.First:t.Second]) + } + + // Get the last bits. 
+ i = spans[len(spans)-1].Second + b.WriteString(in[i:]) + + return b.String() } func (f *formatter) ReplaceMentions(ctx context.Context, in string, mentions []*gtsmodel.Mention) string { |