summaryrefslogtreecommitdiff
path: root/internal/text/common.go
diff options
context:
space:
mode:
authorLibravatar ugla <ugla@u8.is>2022-11-15 16:05:34 +0100
committerLibravatar GitHub <noreply@github.com>2022-11-15 16:05:34 +0100
commit52109776f63ac59b2fef5cd7417becd9f0007acb (patch)
tree8ee4fd138806357257c975621bc20ba9141ddbe9 /internal/text/common.go
parent[chore] fix profile spacing on very small screens (#1050) (diff)
downloadgotosocial-52109776f63ac59b2fef5cd7417becd9f0007acb.tar.xz
[bugfix] Fix unicode-unaware word boundary check in hashtags (#1049)
* [bugfix] Fix unicode-unaware word boundary check in hashtag regex

  Go's `\b` is not Unicode-aware (it only matches ASCII word boundaries), and without lookahead, the workarounds got very ugly. So I replaced the regex with a parser. The parser runs in O(n) time and performance should not be affected.

* [bugfix] Add back hashtag max length and add tests for it
Diffstat (limited to 'internal/text/common.go')
-rw-r--r--internal/text/common.go48
1 files changed, 29 insertions, 19 deletions
diff --git a/internal/text/common.go b/internal/text/common.go
index 005f9dfe1..ca4b97465 100644
--- a/internal/text/common.go
+++ b/internal/text/common.go
@@ -27,36 +27,46 @@ import (
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/regexes"
+ "github.com/superseriousbusiness/gotosocial/internal/util"
)
func (f *formatter) ReplaceTags(ctx context.Context, in string, tags []*gtsmodel.Tag) string {
- return regexes.ReplaceAllStringFunc(regexes.HashtagFinder, in, func(match string, buf *bytes.Buffer) string {
- // we have a match
- matchTrimmed := strings.TrimSpace(match)
- tagAsEntered := matchTrimmed[1:]
+ spans := util.FindHashtagSpansInText(in)
+
+ if len(spans) == 0 {
+ return in
+ }
+
+ var b strings.Builder
+ i := 0
+
+spans:
+ for _, t := range spans {
+ b.WriteString(in[i:t.First])
+ i = t.Second
+ tagAsEntered := in[t.First+1 : t.Second]
- // check through the tags to find what we're matching
for _, tag := range tags {
if strings.EqualFold(tagAsEntered, tag.Name) {
- // Add any dropped space from match
- if unicode.IsSpace(rune(match[0])) {
- buf.WriteByte(match[0])
- }
-
// replace the #tag with the formatted tag content
// `<a href="tag.URL" class="mention hashtag" rel="tag">#<span>tagAsEntered</span></a>
- buf.WriteString(`<a href="`)
- buf.WriteString(tag.URL)
- buf.WriteString(`" class="mention hashtag" rel="tag">#<span>`)
- buf.WriteString(tagAsEntered)
- buf.WriteString(`</span></a>`)
- return buf.String()
+ b.WriteString(`<a href="`)
+ b.WriteString(tag.URL)
+ b.WriteString(`" class="mention hashtag" rel="tag">#<span>`)
+ b.WriteString(tagAsEntered)
+ b.WriteString(`</span></a>`)
+ continue spans
}
}
- // the match wasn't in the list of tags for whatever reason, so just return the match as we found it so nothing changes
- return match
- })
+ b.WriteString(in[t.First:t.Second])
+ }
+
+ // Get the last bits.
+ i = spans[len(spans)-1].Second
+ b.WriteString(in[i:])
+
+ return b.String()
}
func (f *formatter) ReplaceMentions(ctx context.Context, in string, mentions []*gtsmodel.Mention) string {