, do they?`"
withInlineCode2Expected = "Nobody tells you about the </code><del>SECRET CODE</del><code>, do they?
"
withHashtag = "# Title\n\nhere's a simple status that uses hashtag #Hashtag!"
- withHashtagExpected = "Title
here's a simple status that uses hashtag #Hashtag!
"
+ withHashtagExpected = "Title
here's a simple status that uses hashtag #Hashtag!
"
mdWithHTML = "# Title\n\nHere's a simple text in markdown.\n\nHere's a link.\n\nHere's an image:
"
mdWithHTMLExpected = "Title
Here's a simple text in markdown.
Here's a link.
Here's an image: 
"
mdWithCheekyHTML = "# Title\n\nHere's a simple text in markdown.\n\nHere's a cheeky little script: "
mdWithCheekyHTMLExpected = "Title
Here's a simple text in markdown.
Here's a cheeky little script:
"
mdWithHashtagInitial = "#welcome #Hashtag"
- mdWithHashtagInitialExpected = ""
+ mdWithHashtagInitialExpected = ""
mdCodeBlockWithNewlines = "some code coming up\n\n```\n\n\n\n```\nthat was some code"
mdCodeBlockWithNewlinesExpected = "some code coming up
\n\n\n
that was some code
"
mdWithFootnote = "fox mulder,fbi.[^1]\n\n[^1]: federated bureau of investigation"
@@ -63,7 +63,7 @@ const (
mdWithBlockQuote = "get ready, there's a block quote coming:\n\n>line1\n>line2\n>\n>line3\n\n"
mdWithBlockQuoteExpected = "get ready, there's a block quote coming:
line1
line2
line3
"
mdHashtagAndCodeBlock = "#Hashtag\n\n```\n#Hashtag\n```"
- mdHashtagAndCodeBlockExpected = "#Hashtag\n
"
+ mdHashtagAndCodeBlockExpected = "#Hashtag\n
"
mdMentionAndCodeBlock = "@the_mighty_zork\n\n```\n@the_mighty_zork\n```"
mdMentionAndCodeBlockExpected = "@the_mighty_zork\n
"
mdWithSmartypants = "\"you have to quargle the bleepflorp\" they said with 1/2 of nominal speed and 1/3 of the usual glumping"
@@ -77,9 +77,9 @@ const (
mdObjectInCodeBlock = "@foss_satan@fossbros-anonymous.io this is how to mention a user\n```\n@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you've been writing lately! :rainbow:\n```\nhope that helps"
mdObjectInCodeBlockExpected = "@foss_satan this is how to mention a user
@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you've been writing lately! :rainbow:\n
hope that helps
"
mdItalicHashtag = "_#hashtag_"
- mdItalicHashtagExpected = ""
+ mdItalicHashtagExpected = ""
mdItalicHashtags = "_#hashtag #hashtag #hashtag_"
- mdItalicHashtagsExpected = ""
+ mdItalicHashtagsExpected = ""
// BEWARE: sneaky unicode business going on.
// the first ö is one rune, the second ö is an o with a combining diacritic.
mdUnnormalizedHashtag = "#hellöthere #hellöthere"
diff --git a/internal/text/normalize.go b/internal/text/normalize.go
new file mode 100644
index 000000000..14caf6311
--- /dev/null
+++ b/internal/text/normalize.go
@@ -0,0 +1,60 @@
+// GoToSocial
+// Copyright (C) GoToSocial Authors admin@gotosocial.org
+// SPDX-License-Identifier: AGPL-3.0-or-later
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package text
+
+import (
+ "strings"
+
+ "github.com/superseriousbusiness/gotosocial/internal/util"
+ "golang.org/x/text/unicode/norm"
+)
+
+const (
+ maximumHashtagLength = 100 // Length limit checked by NormalizeHashtag.
+)
+
+// NormalizeHashtag normalizes the given hashtag text by
+// removing the initial '#' symbol, and then decomposing
+// and canonically recomposing chars + combining diacritics
+// in the text to single unicode characters, following
+// Normalization Form C (https://unicode.org/reports/tr15/).
+//
+// Finally, it will do a check on the normalized string to
+// ensure that it's below maximumHashtagLength chars, and
+// contains only unicode letters and numbers. If this passes,
+// returned bool will be true.
+func NormalizeHashtag(text string) (string, bool) {
+ // This normalization is specifically to avoid cases
+ // where visually-identical hashtags are stored with
+ // different unicode representations (e.g. with combining
+ // diacritics). It allows a tasteful number of combining
+ // diacritics to be used, as long as they can be combined
+ // with parent characters to form regular letter symbols.
+ normalized := norm.NFC.String(strings.TrimPrefix(text, "#"))
+
+ // Validate normalized.
+ ok := true
+ for i, r := range normalized {
+ if i >= maximumHashtagLength || !util.IsPermittedInHashtag(r) {
+ ok = false
+ break
+ }
+ }
+
+ return normalized, ok
+}
diff --git a/internal/text/plain_test.go b/internal/text/plain_test.go
index 5a2918563..dfcf8b953 100644
--- a/internal/text/plain_test.go
+++ b/internal/text/plain_test.go
@@ -34,7 +34,7 @@ const (
withHTML = "blah this should just be html escaped blah"
withHTMLExpected = "<div>blah this should just be html escaped blah</div>
"
moreComplex = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText\n\n:rainbow:"
- moreComplexExpected = "Another test @foss_satan
#Hashtag
Text
:rainbow:
"
+ moreComplexExpected = "Another test @foss_satan
#Hashtag
Text
:rainbow:
"
)
type PlainTestSuite struct {
@@ -103,7 +103,7 @@ func (suite *PlainTestSuite) TestDeriveHashtagsOK() {
#111111 thisalsoshouldn'twork#### ##
#alimentación, #saúde, #lävistää, #ö, #네
-#ThisOneIsThirtyOneCharactersLon... ...ng
+#ThisOneIsOneHundredAndOneCharactersLongWhichIsReallyJustWayWayTooLongDefinitelyLongerThanYouWouldNeed...
#ThisOneIsThirteyCharactersLong
`
@@ -141,7 +141,7 @@ func (suite *PlainTestSuite) TestDeriveMultiple() {
assert.Equal(suite.T(), "@foss_satan@fossbros-anonymous.io", f.Mentions[0].NameString)
assert.Len(suite.T(), f.Tags, 1)
- assert.Equal(suite.T(), "Hashtag", f.Tags[0].Name)
+ assert.Equal(suite.T(), "hashtag", f.Tags[0].Name)
assert.Len(suite.T(), f.Emojis, 0)
}
diff --git a/internal/text/replace.go b/internal/text/replace.go
index e8e02454e..db72aaf1d 100644
--- a/internal/text/replace.go
+++ b/internal/text/replace.go
@@ -23,19 +23,13 @@ import (
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
+ "github.com/superseriousbusiness/gotosocial/internal/gtserror"
+ "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+ "github.com/superseriousbusiness/gotosocial/internal/id"
"github.com/superseriousbusiness/gotosocial/internal/log"
- "github.com/superseriousbusiness/gotosocial/internal/util"
- "golang.org/x/text/unicode/norm"
+ "github.com/superseriousbusiness/gotosocial/internal/uris"
)
-const (
- maximumHashtagLength = 30
-)
-
-// given a mention or a hashtag string, the methods in this file will attempt to parse it,
-// add it to the database, and render it as HTML. If any of these steps fails, the method
-// will just return the original string and log an error.
-
// replaceMention takes a string in the form @username@domain.com or @localusername
func (r *customRenderer) replaceMention(text string) string {
mention, err := r.parseMention(r.ctx, text, r.accountID, r.statusID)
@@ -90,55 +84,78 @@ func (r *customRenderer) replaceMention(text string) string {
return b.String()
}
-// replaceMention takes a string in the form #HashedTag, and will normalize it before
-// adding it to the db and turning it into HTML.
+// replaceHashtag takes a string in the form #SomeHashtag, normalizes it, gets the
+// matching tag from the db (creating it if it doesn't exist yet), and renders it. If
+// the hashtag is not valid, or getting/creating fails, text is returned unchanged.
func (r *customRenderer) replaceHashtag(text string) string {
- // this normalization is specifically to avoid cases where visually-identical
- // hashtags are stored with different unicode representations (e.g. with combining
- // diacritics). It allows a tasteful number of combining diacritics to be used,
- // as long as they can be combined with parent characters to form regular letter
- // symbols.
- normalized := norm.NFC.String(text[1:])
-
- for i, r := range normalized {
- if i >= maximumHashtagLength || !util.IsPermittedInHashtag(r) {
- return text
- }
+ normalized, ok := NormalizeHashtag(text)
+ if !ok {
+ // Not a valid hashtag; hand the text back as it was given.
+ return text
}
- tag, err := r.f.db.TagStringToTag(r.ctx, normalized, r.accountID)
+ tag, err := r.getOrCreateHashtag(normalized)
if err != nil {
log.Errorf(r.ctx, "error generating hashtags from status: %s", err)
return text
}
- // only append if it's not been listed yet
- listed := false
- for _, t := range r.result.Tags {
- if tag.ID == t.ID {
- listed = true
- break
- }
- }
- if !listed {
- err = r.f.db.Put(r.ctx, tag)
- if err != nil {
- if !errors.Is(err, db.ErrAlreadyExists) {
- log.Errorf(r.ctx, "error putting tags in db: %s", err)
- return text
+ // Append tag to result if not done already.
+ //
+ // This prevents multiple uses of a tag in
+ // the same status generating multiple
+ // entries for the same tag in result.
+ func() {
+ for _, t := range r.result.Tags {
+ if tag.ID == t.ID {
+ // Already appended.
+ return
}
}
+
+ // Not appended yet.
r.result.Tags = append(r.result.Tags, tag)
- }
+ }()
+ // Replace tag with the formatted tag content, which is
+ // assembled piece by piece in the strings.Builder below.
var b strings.Builder
- // replace the #tag with the formatted tag content
- // `#tagAsEntered
b.WriteString(`#`)
b.WriteString(normalized)
b.WriteString(``)
return b.String()
}
+
+func (r *customRenderer) getOrCreateHashtag(name string) (*gtsmodel.Tag, error) {
+ var (
+ tag *gtsmodel.Tag
+ err error
+ )
+
+ // Check if we have a tag with this name already.
+ tag, err = r.f.db.GetTagByName(r.ctx, name)
+ if err != nil && !errors.Is(err, db.ErrNoEntries) {
+ return nil, gtserror.Newf("db error getting tag %s: %w", name, err)
+ }
+
+ if tag != nil {
+ // We had it!
+ return tag, nil
+ }
+
+ // We didn't have a tag with
+ // this name, create one.
+ tag = >smodel.Tag{
+ ID: id.NewULID(),
+ Name: name,
+ }
+
+ if err = r.f.db.PutTag(r.ctx, tag); err != nil {
+ return nil, gtserror.Newf("db error putting new tag %s: %w", name, err)
+ }
+
+ return tag, nil
+}
--
cgit v1.2.3