diff options
author | 2023-07-31 15:47:35 +0200 | |
---|---|---|
committer | 2023-07-31 15:47:35 +0200 | |
commit | 2796a2e82f16ade9872008878cf88299bd66b4e7 (patch) | |
tree | 76f7b69cc1da57ca10b71c57abf1892575bea100 /internal/text | |
parent | [performance] cache follow, follow request and block ID lists (#2027) (diff) | |
download | gotosocial-2796a2e82f16ade9872008878cf88299bd66b4e7.tar.xz |
[feature] Hashtag federation (in/out), hashtag client API endpoints (#2032)
* update go-fed
* do the things
* remove unused columns from tags
* update to latest lingo from main
* further tag shenanigans
* serve stub page at tag endpoint
* we did it lads
* tests, oh tests, ohhh tests, oh tests (doo doo doo doo)
* swagger docs
* document hashtag usage + federation
* instanceGet
* don't bother parsing tag href
* rename whereStartsWith -> whereStartsLike
* remove GetOrCreateTag
* dont cache status tag timelineability
Diffstat (limited to 'internal/text')
-rw-r--r-- | internal/text/markdown_test.go | 10 | ||||
-rw-r--r-- | internal/text/normalize.go | 60 | ||||
-rw-r--r-- | internal/text/plain_test.go | 6 | ||||
-rw-r--r-- | internal/text/replace.go | 101 |
4 files changed, 127 insertions, 50 deletions
diff --git a/internal/text/markdown_test.go b/internal/text/markdown_test.go index 86e663dad..2602506ca 100644 --- a/internal/text/markdown_test.go +++ b/internal/text/markdown_test.go @@ -49,13 +49,13 @@ const ( withInlineCode2 = "`Nobody tells you about the </code><del>SECRET CODE</del><code>, do they?`" withInlineCode2Expected = "<p><code>Nobody tells you about the </code><del>SECRET CODE</del><code>, do they?</code></p>" withHashtag = "# Title\n\nhere's a simple status that uses hashtag #Hashtag!" - withHashtagExpected = "<h1>Title</h1><p>here's a simple status that uses hashtag <a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a>!</p>" + withHashtagExpected = "<h1>Title</h1><p>here's a simple status that uses hashtag <a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a>!</p>" mdWithHTML = "# Title\n\nHere's a simple text in markdown.\n\nHere's a <a href=\"https://example.org\">link</a>.\n\nHere's an image: <img src=\"https://gts.superseriousbusiness.org/assets/logo.png\" alt=\"The GoToSocial sloth logo.\" width=\"500\" height=\"600\">" mdWithHTMLExpected = "<h1>Title</h1><p>Here's a simple text in markdown.</p><p>Here's a <a href=\"https://example.org\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">link</a>.</p><p>Here's an image: <img src=\"https://gts.superseriousbusiness.org/assets/logo.png\" alt=\"The GoToSocial sloth logo.\" width=\"500\" height=\"600\" crossorigin=\"anonymous\"></p>" mdWithCheekyHTML = "# Title\n\nHere's a simple text in markdown.\n\nHere's a cheeky little script: <script>alert(ahhhh)</script>" mdWithCheekyHTMLExpected = "<h1>Title</h1><p>Here's a simple text in markdown.</p><p>Here's a cheeky little script:</p>" mdWithHashtagInitial = "#welcome #Hashtag" - mdWithHashtagInitialExpected = "<p><a 
href=\"http://localhost:8080/tags/welcome\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>welcome</span></a> <a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a></p>" + mdWithHashtagInitialExpected = "<p><a href=\"http://localhost:8080/tags/welcome\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>welcome</span></a> <a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a></p>" mdCodeBlockWithNewlines = "some code coming up\n\n```\n\n\n\n```\nthat was some code" mdCodeBlockWithNewlinesExpected = "<p>some code coming up</p><pre><code>\n\n\n</code></pre><p>that was some code</p>" mdWithFootnote = "fox mulder,fbi.[^1]\n\n[^1]: federated bureau of investigation" @@ -63,7 +63,7 @@ const ( mdWithBlockQuote = "get ready, there's a block quote coming:\n\n>line1\n>line2\n>\n>line3\n\n" mdWithBlockQuoteExpected = "<p>get ready, there's a block quote coming:</p><blockquote><p>line1<br>line2</p><p>line3</p></blockquote>" mdHashtagAndCodeBlock = "#Hashtag\n\n```\n#Hashtag\n```" - mdHashtagAndCodeBlockExpected = "<p><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a></p><pre><code>#Hashtag\n</code></pre>" + mdHashtagAndCodeBlockExpected = "<p><a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a></p><pre><code>#Hashtag\n</code></pre>" mdMentionAndCodeBlock = "@the_mighty_zork\n\n```\n@the_mighty_zork\n```" mdMentionAndCodeBlockExpected = "<p><span class=\"h-card\"><a href=\"http://localhost:8080/@the_mighty_zork\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" 
target=\"_blank\">@<span>the_mighty_zork</span></a></span></p><pre><code>@the_mighty_zork\n</code></pre>" mdWithSmartypants = "\"you have to quargle the bleepflorp\" they said with 1/2 of nominal speed and 1/3 of the usual glumping" @@ -77,9 +77,9 @@ const ( mdObjectInCodeBlock = "@foss_satan@fossbros-anonymous.io this is how to mention a user\n```\n@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you've been writing lately! :rainbow:\n```\nhope that helps" mdObjectInCodeBlockExpected = "<p><span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span> this is how to mention a user</p><pre><code>@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you've been writing lately! :rainbow:\n</code></pre><p>hope that helps</p>" mdItalicHashtag = "_#hashtag_" - mdItalicHashtagExpected = "<p><em><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a></em></p>" + mdItalicHashtagExpected = "<p><em><a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a></em></p>" mdItalicHashtags = "_#hashtag #hashtag #hashtag_" - mdItalicHashtagsExpected = "<p><em><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a> <a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a> <a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a></em></p>" + mdItalicHashtagsExpected = "<p><em><a href=\"http://localhost:8080/tags/hashtag\" 
class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a> <a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a> <a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a></em></p>" // BEWARE: sneaky unicode business going on. // the first ö is one rune, the second ö is an o with a combining diacritic. mdUnnormalizedHashtag = "#hellöthere #hellöthere" diff --git a/internal/text/normalize.go b/internal/text/normalize.go new file mode 100644 index 000000000..14caf6311 --- /dev/null +++ b/internal/text/normalize.go @@ -0,0 +1,60 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ +package text + +import ( + "strings" + + "github.com/superseriousbusiness/gotosocial/internal/util" + "golang.org/x/text/unicode/norm" +) + +const ( + maximumHashtagLength = 100 +) + +// NormalizeHashtag normalizes the given hashtag text by +// removing the initial '#' symbol, and then decomposing +// and canonically recomposing chars + combining diacritics +// in the text to single unicode characters, following +// Normalization Form C (https://unicode.org/reports/tr15/). +// +// Finally, it will do a check on the normalized string to +// ensure that it's below maximumHashtagLength chars, and +// contains only unicode letters and numbers. If this passes, +// returned bool will be true. +func NormalizeHashtag(text string) (string, bool) { + // This normalization is specifically to avoid cases + // where visually-identical hashtags are stored with + // different unicode representations (e.g. with combining + // diacritics). It allows a tasteful number of combining + // diacritics to be used, as long as they can be combined + // with parent characters to form regular letter symbols. + normalized := norm.NFC.String(strings.TrimPrefix(text, "#")) + + // Validate normalized. 
+ ok := true + for i, r := range normalized { + if i >= maximumHashtagLength || !util.IsPermittedInHashtag(r) { + ok = false + break + } + } + + return normalized, ok +} diff --git a/internal/text/plain_test.go b/internal/text/plain_test.go index 5a2918563..dfcf8b953 100644 --- a/internal/text/plain_test.go +++ b/internal/text/plain_test.go @@ -34,7 +34,7 @@ const ( withHTML = "<div>blah this should just be html escaped blah</div>" withHTMLExpected = "<p>&lt;div&gt;blah this should just be html escaped blah&lt;/div&gt;</p>" moreComplex = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText\n\n:rainbow:" - moreComplexExpected = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br><br><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br><br>Text<br><br>:rainbow:</p>" + moreComplexExpected = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br><br><a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br><br>Text<br><br>:rainbow:</p>" ) type PlainTestSuite struct { @@ -103,7 +103,7 @@ func (suite *PlainTestSuite) TestDeriveHashtagsOK() { #111111 thisalsoshouldn'twork#### ## #alimentación, #saúde, #lävistää, #ö, #네 -#ThisOneIsThirtyOneCharactersLon... ...ng +#ThisOneIsOneHundredAndOneCharactersLongWhichIsReallyJustWayWayTooLongDefinitelyLongerThanYouWouldNeed... 
#ThisOneIsThirteyCharactersLong ` @@ -141,7 +141,7 @@ func (suite *PlainTestSuite) TestDeriveMultiple() { assert.Equal(suite.T(), "@foss_satan@fossbros-anonymous.io", f.Mentions[0].NameString) assert.Len(suite.T(), f.Tags, 1) - assert.Equal(suite.T(), "Hashtag", f.Tags[0].Name) + assert.Equal(suite.T(), "hashtag", f.Tags[0].Name) assert.Len(suite.T(), f.Emojis, 0) } diff --git a/internal/text/replace.go b/internal/text/replace.go index e8e02454e..db72aaf1d 100644 --- a/internal/text/replace.go +++ b/internal/text/replace.go @@ -23,19 +23,13 @@ import ( "github.com/superseriousbusiness/gotosocial/internal/db" "github.com/superseriousbusiness/gotosocial/internal/gtscontext" + "github.com/superseriousbusiness/gotosocial/internal/gtserror" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/id" "github.com/superseriousbusiness/gotosocial/internal/log" - "github.com/superseriousbusiness/gotosocial/internal/util" - "golang.org/x/text/unicode/norm" + "github.com/superseriousbusiness/gotosocial/internal/uris" ) -const ( - maximumHashtagLength = 30 -) - -// given a mention or a hashtag string, the methods in this file will attempt to parse it, -// add it to the database, and render it as HTML. If any of these steps fails, the method -// will just return the original string and log an error. - // replaceMention takes a string in the form @username@domain.com or @localusername func (r *customRenderer) replaceMention(text string) string { mention, err := r.parseMention(r.ctx, text, r.accountID, r.statusID) @@ -90,55 +84,78 @@ func (r *customRenderer) replaceMention(text string) string { return b.String() } -// replaceMention takes a string in the form #HashedTag, and will normalize it before -// adding it to the db and turning it into HTML. 
+// replaceHashtag takes a string in the form #SomeHashtag, and will normalize +// it before adding it to the db (or just getting it from the db if it already +// exists) and turning it into HTML. func (r *customRenderer) replaceHashtag(text string) string { - // this normalization is specifically to avoid cases where visually-identical - // hashtags are stored with different unicode representations (e.g. with combining - // diacritics). It allows a tasteful number of combining diacritics to be used, - // as long as they can be combined with parent characters to form regular letter - // symbols. - normalized := norm.NFC.String(text[1:]) - - for i, r := range normalized { - if i >= maximumHashtagLength || !util.IsPermittedInHashtag(r) { - return text - } + normalized, ok := NormalizeHashtag(text) + if !ok { + // Not a valid hashtag. + return text } - tag, err := r.f.db.TagStringToTag(r.ctx, normalized, r.accountID) + tag, err := r.getOrCreateHashtag(normalized) if err != nil { log.Errorf(r.ctx, "error generating hashtags from status: %s", err) return text } - // only append if it's not been listed yet - listed := false - for _, t := range r.result.Tags { - if tag.ID == t.ID { - listed = true - break - } - } - if !listed { - err = r.f.db.Put(r.ctx, tag) - if err != nil { - if !errors.Is(err, db.ErrAlreadyExists) { - log.Errorf(r.ctx, "error putting tags in db: %s", err) - return text + // Append tag to result if not done already. + // + // This prevents multiple uses of a tag in + // the same status generating multiple + // entries for the same tag in result. + func() { + for _, t := range r.result.Tags { + if tag.ID == t.ID { + // Already appended. + return } } + + // Not appended yet. r.result.Tags = append(r.result.Tags, tag) - } + }() + // Replace tag with the formatted tag content, eg. 
`#SomeHashtag` becomes: + // `<a href="https://example.org/tags/somehashtag" class="mention hashtag" rel="tag">#<span>SomeHashtag</span></a>` var b strings.Builder - // replace the #tag with the formatted tag content - // `<a href="tag.URL" class="mention hashtag" rel="tag">#<span>tagAsEntered</span></a> b.WriteString(`<a href="`) - b.WriteString(tag.URL) + b.WriteString(uris.GenerateURIForTag(normalized)) b.WriteString(`" class="mention hashtag" rel="tag">#<span>`) b.WriteString(normalized) b.WriteString(`</span></a>`) return b.String() } + +func (r *customRenderer) getOrCreateHashtag(name string) (*gtsmodel.Tag, error) { + var ( + tag *gtsmodel.Tag + err error + ) + + // Check if we have a tag with this name already. + tag, err = r.f.db.GetTagByName(r.ctx, name) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return nil, gtserror.Newf("db error getting tag %s: %w", name, err) + } + + if tag != nil { + // We had it! + return tag, nil + } + + // We didn't have a tag with + // this name, create one. + tag = &gtsmodel.Tag{ + ID: id.NewULID(), + Name: name, + } + + if err = r.f.db.PutTag(r.ctx, tag); err != nil { + return nil, gtserror.Newf("db error putting new tag %s: %w", name, err) + } + + return tag, nil +} |