summaryrefslogtreecommitdiff
path: root/internal/text
diff options
context:
space:
mode:
authorLibravatar Tobi Smethurst <31960611+tsmethurst@users.noreply.github.com>2021-07-29 13:18:22 +0200
committerLibravatar GitHub <noreply@github.com>2021-07-29 13:18:22 +0200
commita940a520d301d00f42012743b3999a73f7180848 (patch)
tree50bdd749381d6f773df46dbc4cc33a9b533a4e7b /internal/text
parentLink parsing (#120) (diff)
downloadgotosocial-a940a520d301d00f42012743b3999a73f7180848.tar.xz
Link hashtag bug (#121)
* link + hashtag bug * remove printlns * tidy up some duplicated code
Diffstat (limited to 'internal/text')
-rw-r--r--internal/text/common.go31
-rw-r--r--internal/text/formatter.go7
-rw-r--r--internal/text/formatter_test.go51
-rw-r--r--internal/text/link.go2
-rw-r--r--internal/text/link_test.go64
-rw-r--r--internal/text/markdown.go20
-rw-r--r--internal/text/plain.go20
-rw-r--r--internal/text/plain_test.go84
-rw-r--r--internal/text/sanitize.go14
9 files changed, 242 insertions, 51 deletions
diff --git a/internal/text/common.go b/internal/text/common.go
index 0165af630..98ec892a7 100644
--- a/internal/text/common.go
+++ b/internal/text/common.go
@@ -21,6 +21,9 @@ package text
import (
"fmt"
"strings"
+
+ "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+ "github.com/superseriousbusiness/gotosocial/internal/util"
)
// preformat contains some common logic for making a string ready for formatting, which should be used for all user-input text.
@@ -35,7 +38,7 @@ func preformat(in string) string {
func postformat(in string) string {
// do some postformatting of the text
// 1. sanitize html to remove any dodgy scripts or other disallowed elements
- s := SanitizeHTML(in)
+ s := SanitizeOutgoing(in)
// 2. wrap the whole thing in a paragraph
s = fmt.Sprintf(`<p>%s</p>`, s)
// 3. remove any cheeky newlines
@@ -44,3 +47,29 @@ func postformat(in string) string {
s = strings.TrimSpace(s)
return s
}
+
+// ReplaceTags rewrites every hashtag in 'in' that corresponds to one of the given tags
+// into an HTML anchor pointing at that tag's URL. Text matched by the hashtag finder
+// regex that has no entry in 'tags' is left exactly as it was written.
+func (f *formatter) ReplaceTags(in string, tags []*gtsmodel.Tag) string {
+	return util.HashtagFinderRegex.ReplaceAllStringFunc(in, func(match string) string {
+		// the match may carry surrounding whitespace, so compare on the trimmed form
+		trimmed := strings.TrimSpace(match)
+		for _, tag := range tags {
+			if trimmed != "#"+tag.Name {
+				continue
+			}
+			tagContent := fmt.Sprintf(`<a href="%s" class="mention hashtag" rel="tag">#<span>%s</span></a>`, tag.URL, tag.Name)
+			if strings.HasPrefix(match, " ") {
+				// put back the separating space that was part of the match
+				tagContent = " " + tagContent
+			}
+			return tagContent
+		}
+		// no known tag for this match: hand back the match itself, NOT the whole input,
+		// otherwise the entire text would be spliced in where the hashtag stood
+		return match
+	})
+}
+
+// ReplaceMentions swaps each mention's NameString in 'in' for an h-card anchor that links
+// to the mentioned account. Mentions whose target account can't be fetched from the
+// database are skipped, leaving their text untouched.
+func (f *formatter) ReplaceMentions(in string, mentions []*gtsmodel.Mention) string {
+	out := in
+	for _, mention := range mentions {
+		account := &gtsmodel.Account{}
+		err := f.db.GetByID(mention.TargetAccountID, account)
+		if err != nil {
+			// lookup failed, so this mention stays as plain text
+			continue
+		}
+		link := fmt.Sprintf(`<span class="h-card"><a href="%s" class="u-url mention">@<span>%s</span></a></span>`, account.URL, account.Username)
+		out = strings.ReplaceAll(out, mention.NameString, link)
+	}
+	return out
+}
diff --git a/internal/text/formatter.go b/internal/text/formatter.go
index f8cca6675..39aaae559 100644
--- a/internal/text/formatter.go
+++ b/internal/text/formatter.go
@@ -31,6 +31,13 @@ type Formatter interface {
FromMarkdown(md string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string
// FromPlain parses an HTML text from a plaintext.
FromPlain(plain string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string
+
+ // ReplaceTags takes a piece of text and a slice of tags, and returns the same text with the tags nicely formatted as hrefs.
+ ReplaceTags(in string, tags []*gtsmodel.Tag) string
+ // ReplaceMentions takes a piece of text and a slice of mentions, and returns the same text with the mentions nicely formatted as hrefs.
+ ReplaceMentions(in string, mentions []*gtsmodel.Mention) string
+ // ReplaceLinks takes a piece of text, finds all recognizable links in that text, and replaces them with hrefs.
+ ReplaceLinks(in string) string
}
type formatter struct {
diff --git a/internal/text/formatter_test.go b/internal/text/formatter_test.go
new file mode 100644
index 000000000..2c9c18546
--- /dev/null
+++ b/internal/text/formatter_test.go
@@ -0,0 +1,51 @@
+/*
+ GoToSocial
+ Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package text_test
+
+import (
+ "github.com/sirupsen/logrus"
+ "github.com/stretchr/testify/suite"
+ "github.com/superseriousbusiness/gotosocial/internal/config"
+ "github.com/superseriousbusiness/gotosocial/internal/db"
+ "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+ "github.com/superseriousbusiness/gotosocial/internal/oauth"
+ "github.com/superseriousbusiness/gotosocial/internal/text"
+)
+
+// TextStandardTestSuite bundles the dependencies, fixture maps and formatter used by
+// the text package tests, so that individual suites can embed it instead of
+// redeclaring the same fields.
+// nolint
+type TextStandardTestSuite struct {
+ // standard suite interfaces
+ suite.Suite
+ config *config.Config
+ db db.DB
+ log *logrus.Logger
+
+ // standard suite models
+ // (string-keyed fixture maps, e.g. testTags["welcome"])
+ testTokens map[string]*oauth.Token
+ testClients map[string]*oauth.Client
+ testApplications map[string]*gtsmodel.Application
+ testUsers map[string]*gtsmodel.User
+ testAccounts map[string]*gtsmodel.Account
+ testAttachments map[string]*gtsmodel.MediaAttachment
+ testStatuses map[string]*gtsmodel.Status
+ testTags map[string]*gtsmodel.Tag
+
+ // module being tested
+ formatter text.Formatter
+}
diff --git a/internal/text/link.go b/internal/text/link.go
index 440571a83..d42cc3b68 100644
--- a/internal/text/link.go
+++ b/internal/text/link.go
@@ -82,7 +82,7 @@ func contains(urls []*url.URL, url *url.URL) bool {
// Note: because Go doesn't allow negative lookbehinds in regex, it's possible that an already-formatted
// href will end up double-formatted, if the text you pass here contains one or more hrefs already.
// To avoid this, you should sanitize any HTML out of text before you pass it into this function.
-func ReplaceLinks(in string) string {
+func (f *formatter) ReplaceLinks(in string) string {
rxStrict, err := xurls.StrictMatchingScheme(schemes)
if err != nil {
panic(err)
diff --git a/internal/text/link_test.go b/internal/text/link_test.go
index 636f26f7f..15e27f870 100644
--- a/internal/text/link_test.go
+++ b/internal/text/link_test.go
@@ -24,6 +24,7 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/text"
+ "github.com/superseriousbusiness/gotosocial/testrig"
)
const text1 = `
@@ -64,11 +65,40 @@ what happens when we already have a link within an href?
<a href="https://example.org">https://example.org</a>
`
-type TextTestSuite struct {
- suite.Suite
+type LinkTestSuite struct {
+ TextStandardTestSuite
}
-func (suite *TextTestSuite) TestParseURLsFromText1() {
+// SetupSuite fills the suite's fixture maps using the testrig New* helpers.
+func (suite *LinkTestSuite) SetupSuite() {
+ suite.testTokens = testrig.NewTestTokens()
+ suite.testClients = testrig.NewTestClients()
+ suite.testApplications = testrig.NewTestApplications()
+ suite.testUsers = testrig.NewTestUsers()
+ suite.testAccounts = testrig.NewTestAccounts()
+ suite.testAttachments = testrig.NewTestAttachments()
+ suite.testStatuses = testrig.NewTestStatuses()
+ suite.testTags = testrig.NewTestTags()
+}
+
+// SetupTest creates a test config, database and logger via testrig, builds the
+// formatter under test from them, and then runs testrig.StandardDBSetup on the database.
+func (suite *LinkTestSuite) SetupTest() {
+ suite.config = testrig.NewTestConfig()
+ suite.db = testrig.NewTestDB()
+ suite.log = testrig.NewTestLog()
+ suite.formatter = text.NewFormatter(suite.config, suite.db, suite.log)
+
+ testrig.StandardDBSetup(suite.db)
+}
+
+// TearDownTest runs testrig.StandardDBTeardown on the suite's database.
+func (suite *LinkTestSuite) TearDownTest() {
+ testrig.StandardDBTeardown(suite.db)
+}
+
+// TestParseSimple checks that running the 'simple' input through FromPlain, with no
+// mentions and no tags, produces exactly 'simpleExpected'.
+func (suite *LinkTestSuite) TestParseSimple() {
+	formatted := suite.formatter.FromPlain(simple, nil, nil)
+	t := suite.T()
+	assert.Equal(t, simpleExpected, formatted)
+}
+
+func (suite *LinkTestSuite) TestParseURLsFromText1() {
urls, err := text.FindLinks(text1)
assert.NoError(suite.T(), err)
@@ -79,7 +109,7 @@ func (suite *TextTestSuite) TestParseURLsFromText1() {
assert.Equal(suite.T(), "https://example.orghttps://google.com", urls[3].String())
}
-func (suite *TextTestSuite) TestParseURLsFromText2() {
+func (suite *LinkTestSuite) TestParseURLsFromText2() {
urls, err := text.FindLinks(text2)
assert.NoError(suite.T(), err)
@@ -87,7 +117,7 @@ func (suite *TextTestSuite) TestParseURLsFromText2() {
assert.Len(suite.T(), urls, 1)
}
-func (suite *TextTestSuite) TestParseURLsFromText3() {
+func (suite *LinkTestSuite) TestParseURLsFromText3() {
urls, err := text.FindLinks(text3)
assert.NoError(suite.T(), err)
@@ -95,8 +125,8 @@ func (suite *TextTestSuite) TestParseURLsFromText3() {
assert.Len(suite.T(), urls, 0)
}
-func (suite *TextTestSuite) TestReplaceLinksFromText1() {
- replaced := text.ReplaceLinks(text1)
+func (suite *LinkTestSuite) TestReplaceLinksFromText1() {
+ replaced := suite.formatter.ReplaceLinks(text1)
assert.Equal(suite.T(), `
This is a text with some links in it. Here's link number one: <a href="https://example.org/link/to/something#fragment" rel="noopener">example.org/link/to/something#fragment</a>
@@ -110,8 +140,8 @@ really.cool.website <-- this one shouldn't be parsed as a link because it doesn'
`, replaced)
}
-func (suite *TextTestSuite) TestReplaceLinksFromText2() {
- replaced := text.ReplaceLinks(text2)
+func (suite *LinkTestSuite) TestReplaceLinksFromText2() {
+ replaced := suite.formatter.ReplaceLinks(text2)
assert.Equal(suite.T(), `
this is one link: <a href="https://example.org" rel="noopener">example.org</a>
@@ -121,16 +151,16 @@ these should be deduplicated
`, replaced)
}
-func (suite *TextTestSuite) TestReplaceLinksFromText3() {
+func (suite *LinkTestSuite) TestReplaceLinksFromText3() {
// we know mailto links won't be replaced with hrefs -- we only accept https and http
- replaced := text.ReplaceLinks(text3)
+ replaced := suite.formatter.ReplaceLinks(text3)
assert.Equal(suite.T(), `
here's a mailto link: mailto:whatever@test.org
`, replaced)
}
-func (suite *TextTestSuite) TestReplaceLinksFromText4() {
- replaced := text.ReplaceLinks(text4)
+func (suite *LinkTestSuite) TestReplaceLinksFromText4() {
+ replaced := suite.formatter.ReplaceLinks(text4)
assert.Equal(suite.T(), `
two similar links:
@@ -140,9 +170,9 @@ two similar links:
`, replaced)
}
-func (suite *TextTestSuite) TestReplaceLinksFromText5() {
+func (suite *LinkTestSuite) TestReplaceLinksFromText5() {
// we know this one doesn't work properly, which is why html should always be sanitized before being passed into the ReplaceLinks function
- replaced := text.ReplaceLinks(text5)
+ replaced := suite.formatter.ReplaceLinks(text5)
assert.Equal(suite.T(), `
what happens when we already have a link within an href?
@@ -150,6 +180,6 @@ what happens when we already have a link within an href?
`, replaced)
}
-func TestTextTestSuite(t *testing.T) {
- suite.Run(t, new(TextTestSuite))
+func TestLinkTestSuite(t *testing.T) {
+ suite.Run(t, new(LinkTestSuite))
}
diff --git a/internal/text/markdown.go b/internal/text/markdown.go
index d1309f389..f9d12209a 100644
--- a/internal/text/markdown.go
+++ b/internal/text/markdown.go
@@ -19,9 +19,6 @@
package text
import (
- "fmt"
- "strings"
-
"github.com/russross/blackfriday/v2"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)
@@ -39,20 +36,11 @@ func (f *formatter) FromMarkdown(md string, mentions []*gtsmodel.Mention, tags [
// do the markdown parsing *first*
content = string(blackfriday.Run([]byte(content), blackfriday.WithExtensions(bfExtensions)))
- // format mentions nicely
- for _, menchie := range mentions {
- targetAccount := &gtsmodel.Account{}
- if err := f.db.GetByID(menchie.TargetAccountID, targetAccount); err == nil {
- mentionContent := fmt.Sprintf(`<span class="h-card"><a href="%s" class="u-url mention">@<span>%s</span></a></span>`, targetAccount.URL, targetAccount.Username)
- content = strings.ReplaceAll(content, menchie.NameString, mentionContent)
- }
- }
-
// format tags nicely
- for _, tag := range tags {
- tagContent := fmt.Sprintf(`<a href="%s" class="mention hashtag" rel="tag">#<span>%s</span></a>`, tag.URL, tag.Name)
- content = strings.ReplaceAll(content, fmt.Sprintf("#%s", tag.Name), tagContent)
- }
+ content = f.ReplaceTags(content, tags)
+
+ // format mentions nicely
+ content = f.ReplaceMentions(content, mentions)
return postformat(content)
}
diff --git a/internal/text/plain.go b/internal/text/plain.go
index 4f6659484..40fb6412f 100644
--- a/internal/text/plain.go
+++ b/internal/text/plain.go
@@ -19,7 +19,6 @@
package text
import (
- "fmt"
"strings"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
@@ -29,22 +28,13 @@ func (f *formatter) FromPlain(plain string, mentions []*gtsmodel.Mention, tags [
content := preformat(plain)
// format links nicely
- content = ReplaceLinks(content)
-
- // format mentions nicely
- for _, menchie := range mentions {
- targetAccount := &gtsmodel.Account{}
- if err := f.db.GetByID(menchie.TargetAccountID, targetAccount); err == nil {
- mentionContent := fmt.Sprintf(`<span class="h-card"><a href="%s" class="u-url mention">@<span>%s</span></a></span>`, targetAccount.URL, targetAccount.Username)
- content = strings.ReplaceAll(content, menchie.NameString, mentionContent)
- }
- }
+ content = f.ReplaceLinks(content)
// format tags nicely
- for _, tag := range tags {
- tagContent := fmt.Sprintf(`<a href="%s" class="mention hashtag" rel="tag">#<span>%s</span></a>`, tag.URL, tag.Name)
- content = strings.ReplaceAll(content, fmt.Sprintf("#%s", tag.Name), tagContent)
- }
+ content = f.ReplaceTags(content, tags)
+
+ // format mentions nicely
+ content = f.ReplaceMentions(content, mentions)
// replace newlines with breaks
content = strings.ReplaceAll(content, "\n", "<br />")
diff --git a/internal/text/plain_test.go b/internal/text/plain_test.go
new file mode 100644
index 000000000..1e0d1471a
--- /dev/null
+++ b/internal/text/plain_test.go
@@ -0,0 +1,84 @@
+/*
+ GoToSocial
+ Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package text_test
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/suite"
+ "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+ "github.com/superseriousbusiness/gotosocial/internal/text"
+ "github.com/superseriousbusiness/gotosocial/testrig"
+)
+
+const (
+ // plain input without links, tags or mentions, and the output expected from FromPlain
+ simple = "this is a plain and simple status"
+ simpleExpected = "<p>this is a plain and simple status</p>"
+
+ // input ending in a hashtag followed by punctuation, and the output expected from
+ // FromPlain when the matching tag is passed in
+ withTag = "this is a simple status that uses hashtag #welcome!"
+ withTagExpected = "<p>this is a simple status that uses hashtag <a href=\"http://localhost:8080/tags/welcome\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>welcome</span></a>!</p>"
+)
+
+// PlainTestSuite holds the tests for plaintext formatting; its fields all come from
+// the embedded TextStandardTestSuite.
+type PlainTestSuite struct {
+ TextStandardTestSuite
+}
+
+// SetupSuite fills the suite's fixture maps using the testrig New* helpers.
+func (suite *PlainTestSuite) SetupSuite() {
+ suite.testTokens = testrig.NewTestTokens()
+ suite.testClients = testrig.NewTestClients()
+ suite.testApplications = testrig.NewTestApplications()
+ suite.testUsers = testrig.NewTestUsers()
+ suite.testAccounts = testrig.NewTestAccounts()
+ suite.testAttachments = testrig.NewTestAttachments()
+ suite.testStatuses = testrig.NewTestStatuses()
+ suite.testTags = testrig.NewTestTags()
+}
+
+// SetupTest creates a test config, database and logger via testrig, builds the
+// formatter under test from them, and then runs testrig.StandardDBSetup on the database.
+func (suite *PlainTestSuite) SetupTest() {
+ suite.config = testrig.NewTestConfig()
+ suite.db = testrig.NewTestDB()
+ suite.log = testrig.NewTestLog()
+ suite.formatter = text.NewFormatter(suite.config, suite.db, suite.log)
+
+ testrig.StandardDBSetup(suite.db)
+}
+
+// TearDownTest runs testrig.StandardDBTeardown on the suite's database.
+func (suite *PlainTestSuite) TearDownTest() {
+ testrig.StandardDBTeardown(suite.db)
+}
+
+// TestParseSimple checks that running the 'simple' input through FromPlain, with no
+// mentions and no tags, produces exactly 'simpleExpected'.
+func (suite *PlainTestSuite) TestParseSimple() {
+	formatted := suite.formatter.FromPlain(simple, nil, nil)
+	t := suite.T()
+	assert.Equal(t, simpleExpected, formatted)
+}
+
+// TestParseWithTag checks that formatting 'withTag' while supplying the "welcome" test
+// tag produces exactly 'withTagExpected', i.e. the hashtag is rendered as a tag link.
+func (suite *PlainTestSuite) TestParseWithTag() {
+	welcome := suite.testTags["welcome"]
+	formatted := suite.formatter.FromPlain(withTag, nil, []*gtsmodel.Tag{welcome})
+	assert.Equal(suite.T(), withTagExpected, formatted)
+}
+
+// TestPlainTestSuite hands a fresh PlainTestSuite to the testify suite runner.
+func TestPlainTestSuite(t *testing.T) {
+	plainSuite := &PlainTestSuite{}
+	suite.Run(t, plainSuite)
+}
diff --git a/internal/text/sanitize.go b/internal/text/sanitize.go
index aac9d8aab..365875d46 100644
--- a/internal/text/sanitize.go
+++ b/internal/text/sanitize.go
@@ -30,7 +30,13 @@ import (
var regular *bluemonday.Policy = bluemonday.UGCPolicy().
RequireNoReferrerOnLinks(true).
RequireNoFollowOnLinks(true).
- RequireCrossOriginAnonymous(true)
+ RequireCrossOriginAnonymous(true).
+ AddTargetBlankToFullyQualifiedLinks(true)
+
+// outgoing policy should be used on statuses we've already parsed and added our own elements etc to. It is less strict than regular.
+//
+// It is assembled from its own UGCPolicy instead of being chained off regular: bluemonday's
+// builder methods modify the policy they are called on and return that same pointer, so
+// chaining off regular would leave both variables sharing one policy, with the extra
+// attributes allowed everywhere.
+var outgoing *bluemonday.Policy = bluemonday.UGCPolicy().
+	RequireNoReferrerOnLinks(true).
+	RequireNoFollowOnLinks(true).
+	RequireCrossOriginAnonymous(true).
+	AddTargetBlankToFullyQualifiedLinks(true).
+	AllowAttrs("class", "href", "rel").OnElements("a").
+	AllowAttrs("class").OnElements("span")
// '[C]an be thought of as equivalent to stripping all HTML elements and their attributes as it has nothing on its allowlist.
// An example usage scenario would be blog post titles where HTML tags are not expected at all
@@ -48,3 +54,9 @@ func SanitizeHTML(in string) string {
func RemoveHTML(in string) string {
return strict.Sanitize(in)
}
+
+// SanitizeOutgoing runs the given string through the outgoing policy, letting through only
+// safe elements and the elements that were added during the parsing process.
+// It is meant as a final pass over text that has already been converted into HTML.
+func SanitizeOutgoing(in string) string {
+	cleaned := outgoing.Sanitize(in)
+	return cleaned
+}