diff options
Diffstat (limited to 'internal/text')
-rw-r--r-- | internal/text/common.go | 33 | ||||
-rw-r--r-- | internal/text/markdown.go | 11 | ||||
-rw-r--r-- | internal/text/markdown_test.go | 116 | ||||
-rw-r--r-- | internal/text/minify.go | 39 | ||||
-rw-r--r-- | internal/text/plain.go | 7 | ||||
-rw-r--r-- | internal/text/plain_test.go | 8 | ||||
-rw-r--r-- | internal/text/sanitize.go | 17 | ||||
-rw-r--r-- | internal/text/sanitize_test.go | 75 |
8 files changed, 271 insertions, 35 deletions
diff --git a/internal/text/common.go b/internal/text/common.go index 4f0bad9dc..f6a5ca5f5 100644 --- a/internal/text/common.go +++ b/internal/text/common.go @@ -20,6 +20,7 @@ package text import ( "fmt" + "html" "strings" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" @@ -29,23 +30,33 @@ import ( // preformat contains some common logic for making a string ready for formatting, which should be used for all user-input text. func preformat(in string) string { // do some preformatting of the text - // 1. Trim all the whitespace - s := strings.TrimSpace(in) + + // 1. unescape everything that might be html escaped + s := html.UnescapeString(in) + + // 2. trim leading or trailing whitespace + s = strings.TrimSpace(s) return s } // postformat contains some common logic for html sanitization of text, wrapping elements, and trimming newlines and whitespace func postformat(in string) string { // do some postformatting of the text - // 1. sanitize html to remove any dodgy scripts or other disallowed elements - s := SanitizeOutgoing(in) - // 2. wrap the whole thing in a paragraph - s = fmt.Sprintf(`<p>%s</p>`, s) - // 3. remove any cheeky newlines - s = strings.ReplaceAll(s, "\n", "") - // 4. remove any whitespace added as a result of the formatting - s = strings.TrimSpace(s) - return s + + // 1. sanitize html to remove potentially dangerous elements + s := SanitizeHTML(in) + + // 2. the sanitize step tends to escape characters inside codeblocks, which is behavior we don't want, so unescape everything again + s = html.UnescapeString(s) + + // 3. minify html to remove any trailing newlines, spaces, unnecessary elements, etc etc + mini, err := minifyHTML(s) + if err != nil { + // if the minify failed, just return what we have + return s + } + // return minified version of the html + return mini } func (f *formatter) ReplaceTags(in string, tags []*gtsmodel.Tag) string { diff --git a/internal/text/markdown.go b/internal/text/markdown.go index f9d12209a..5a7603615 100644 --- a/internal/text/markdown.go +++ b/internal/text/markdown.go @@ -23,21 +23,14 @@ import ( "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" ) -var bfExtensions = blackfriday.NoIntraEmphasis | - blackfriday.FencedCode | - blackfriday.Autolink | - blackfriday.Strikethrough | - blackfriday.SpaceHeadings | - blackfriday.BackslashLineBreak - func (f *formatter) FromMarkdown(md string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string { content := preformat(md) // do the markdown parsing *first* - content = string(blackfriday.Run([]byte(content), blackfriday.WithExtensions(bfExtensions))) + contentBytes := blackfriday.Run([]byte(content)) // format tags nicely - content = f.ReplaceTags(content, tags) + content = f.ReplaceTags(string(contentBytes), tags) // format mentions nicely content = f.ReplaceMentions(content, mentions) diff --git a/internal/text/markdown_test.go b/internal/text/markdown_test.go new file mode 100644 index 000000000..432e9a4ec --- /dev/null +++ b/internal/text/markdown_test.go @@ -0,0 +1,116 @@ +/* + GoToSocial + Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +package text_test + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/suite" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/text" + "github.com/superseriousbusiness/gotosocial/testrig" +) + +const ( + simpleMarkdown = `# Title + +Here's a simple text in markdown. + +Here's a [link](https://example.org).` + + simpleMarkdownExpected = "<h1>Title</h1><p>Here’s a simple text in markdown.</p><p>Here’s a <a href=\"https://example.org\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">link</a>.</p>" + + withCodeBlockExpected = "<h1>Title</h1><p>Below is some JSON.</p><pre><code class=\"language-json\">{\n \"key\": \"value\",\n \"another_key\": [\n \"value1\",\n \"value2\"\n ]\n}\n</code></pre><p>that was some JSON :)</p>" + + withHashtag = "# Title\n\nhere's a simple status that uses hashtag #Hashtag!" + withHashtagExpected = "<h1>Title</h1><p>here’s a simple status that uses hashtag <a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a>!</p>" +) + +var ( + withCodeBlock = `# Title + +Below is some JSON. + +` + "```" + `json +{ + "key": "value", + "another_key": [ + "value1", + "value2" + ] +} +` + "```" + ` + +that was some JSON :) +` +) + +type MarkdownTestSuite struct { + TextStandardTestSuite +} + +func (suite *MarkdownTestSuite) SetupSuite() { + suite.testTokens = testrig.NewTestTokens() + suite.testClients = testrig.NewTestClients() + suite.testApplications = testrig.NewTestApplications() + suite.testUsers = testrig.NewTestUsers() + suite.testAccounts = testrig.NewTestAccounts() + suite.testAttachments = testrig.NewTestAttachments() + suite.testStatuses = testrig.NewTestStatuses() + suite.testTags = testrig.NewTestTags() + suite.testMentions = testrig.NewTestMentions() +} + +func (suite *MarkdownTestSuite) SetupTest() { + suite.config = testrig.NewTestConfig() + suite.db = testrig.NewTestDB() + suite.log = testrig.NewTestLog() + suite.formatter = text.NewFormatter(suite.config, suite.db, suite.log) + + testrig.StandardDBSetup(suite.db, suite.testAccounts) +} + +func (suite *MarkdownTestSuite) TearDownTest() { + testrig.StandardDBTeardown(suite.db) +} + +func (suite *MarkdownTestSuite) TestParseSimple() { + s := suite.formatter.FromMarkdown(simpleMarkdown, nil, nil) + suite.Equal(simpleMarkdownExpected, s) +} + +func (suite *MarkdownTestSuite) TestParseWithCodeBlock() { + fmt.Println(withCodeBlock) + s := suite.formatter.FromMarkdown(withCodeBlock, nil, nil) + suite.Equal(withCodeBlockExpected, s) +} + +func (suite *MarkdownTestSuite) TestParseWithHashtag() { + foundTags := []*gtsmodel.Tag{ + suite.testTags["Hashtag"], + } + + s := suite.formatter.FromMarkdown(withHashtag, nil, foundTags) + suite.Equal(withHashtagExpected, s) +} + +func TestMarkdownTestSuite(t *testing.T) { + suite.Run(t, new(MarkdownTestSuite)) +} diff --git a/internal/text/minify.go b/internal/text/minify.go new file mode 100644 index 000000000..c6d7b9bc1 --- /dev/null +++ b/internal/text/minify.go @@ -0,0 +1,39 @@ +/* + GoToSocial + Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +package text + +import ( + "github.com/tdewolff/minify/v2" + "github.com/tdewolff/minify/v2/html" +) + +var m *minify.M + +// minifyHTML runs html through a minifier, reducing it in size. +func minifyHTML(in string) (string, error) { + if m == nil { + m = minify.New() + m.Add("text/html", &html.Minifier{ + KeepQuotes: true, + KeepEndTags: true, + KeepDocumentTags: true, + }) + } + return m.String("text/html", in) +} diff --git a/internal/text/plain.go b/internal/text/plain.go index 40fb6412f..a44e02c80 100644 --- a/internal/text/plain.go +++ b/internal/text/plain.go @@ -19,6 +19,7 @@ package text import ( + "fmt" "strings" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" @@ -27,6 +28,9 @@ import ( func (f *formatter) FromPlain(plain string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string { content := preformat(plain) + // sanitize any html elements + content = RemoveHTML(content) + // format links nicely content = f.ReplaceLinks(content) @@ -39,5 +43,8 @@ func (f *formatter) FromPlain(plain string, mentions []*gtsmodel.Mention, tags [ // replace newlines with breaks content = strings.ReplaceAll(content, "\n", "<br />") + // wrap the whole thing in a pee + content = fmt.Sprintf(`<p>%s</p>`, content) + return postformat(content) } diff --git a/internal/text/plain_test.go b/internal/text/plain_test.go index 2f9eb3a29..33c95234c 100644 --- a/internal/text/plain_test.go +++ b/internal/text/plain_test.go @@ -33,15 +33,15 @@ const ( simple = "this is a plain and simple status" simpleExpected = "<p>this is a plain and simple status</p>" - withTag = "this is a simple status that uses hashtag #welcome!" - withTagExpected = "<p>this is a simple status that uses hashtag <a href=\"http://localhost:8080/tags/welcome\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>welcome</span></a>!</p>" + withTag = "here's a simple status that uses hashtag #welcome!" + withTagExpected = "<p>here's a simple status that uses hashtag <a href=\"http://localhost:8080/tags/welcome\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>welcome</span></a>!</p>" moreComplex = `Another test @foss_satan@fossbros-anonymous.io #Hashtag Text` - moreComplexExpected = `<p>Another test <span class="h-card"><a href="http://fossbros-anonymous.io/@foss_satan" class="u-url mention" rel="nofollow noreferrer noopener" target="_blank">@<span>foss_satan</span></a></span><br/><br/><a href="http://localhost:8080/tags/Hashtag" class="mention hashtag" rel="tag nofollow noreferrer noopener" target="_blank">#<span>Hashtag</span></a><br/><br/>Text</p>` + moreComplexFull = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br><br><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br><br>Text</p>" ) type PlainTestSuite struct { @@ -102,7 +102,7 @@ func (suite *PlainTestSuite) TestParseMoreComplex() { fmt.Println(f) - assert.Equal(suite.T(), moreComplexExpected, f) + assert.Equal(suite.T(), moreComplexFull, f) } func TestPlainTestSuite(t *testing.T) { diff --git a/internal/text/sanitize.go b/internal/text/sanitize.go index 365875d46..e1bc73559 100644 --- a/internal/text/sanitize.go +++ b/internal/text/sanitize.go @@ -19,6 +19,8 @@ package text import ( + "regexp" + "github.com/microcosm-cc/bluemonday" ) @@ -31,12 +33,11 @@ var regular *bluemonday.Policy = bluemonday.UGCPolicy(). RequireNoReferrerOnLinks(true). RequireNoFollowOnLinks(true). RequireCrossOriginAnonymous(true). - AddTargetBlankToFullyQualifiedLinks(true) - -// outgoing policy should be used on statuses we've already parsed and added our own elements etc to. It is less strict than regular. -var outgoing *bluemonday.Policy = regular. + AddTargetBlankToFullyQualifiedLinks(true). AllowAttrs("class", "href", "rel").OnElements("a"). - AllowAttrs("class").OnElements("span") + AllowAttrs("class").OnElements("span"). + AllowAttrs("class").Matching(regexp.MustCompile("^language-[a-zA-Z0-9]+$")).OnElements("code"). + SkipElementsContent("code", "pre") // '[C]an be thought of as equivalent to stripping all HTML elements and their attributes as it has nothing on its allowlist. // An example usage scenario would be blog post titles where HTML tags are not expected at all @@ -54,9 +55,3 @@ func SanitizeHTML(in string) string { func RemoveHTML(in string) string { return strict.Sanitize(in) } - -// SanitizeOutgoing cleans up HTML in the given string, allowing through only safe elements and elements that were added during the parsing process. -// This should be used on text that we've already converted into HTML, just to catch any weirdness. -func SanitizeOutgoing(in string) string { - return outgoing.Sanitize(in) -} diff --git a/internal/text/sanitize_test.go b/internal/text/sanitize_test.go new file mode 100644 index 000000000..19a5f6a06 --- /dev/null +++ b/internal/text/sanitize_test.go @@ -0,0 +1,75 @@ +/* + GoToSocial + Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +package text_test + +import ( + "testing" + + "github.com/stretchr/testify/suite" + "github.com/superseriousbusiness/gotosocial/internal/text" +) + +const ( + removeHTML = `<p>Another test <span class="h-card"><a href="http://fossbros-anonymous.io/@foss_satan" class="u-url mention" rel="nofollow noreferrer noopener" target="_blank">@<span>foss_satan</span></a></span><br/><br/><a href="http://localhost:8080/tags/Hashtag" class="mention hashtag" rel="tag nofollow noreferrer noopener" target="_blank">#<span>Hashtag</span></a><br/><br/>Text</p>` + removedHTML = `Another test @foss_satan#HashtagText` + + sanitizeHTML = `here's some naughty html: <script>alert(ahhhh)</script> !!!` + sanitizedHTML = `here's some naughty html: !!!` + + withEscapedLiteral = `it\u0026amp;#39;s its it is` + withEscapedLiteralExpected = `it\u0026amp;#39;s its it is` + withEscaped = "it\u0026amp;#39;s its it is" + withEscapedExpected = "it&#39;s its it is" + + sanitizeOutgoing = `<p>gotta test some fucking ''''''''' marks</p>` + sanitizedOutgoing = `<p>gotta test some fucking ''''''''' marks</p>` +) + +type SanitizeTestSuite struct { + suite.Suite +} + +func (suite *SanitizeTestSuite) TestRemoveHTML() { + s := text.RemoveHTML(removeHTML) + suite.Equal(removedHTML, s) +} + +func (suite *SanitizeTestSuite) TestSanitizeOutgoing() { + s := text.SanitizeHTML(sanitizeOutgoing) + suite.Equal(sanitizedOutgoing, s) +} + +func (suite *SanitizeTestSuite) TestSanitizeHTML() { + s := text.SanitizeHTML(sanitizeHTML) + suite.Equal(sanitizedHTML, s) +} + +func (suite *SanitizeTestSuite) TestSanitizeWithEscapedLiteral() { + s := text.RemoveHTML(withEscapedLiteral) + suite.Equal(withEscapedLiteralExpected, s) +} + +func (suite *SanitizeTestSuite) TestSanitizeWithEscaped() { + s := text.RemoveHTML(withEscaped) + suite.Equal(withEscapedExpected, s) +} + +func TestSanitizeTestSuite(t *testing.T) { + suite.Run(t, new(SanitizeTestSuite)) +} |