diff options
author | 2022-07-19 15:21:17 +0200 | |
---|---|---|
committer | 2022-07-19 15:21:17 +0200 | |
commit | c84384e6608368a13a774d6d33a8cc32da7cf209 (patch) | |
tree | a18aa9c1ced1299d2682c1993e1ba38f46448dba /internal/text | |
parent | [chore] use our own logging implementation (#716) (diff) | |
download | gotosocial-c84384e6608368a13a774d6d33a8cc32da7cf209.tar.xz |
[bugfix] html escape special characters in text instead of totally removing them (#719)
* remove minify dependency
* tidy up some tests
* remove pre + postformat funcs
* rework sanitization + formatting
* update tests
* add some more markdown tests
Diffstat (limited to 'internal/text')
-rw-r--r-- | internal/text/common.go | 33 | ||||
-rw-r--r-- | internal/text/common_test.go | 46 | ||||
-rw-r--r-- | internal/text/link_test.go | 20 | ||||
-rw-r--r-- | internal/text/markdown.go | 6 | ||||
-rw-r--r-- | internal/text/markdown_test.go | 32 | ||||
-rw-r--r-- | internal/text/minify.go | 39 | ||||
-rw-r--r-- | internal/text/plain.go | 10 | ||||
-rw-r--r-- | internal/text/plain_test.go | 36 | ||||
-rw-r--r-- | internal/text/sanitize.go | 7 |
9 files changed, 70 insertions, 159 deletions
diff --git a/internal/text/common.go b/internal/text/common.go index 9ed3fb06f..005f9dfe1 100644 --- a/internal/text/common.go +++ b/internal/text/common.go @@ -21,7 +21,6 @@ package text import ( "bytes" "context" - "html" "strings" "unicode" @@ -30,38 +29,6 @@ import ( "github.com/superseriousbusiness/gotosocial/internal/regexes" ) -// preformat contains some common logic for making a string ready for formatting, which should be used for all user-input text. -func preformat(in string) string { - // do some preformatting of the text - - // 1. unescape everything that might be html escaped - s := html.UnescapeString(in) - - // 2. trim leading or trailing whitespace - s = strings.TrimSpace(s) - return s -} - -// postformat contains some common logic for html sanitization of text, wrapping elements, and trimming newlines and whitespace -func postformat(in string) string { - // do some postformatting of the text - - // 1. sanitize html to remove potentially dangerous elements - s := SanitizeHTML(in) - - // 2. the sanitize step tends to escape characters inside codeblocks, which is behavior we don't want, so unescape everything again - s = html.UnescapeString(s) - - // 3. 
minify html to remove any trailing newlines, spaces, unnecessary elements, etc etc - mini, err := MinifyHTML(s) - if err != nil { - // if the minify failed, just return what we have - return s - } - // return minified version of the html - return mini -} - func (f *formatter) ReplaceTags(ctx context.Context, in string, tags []*gtsmodel.Tag) string { return regexes.ReplaceAllStringFunc(regexes.HashtagFinder, in, func(match string, buf *bytes.Buffer) string { // we have a match diff --git a/internal/text/common_test.go b/internal/text/common_test.go index 5e8f05b30..48f5240d2 100644 --- a/internal/text/common_test.go +++ b/internal/text/common_test.go @@ -28,44 +28,14 @@ import ( ) const ( - replaceMentionsString = `Another test @foss_satan@fossbros-anonymous.io - -#Hashtag - -Text` - replaceMentionsExpected = `Another test <span class="h-card"><a href="http://fossbros-anonymous.io/@foss_satan" class="u-url mention">@<span>foss_satan</span></a></span> - -#Hashtag - -Text` - - replaceHashtagsExpected = `Another test @foss_satan@fossbros-anonymous.io - -<a href="http://localhost:8080/tags/Hashtag" class="mention hashtag" rel="tag">#<span>Hashtag</span></a> - -Text` - - replaceHashtagsAfterMentionsExpected = `Another test <span class="h-card"><a href="http://fossbros-anonymous.io/@foss_satan" class="u-url mention">@<span>foss_satan</span></a></span> - -<a href="http://localhost:8080/tags/Hashtag" class="mention hashtag" rel="tag">#<span>Hashtag</span></a> - -Text` - - replaceMentionsWithLinkString = `Another test @foss_satan@fossbros-anonymous.io - -http://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060` - - replaceMentionsWithLinkStringExpected = `Another test <span class="h-card"><a href="http://fossbros-anonymous.io/@foss_satan" class="u-url mention">@<span>foss_satan</span></a></span> - -http://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060` - - replaceMentionsWithLinkSelfString = `Mentioning myself: 
@the_mighty_zork - -and linking to my own status: https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR` - - replaceMemtionsWithLinkSelfExpected = `Mentioning myself: <span class="h-card"><a href="http://localhost:8080/@the_mighty_zork" class="u-url mention">@<span>the_mighty_zork</span></a></span> - -and linking to my own status: https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR` + replaceMentionsString = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText" + replaceMentionsExpected = "Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\">@<span>foss_satan</span></a></span>\n\n#Hashtag\n\nText" + replaceHashtagsExpected = "Another test @foss_satan@fossbros-anonymous.io\n\n<a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag\">#<span>Hashtag</span></a>\n\nText" + replaceHashtagsAfterMentionsExpected = "Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\">@<span>foss_satan</span></a></span>\n\n<a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag\">#<span>Hashtag</span></a>\n\nText" + replaceMentionsWithLinkString = "Another test @foss_satan@fossbros-anonymous.io\n\nhttp://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060" + replaceMentionsWithLinkStringExpected = "Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\">@<span>foss_satan</span></a></span>\n\nhttp://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060" + replaceMentionsWithLinkSelfString = "Mentioning myself: @the_mighty_zork\n\nand linking to my own status: https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR" + replaceMemtionsWithLinkSelfExpected = "Mentioning myself: <span class=\"h-card\"><a 
href=\"http://localhost:8080/@the_mighty_zork\" class=\"u-url mention\">@<span>the_mighty_zork</span></a></span>\n\nand linking to my own status: https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR" ) type CommonTestSuite struct { diff --git a/internal/text/link_test.go b/internal/text/link_test.go index 24484e02d..e50a8dd69 100644 --- a/internal/text/link_test.go +++ b/internal/text/link_test.go @@ -71,16 +71,16 @@ type LinkTestSuite struct { func (suite *LinkTestSuite) TestParseSimple() { f := suite.formatter.FromPlain(context.Background(), simple, nil, nil) - assert.Equal(suite.T(), simpleExpected, f) + suite.Equal(simpleExpected, f) } func (suite *LinkTestSuite) TestParseURLsFromText1() { urls := text.FindLinks(text1) - assert.Equal(suite.T(), "https://example.org/link/to/something#fragment", urls[0].String()) - assert.Equal(suite.T(), "http://test.example.org?q=bahhhhhhhhhhhh", urls[1].String()) - assert.Equal(suite.T(), "https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it", urls[2].String()) - assert.Equal(suite.T(), "https://example.orghttps://google.com", urls[3].String()) + suite.Equal("https://example.org/link/to/something#fragment", urls[0].String()) + suite.Equal("http://test.example.org?q=bahhhhhhhhhhhh", urls[1].String()) + suite.Equal("https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it", urls[2].String()) + suite.Equal("https://example.orghttps://google.com", urls[3].String()) } func (suite *LinkTestSuite) TestParseURLsFromText2() { @@ -99,7 +99,7 @@ func (suite *LinkTestSuite) TestParseURLsFromText3() { func (suite *LinkTestSuite) TestReplaceLinksFromText1() { replaced := suite.formatter.ReplaceLinks(context.Background(), text1) - assert.Equal(suite.T(), ` + suite.Equal(` This is a text with some links in it. 
Here's link number one: <a href="https://example.org/link/to/something#fragment" rel="noopener">example.org/link/to/something#fragment</a> Here's link number two: <a href="http://test.example.org?q=bahhhhhhhhhhhh" rel="noopener">test.example.org?q=bahhhhhhhhhhhh</a> @@ -114,7 +114,7 @@ really.cool.website <-- this one shouldn't be parsed as a link because it doesn' func (suite *LinkTestSuite) TestReplaceLinksFromText2() { replaced := suite.formatter.ReplaceLinks(context.Background(), text2) - assert.Equal(suite.T(), ` + suite.Equal(` this is one link: <a href="https://example.org" rel="noopener">example.org</a> this is the same link again: <a href="https://example.org" rel="noopener">example.org</a> @@ -126,14 +126,14 @@ these should be deduplicated func (suite *LinkTestSuite) TestReplaceLinksFromText3() { // we know mailto links won't be replaced with hrefs -- we only accept https and http replaced := suite.formatter.ReplaceLinks(context.Background(), text3) - assert.Equal(suite.T(), ` + suite.Equal(` here's a mailto link: mailto:whatever@test.org `, replaced) } func (suite *LinkTestSuite) TestReplaceLinksFromText4() { replaced := suite.formatter.ReplaceLinks(context.Background(), text4) - assert.Equal(suite.T(), ` + suite.Equal(` two similar links: <a href="https://example.org" rel="noopener">example.org</a> @@ -145,7 +145,7 @@ two similar links: func (suite *LinkTestSuite) TestReplaceLinksFromText5() { // we know this one doesn't work properly, which is why html should always be sanitized before being passed into the ReplaceLinks function replaced := suite.formatter.ReplaceLinks(context.Background(), text5) - assert.Equal(suite.T(), ` + suite.Equal(` what happens when we already have a link within an href? 
<a href="<a href="https://example.org" rel="noopener">example.org</a>"><a href="https://example.org" rel="noopener">example.org</a></a> diff --git a/internal/text/markdown.go b/internal/text/markdown.go index 01238954f..a5c62f23f 100644 --- a/internal/text/markdown.go +++ b/internal/text/markdown.go @@ -26,13 +26,11 @@ import ( ) func (f *formatter) FromMarkdown(ctx context.Context, md string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string { - content := preformat(md) - // do the markdown parsing *first* - contentBytes := blackfriday.Run([]byte(content)) + contentBytes := blackfriday.Run([]byte(md)) // format tags nicely - content = f.ReplaceTags(ctx, string(contentBytes), tags) + content := f.ReplaceTags(ctx, string(contentBytes), tags) // format mentions nicely content = f.ReplaceMentions(ctx, content, mentions) diff --git a/internal/text/markdown_test.go b/internal/text/markdown_test.go index 111cfe473..74a18a685 100644 --- a/internal/text/markdown_test.go +++ b/internal/text/markdown_test.go @@ -44,15 +44,19 @@ that was some JSON :) ` const ( - simpleMarkdown = "# Title\n\nHere's a simple text in markdown.\n\nHere's a [link](https://example.org)." 
- simpleMarkdownExpected = "<h1>Title</h1>\n\n<p>Here’s a simple text in markdown.</p>\n\n<p>Here’s a <a href=\"https://example.org\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">link</a>.</p>\n" - withCodeBlockExpected = "<h1>Title</h1>\n\n<p>Below is some JSON.</p>\n\n<pre><code class=\"language-json\">{\n "key": "value",\n "another_key": [\n "value1",\n "value2"\n ]\n}\n</code></pre>\n\n<p>that was some JSON :)</p>\n" - withInlineCode = "`Nobody tells you about the <code><del>SECRET CODE</del></code>, do they?`" - withInlineCodeExpected = "<p><code>Nobody tells you about the <code><del>SECRET CODE</del></code>, do they?</code></p>\n" - withInlineCode2 = "`Nobody tells you about the </code><del>SECRET CODE</del><code>, do they?`" - withInlineCode2Expected = "<p><code>Nobody tells you about the </code><del>SECRET CODE</del><code>, do they?</code></p>\n" - withHashtag = "# Title\n\nhere's a simple status that uses hashtag #Hashtag!" - withHashtagExpected = "<h1>Title</h1>\n\n<p>here’s a simple status that uses hashtag <a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a>!</p>\n" + simpleMarkdown = "# Title\n\nHere's a simple text in markdown.\n\nHere's a [link](https://example.org)." 
+ simpleMarkdownExpected = "<h1>Title</h1>\n\n<p>Here’s a simple text in markdown.</p>\n\n<p>Here’s a <a href=\"https://example.org\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">link</a>.</p>\n" + withCodeBlockExpected = "<h1>Title</h1>\n\n<p>Below is some JSON.</p>\n\n<pre><code class=\"language-json\">{\n "key": "value",\n "another_key": [\n "value1",\n "value2"\n ]\n}\n</code></pre>\n\n<p>that was some JSON :)</p>\n" + withInlineCode = "`Nobody tells you about the <code><del>SECRET CODE</del></code>, do they?`" + withInlineCodeExpected = "<p><code>Nobody tells you about the <code><del>SECRET CODE</del></code>, do they?</code></p>\n" + withInlineCode2 = "`Nobody tells you about the </code><del>SECRET CODE</del><code>, do they?`" + withInlineCode2Expected = "<p><code>Nobody tells you about the </code><del>SECRET CODE</del><code>, do they?</code></p>\n" + withHashtag = "# Title\n\nhere's a simple status that uses hashtag #Hashtag!" + withHashtagExpected = "<h1>Title</h1>\n\n<p>here’s a simple status that uses hashtag <a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a>!</p>\n" + mdWithHTML = "# Title\n\nHere's a simple text in markdown.\n\nHere's a <a href=\"https://example.org\">link</a>.\n\nHere's an image: <img src=\"https://gts.superseriousbusiness.org/assets/logo.png\" alt=\"The GoToSocial sloth logo.\" width=\"500\" height=\"600\">" + mdWithHTMLExpected = "<h1>Title</h1>\n\n<p>Here’s a simple text in markdown.</p>\n\n<p>Here’s a <a href=\"https://example.org\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">link</a>.</p>\n\n<p>Here’s an image: <img src=\"https://gts.superseriousbusiness.org/assets/logo.png\" alt=\"The GoToSocial sloth logo.\" width=\"500\" height=\"600\" crossorigin=\"anonymous\"></p>\n" + mdWithCheekyHTML = "# Title\n\nHere's a simple text in markdown.\n\nHere's a cheeky little script: <script>alert(ahhhh)</script>" + 
mdWithCheekyHTMLExpected = "<h1>Title</h1>\n\n<p>Here’s a simple text in markdown.</p>\n\n<p>Here’s a cheeky little script: </p>\n" ) type MarkdownTestSuite struct { @@ -88,6 +92,16 @@ func (suite *MarkdownTestSuite) TestParseWithHashtag() { suite.Equal(withHashtagExpected, s) } +func (suite *MarkdownTestSuite) TestParseWithHTML() { + s := suite.formatter.FromMarkdown(context.Background(), mdWithHTML, nil, nil) + suite.Equal(mdWithHTMLExpected, s) +} + +func (suite *MarkdownTestSuite) TestParseWithCheekyHTML() { + s := suite.formatter.FromMarkdown(context.Background(), mdWithCheekyHTML, nil, nil) + suite.Equal(mdWithCheekyHTMLExpected, s) +} + func TestMarkdownTestSuite(t *testing.T) { suite.Run(t, new(MarkdownTestSuite)) } diff --git a/internal/text/minify.go b/internal/text/minify.go deleted file mode 100644 index e2515b9a4..000000000 --- a/internal/text/minify.go +++ /dev/null @@ -1,39 +0,0 @@ -/* - GoToSocial - Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -package text - -import ( - "github.com/tdewolff/minify/v2" - "github.com/tdewolff/minify/v2/html" -) - -var m *minify.M - -// MinifyHTML runs html through a minifier, reducing it in size. 
-func MinifyHTML(in string) (string, error) { - if m == nil { - m = minify.New() - m.Add("text/html", &html.Minifier{ - KeepQuotes: true, - KeepEndTags: true, - KeepDocumentTags: true, - }) - } - return m.String("text/html", in) -} diff --git a/internal/text/plain.go b/internal/text/plain.go index bc10d1b67..3daea5686 100644 --- a/internal/text/plain.go +++ b/internal/text/plain.go @@ -20,6 +20,7 @@ package text import ( "context" + "html" "strings" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" @@ -32,10 +33,11 @@ var breakReplacer = strings.NewReplacer( ) func (f *formatter) FromPlain(ctx context.Context, plain string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string { - content := preformat(plain) + // trim any crap + content := strings.TrimSpace(plain) - // sanitize any html elements - content = removeHTML(content) + // clean 'er up + content = html.EscapeString(content) // format links nicely content = f.ReplaceLinks(ctx, content) @@ -52,5 +54,5 @@ func (f *formatter) FromPlain(ctx context.Context, plain string, mentions []*gts // wrap the whole thing in a pee content = `<p>` + content + `</p>` - return postformat(content) + return SanitizeHTML(content) } diff --git a/internal/text/plain_test.go b/internal/text/plain_test.go index 2b7b50d5e..cd82e0d1b 100644 --- a/internal/text/plain_test.go +++ b/internal/text/plain_test.go @@ -20,27 +20,21 @@ package text_test import ( "context" - "fmt" "testing" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" ) const ( - simple = "this is a plain and simple status" - simpleExpected = "<p>this is a plain and simple status</p>" - - withTag = "here's a simple status that uses hashtag #welcome!" 
- withTagExpected = "<p>here's a simple status that uses hashtag <a href=\"http://localhost:8080/tags/welcome\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>welcome</span></a>!</p>" - - moreComplex = `Another test @foss_satan@fossbros-anonymous.io - -#Hashtag - -Text` - moreComplexFull = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br><br><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br><br>Text</p>" + simple = "this is a plain and simple status" + simpleExpected = "<p>this is a plain and simple status</p>" + withTag = "here's a simple status that uses hashtag #welcome!" + withTagExpected = "<p>here's a simple status that uses hashtag <a href=\"http://localhost:8080/tags/welcome\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>welcome</span></a>!</p>" + withHTML = "<div>blah this should just be html escaped blah</div>" + withHTMLExpected = "<p>&lt;div&gt;blah this should just be html escaped blah&lt;/div&gt;</p>" + moreComplex = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText" + moreComplexFull = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br/><br/><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br/><br/>Text</p>" ) type PlainTestSuite struct { @@ -49,7 +43,7 @@ type PlainTestSuite struct { func (suite *PlainTestSuite) TestParseSimple() { f := suite.formatter.FromPlain(context.Background(), simple, nil, nil) - 
assert.Equal(suite.T(), simpleExpected, f) + suite.Equal(simpleExpected, f) } func (suite *PlainTestSuite) TestParseWithTag() { @@ -58,7 +52,12 @@ func (suite *PlainTestSuite) TestParseWithTag() { } f := suite.formatter.FromPlain(context.Background(), withTag, nil, foundTags) - assert.Equal(suite.T(), withTagExpected, f) + suite.Equal(withTagExpected, f) +} + +func (suite *PlainTestSuite) TestParseWithHTML() { + f := suite.formatter.FromPlain(context.Background(), withHTML, nil, nil) + suite.Equal(withHTMLExpected, f) } func (suite *PlainTestSuite) TestParseMoreComplex() { @@ -71,10 +70,7 @@ func (suite *PlainTestSuite) TestParseMoreComplex() { } f := suite.formatter.FromPlain(context.Background(), moreComplex, foundMentions, foundTags) - - fmt.Println(f) - - assert.Equal(suite.T(), moreComplexFull, f) + suite.Equal(moreComplexFull, f) } func TestPlainTestSuite(t *testing.T) { diff --git a/internal/text/sanitize.go b/internal/text/sanitize.go index d4faabbb1..96b7ef994 100644 --- a/internal/text/sanitize.go +++ b/internal/text/sanitize.go @@ -19,7 +19,9 @@ package text import ( + "html" "regexp" + "strings" "github.com/microcosm-cc/bluemonday" ) @@ -59,7 +61,8 @@ func SanitizeHTML(in string) string { // SanitizePlaintext runs text through basic sanitization. This removes // any html elements that were in the string, and returns clean plaintext. func SanitizePlaintext(in string) string { - content := preformat(in) + content := html.UnescapeString(in) content = removeHTML(content) - return postformat(content) + content = html.UnescapeString(content) + return strings.TrimSpace(content) } |