From c84384e6608368a13a774d6d33a8cc32da7cf209 Mon Sep 17 00:00:00 2001 From: tobi <31960611+tsmethurst@users.noreply.github.com> Date: Tue, 19 Jul 2022 15:21:17 +0200 Subject: [bugfix] html escape special characters in text instead of totally removing them (#719) * remove minify dependency * tidy up some tests * remove pre + postformat funcs * rework sanitization + formatting * update tests * add some more markdown tests --- internal/text/common.go | 33 ------------------------------ internal/text/common_test.go | 46 ++++++++---------------------------------- internal/text/link_test.go | 20 +++++++++--------- internal/text/markdown.go | 6 ++---- internal/text/markdown_test.go | 32 ++++++++++++++++++++--------- internal/text/minify.go | 39 ----------------------------------- internal/text/plain.go | 10 +++++---- internal/text/plain_test.go | 36 +++++++++++++++------------------ internal/text/sanitize.go | 7 +++++-- 9 files changed, 70 insertions(+), 159 deletions(-) delete mode 100644 internal/text/minify.go (limited to 'internal/text') diff --git a/internal/text/common.go b/internal/text/common.go index 9ed3fb06f..005f9dfe1 100644 --- a/internal/text/common.go +++ b/internal/text/common.go @@ -21,7 +21,6 @@ package text import ( "bytes" "context" - "html" "strings" "unicode" @@ -30,38 +29,6 @@ import ( "github.com/superseriousbusiness/gotosocial/internal/regexes" ) -// preformat contains some common logic for making a string ready for formatting, which should be used for all user-input text. -func preformat(in string) string { - // do some preformatting of the text - - // 1. unescape everything that might be html escaped - s := html.UnescapeString(in) - - // 2. trim leading or trailing whitespace - s = strings.TrimSpace(s) - return s -} - -// postformat contains some common logic for html sanitization of text, wrapping elements, and trimming newlines and whitespace -func postformat(in string) string { - // do some postformatting of the text - - // 1. sanitize html to remove potentially dangerous elements - s := SanitizeHTML(in) - - // 2. the sanitize step tends to escape characters inside codeblocks, which is behavior we don't want, so unescape everything again - s = html.UnescapeString(s) - - // 3. minify html to remove any trailing newlines, spaces, unnecessary elements, etc etc - mini, err := MinifyHTML(s) - if err != nil { - // if the minify failed, just return what we have - return s - } - // return minified version of the html - return mini -} - func (f *formatter) ReplaceTags(ctx context.Context, in string, tags []*gtsmodel.Tag) string { return regexes.ReplaceAllStringFunc(regexes.HashtagFinder, in, func(match string, buf *bytes.Buffer) string { // we have a match diff --git a/internal/text/common_test.go b/internal/text/common_test.go index 5e8f05b30..48f5240d2 100644 --- a/internal/text/common_test.go +++ b/internal/text/common_test.go @@ -28,44 +28,14 @@ import ( ) const ( - replaceMentionsString = `Another test @foss_satan@fossbros-anonymous.io - -#Hashtag - -Text` - replaceMentionsExpected = `Another test @foss_satan - -#Hashtag - -Text` - - replaceHashtagsExpected = `Another test @foss_satan@fossbros-anonymous.io - - - -Text` - - replaceHashtagsAfterMentionsExpected = `Another test @foss_satan - - - -Text` - - replaceMentionsWithLinkString = `Another test @foss_satan@fossbros-anonymous.io - -http://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060` - - replaceMentionsWithLinkStringExpected = `Another test @foss_satan - -http://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060` - - replaceMentionsWithLinkSelfString = `Mentioning myself: @the_mighty_zork - -and linking to my own status: https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR` - - replaceMemtionsWithLinkSelfExpected = `Mentioning myself: @the_mighty_zork - -and linking to my own status: https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR` + replaceMentionsString = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText" + replaceMentionsExpected = "Another test @foss_satan\n\n#Hashtag\n\nText" + replaceHashtagsExpected = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText" + replaceHashtagsAfterMentionsExpected = "Another test @foss_satan\n\n#Hashtag\n\nText" + replaceMentionsWithLinkString = "Another test @foss_satan@fossbros-anonymous.io\n\nhttp://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060" + replaceMentionsWithLinkStringExpected = "Another test @foss_satan\n\nhttp://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060" + replaceMentionsWithLinkSelfString = "Mentioning myself: @the_mighty_zork\n\nand linking to my own status: https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR" + replaceMemtionsWithLinkSelfExpected = "Mentioning myself: @the_mighty_zork\n\nand linking to my own status: https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR" ) type CommonTestSuite struct { diff --git a/internal/text/link_test.go b/internal/text/link_test.go index 24484e02d..e50a8dd69 100644 --- a/internal/text/link_test.go +++ b/internal/text/link_test.go @@ -71,16 +71,16 @@ type LinkTestSuite struct { func (suite *LinkTestSuite) TestParseSimple() { f := suite.formatter.FromPlain(context.Background(), simple, nil, nil) - assert.Equal(suite.T(), simpleExpected, f) + suite.Equal(simpleExpected, f) } func (suite *LinkTestSuite) TestParseURLsFromText1() { urls := text.FindLinks(text1) - assert.Equal(suite.T(), "https://example.org/link/to/something#fragment", urls[0].String()) - assert.Equal(suite.T(), "http://test.example.org?q=bahhhhhhhhhhhh", urls[1].String()) - assert.Equal(suite.T(), "https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it", urls[2].String()) - assert.Equal(suite.T(), "https://example.orghttps://google.com", urls[3].String()) + suite.Equal("https://example.org/link/to/something#fragment", urls[0].String()) + suite.Equal("http://test.example.org?q=bahhhhhhhhhhhh", urls[1].String()) + suite.Equal("https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it", urls[2].String()) + suite.Equal("https://example.orghttps://google.com", urls[3].String()) } func (suite *LinkTestSuite) TestParseURLsFromText2() { @@ -99,7 +99,7 @@ func (suite *LinkTestSuite) TestParseURLsFromText3() { func (suite *LinkTestSuite) TestReplaceLinksFromText1() { replaced := suite.formatter.ReplaceLinks(context.Background(), text1) - assert.Equal(suite.T(), ` + suite.Equal(` This is a text with some links in it. Here's link number one: example.org/link/to/something#fragment Here's link number two: test.example.org?q=bahhhhhhhhhhhh @@ -114,7 +114,7 @@ really.cool.website <-- this one shouldn't be parsed as a link because it doesn' func (suite *LinkTestSuite) TestReplaceLinksFromText2() { replaced := suite.formatter.ReplaceLinks(context.Background(), text2) - assert.Equal(suite.T(), ` + suite.Equal(` this is one link: example.org this is the same link again: example.org @@ -126,14 +126,14 @@ these should be deduplicated func (suite *LinkTestSuite) TestReplaceLinksFromText3() { // we know mailto links won't be replaced with hrefs -- we only accept https and http replaced := suite.formatter.ReplaceLinks(context.Background(), text3) - assert.Equal(suite.T(), ` + suite.Equal(` here's a mailto link: mailto:whatever@test.org `, replaced) } func (suite *LinkTestSuite) TestReplaceLinksFromText4() { replaced := suite.formatter.ReplaceLinks(context.Background(), text4) - assert.Equal(suite.T(), ` + suite.Equal(` two similar links: example.org @@ -145,7 +145,7 @@ two similar links: func (suite *LinkTestSuite) TestReplaceLinksFromText5() { // we know this one doesn't work properly, which is why html should always be sanitized before being passed into the ReplaceLinks function replaced := suite.formatter.ReplaceLinks(context.Background(), text5) - assert.Equal(suite.T(), ` + suite.Equal(` what happens when we already have a link within an href? example.org">example.org diff --git a/internal/text/markdown.go b/internal/text/markdown.go index 01238954f..a5c62f23f 100644 --- a/internal/text/markdown.go +++ b/internal/text/markdown.go @@ -26,13 +26,11 @@ import ( ) func (f *formatter) FromMarkdown(ctx context.Context, md string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string { - content := preformat(md) - // do the markdown parsing *first* - contentBytes := blackfriday.Run([]byte(content)) + contentBytes := blackfriday.Run([]byte(md)) // format tags nicely - content = f.ReplaceTags(ctx, string(contentBytes), tags) + content := f.ReplaceTags(ctx, string(contentBytes), tags) // format mentions nicely content = f.ReplaceMentions(ctx, content, mentions) diff --git a/internal/text/markdown_test.go b/internal/text/markdown_test.go index 111cfe473..74a18a685 100644 --- a/internal/text/markdown_test.go +++ b/internal/text/markdown_test.go @@ -44,15 +44,19 @@ that was some JSON :) ` const ( - simpleMarkdown = "# Title\n\nHere's a simple text in markdown.\n\nHere's a [link](https://example.org)." - simpleMarkdownExpected = "

Title

\n\n

Here’s a simple text in markdown.

\n\n

Here’s a link.

\n" - withCodeBlockExpected = "

Title

\n\n

Below is some JSON.

\n\n
{\n  "key": "value",\n  "another_key": [\n    "value1",\n    "value2"\n  ]\n}\n
\n\n

that was some JSON :)

\n" - withInlineCode = "`Nobody tells you about the SECRET CODE, do they?`" - withInlineCodeExpected = "

Nobody tells you about the <code><del>SECRET CODE</del></code>, do they?

\n" - withInlineCode2 = "`Nobody tells you about the SECRET CODE, do they?`" - withInlineCode2Expected = "

Nobody tells you about the </code><del>SECRET CODE</del><code>, do they?

\n" - withHashtag = "# Title\n\nhere's a simple status that uses hashtag #Hashtag!" - withHashtagExpected = "

Title

\n\n

here’s a simple status that uses hashtag #Hashtag!

\n" + simpleMarkdown = "# Title\n\nHere's a simple text in markdown.\n\nHere's a [link](https://example.org)." + simpleMarkdownExpected = "

Title

\n\n

Here’s a simple text in markdown.

\n\n

Here’s a link.

\n" + withCodeBlockExpected = "

Title

\n\n

Below is some JSON.

\n\n
{\n  "key": "value",\n  "another_key": [\n    "value1",\n    "value2"\n  ]\n}\n
\n\n

that was some JSON :)

\n" + withInlineCode = "`Nobody tells you about the SECRET CODE, do they?`" + withInlineCodeExpected = "

Nobody tells you about the <code><del>SECRET CODE</del></code>, do they?

\n" + withInlineCode2 = "`Nobody tells you about the
SECRET CODE, do they?`" + withInlineCode2Expected = "

Nobody tells you about the </code><del>SECRET CODE</del><code>, do they?

\n" + withHashtag = "# Title\n\nhere's a simple status that uses hashtag #Hashtag!" + withHashtagExpected = "

Title

\n\n

here’s a simple status that uses hashtag #Hashtag!

\n" + mdWithHTML = "# Title\n\nHere's a simple text in markdown.\n\nHere's a link.\n\nHere's an image: \"The" + mdWithHTMLExpected = "

Title

\n\n

Here’s a simple text in markdown.

\n\n

Here’s a link.

\n\n

Here’s an image: \"The

\n" + mdWithCheekyHTML = "# Title\n\nHere's a simple text in markdown.\n\nHere's a cheeky little script: " + mdWithCheekyHTMLExpected = "

Title

\n\n

Here’s a simple text in markdown.

\n\n

Here’s a cheeky little script:

\n" ) type MarkdownTestSuite struct { @@ -88,6 +92,16 @@ func (suite *MarkdownTestSuite) TestParseWithHashtag() { suite.Equal(withHashtagExpected, s) } +func (suite *MarkdownTestSuite) TestParseWithHTML() { + s := suite.formatter.FromMarkdown(context.Background(), mdWithHTML, nil, nil) + suite.Equal(mdWithHTMLExpected, s) +} + +func (suite *MarkdownTestSuite) TestParseWithCheekyHTML() { + s := suite.formatter.FromMarkdown(context.Background(), mdWithCheekyHTML, nil, nil) + suite.Equal(mdWithCheekyHTMLExpected, s) +} + func TestMarkdownTestSuite(t *testing.T) { suite.Run(t, new(MarkdownTestSuite)) } diff --git a/internal/text/minify.go b/internal/text/minify.go deleted file mode 100644 index e2515b9a4..000000000 --- a/internal/text/minify.go +++ /dev/null @@ -1,39 +0,0 @@ -/* - GoToSocial - Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see . -*/ - -package text - -import ( - "github.com/tdewolff/minify/v2" - "github.com/tdewolff/minify/v2/html" -) - -var m *minify.M - -// MinifyHTML runs html through a minifier, reducing it in size. -func MinifyHTML(in string) (string, error) { - if m == nil { - m = minify.New() - m.Add("text/html", &html.Minifier{ - KeepQuotes: true, - KeepEndTags: true, - KeepDocumentTags: true, - }) - } - return m.String("text/html", in) -} diff --git a/internal/text/plain.go b/internal/text/plain.go index bc10d1b67..3daea5686 100644 --- a/internal/text/plain.go +++ b/internal/text/plain.go @@ -20,6 +20,7 @@ package text import ( "context" + "html" "strings" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" @@ -32,10 +33,11 @@ var breakReplacer = strings.NewReplacer( ) func (f *formatter) FromPlain(ctx context.Context, plain string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string { - content := preformat(plain) + // trim any crap + content := strings.TrimSpace(plain) - // sanitize any html elements - content = removeHTML(content) + // clean 'er up + content = html.EscapeString(content) // format links nicely content = f.ReplaceLinks(ctx, content) @@ -52,5 +54,5 @@ func (f *formatter) FromPlain(ctx context.Context, plain string, mentions []*gts // wrap the whole thing in a pee content = `

` + content + `

` - return postformat(content) + return SanitizeHTML(content) } diff --git a/internal/text/plain_test.go b/internal/text/plain_test.go index 2b7b50d5e..cd82e0d1b 100644 --- a/internal/text/plain_test.go +++ b/internal/text/plain_test.go @@ -20,27 +20,21 @@ package text_test import ( "context" - "fmt" "testing" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" ) const ( - simple = "this is a plain and simple status" - simpleExpected = "

this is a plain and simple status

" - - withTag = "here's a simple status that uses hashtag #welcome!" - withTagExpected = "

here's a simple status that uses hashtag #welcome!

" - - moreComplex = `Another test @foss_satan@fossbros-anonymous.io - -#Hashtag - -Text` - moreComplexFull = "

Another test @foss_satan

#Hashtag

Text

" + simple = "this is a plain and simple status" + simpleExpected = "

this is a plain and simple status

" + withTag = "here's a simple status that uses hashtag #welcome!" + withTagExpected = "

here's a simple status that uses hashtag #welcome!

" + withHTML = "
blah this should just be html escaped blah
" + withHTMLExpected = "

<div>blah this should just be html escaped blah</div>

" + moreComplex = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText" + moreComplexFull = "

Another test @foss_satan

#Hashtag

Text

" ) type PlainTestSuite struct { @@ -49,7 +43,7 @@ type PlainTestSuite struct { func (suite *PlainTestSuite) TestParseSimple() { f := suite.formatter.FromPlain(context.Background(), simple, nil, nil) - assert.Equal(suite.T(), simpleExpected, f) + suite.Equal(simpleExpected, f) } func (suite *PlainTestSuite) TestParseWithTag() { @@ -58,7 +52,12 @@ func (suite *PlainTestSuite) TestParseWithTag() { } f := suite.formatter.FromPlain(context.Background(), withTag, nil, foundTags) - assert.Equal(suite.T(), withTagExpected, f) + suite.Equal(withTagExpected, f) +} + +func (suite *PlainTestSuite) TestParseWithHTML() { + f := suite.formatter.FromPlain(context.Background(), withHTML, nil, nil) + suite.Equal(withHTMLExpected, f) } func (suite *PlainTestSuite) TestParseMoreComplex() { @@ -71,10 +70,7 @@ func (suite *PlainTestSuite) TestParseMoreComplex() { } f := suite.formatter.FromPlain(context.Background(), moreComplex, foundMentions, foundTags) - - fmt.Println(f) - - assert.Equal(suite.T(), moreComplexFull, f) + suite.Equal(moreComplexFull, f) } func TestPlainTestSuite(t *testing.T) { diff --git a/internal/text/sanitize.go b/internal/text/sanitize.go index d4faabbb1..96b7ef994 100644 --- a/internal/text/sanitize.go +++ b/internal/text/sanitize.go @@ -19,7 +19,9 @@ package text import ( + "html" "regexp" + "strings" "github.com/microcosm-cc/bluemonday" ) @@ -59,7 +61,8 @@ func SanitizeHTML(in string) string { // SanitizePlaintext runs text through basic sanitization. This removes // any html elements that were in the string, and returns clean plaintext. func SanitizePlaintext(in string) string { - content := preformat(in) + content := html.UnescapeString(in) content = removeHTML(content) - return postformat(content) + content = html.UnescapeString(content) + return strings.TrimSpace(content) } -- cgit v1.2.3