From 5668ce1ec701ed12eb099020e8a322de08e6f810 Mon Sep 17 00:00:00 2001 From: tobi <31960611+tsmethurst@users.noreply.github.com> Date: Thu, 26 May 2022 11:37:13 +0200 Subject: [bugfix] Fix HTML escaping in instance title (#607) * move caption sanitization -> sanitize.go * use sanitizeplaintext rather than removehtml * rename sanitizecaption to sanitizeplaintext * avoid removing html twice from statuses * unexport remoteHTML it's no longer used outside the text package so this makes it less confusing * test instance PATCH --- internal/text/caption.go | 29 -------------- internal/text/caption_test.go | 82 ---------------------------------------- internal/text/plain.go | 2 +- internal/text/removehtml_test.go | 57 ++++++++++++++++++++++++++++ internal/text/sanitize.go | 16 ++++++-- internal/text/sanitize_test.go | 68 ++++++++++++++++++++++----------- 6 files changed, 116 insertions(+), 138 deletions(-) delete mode 100644 internal/text/caption.go delete mode 100644 internal/text/caption_test.go create mode 100644 internal/text/removehtml_test.go (limited to 'internal/text') diff --git a/internal/text/caption.go b/internal/text/caption.go deleted file mode 100644 index c3c86b0b1..000000000 --- a/internal/text/caption.go +++ /dev/null @@ -1,29 +0,0 @@ -/* - GoToSocial - Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see . -*/ - -package text - -// SanitizeCaption runs image captions (or indeed any plain text) through basic sanitization. -// It returns plain text rather than HTML, in contrast to other functions in this package. -func SanitizeCaption(in string) string { - content := preformat(in) - - content = RemoveHTML(content) - - return postformat(content) -} diff --git a/internal/text/caption_test.go b/internal/text/caption_test.go deleted file mode 100644 index f1337df09..000000000 --- a/internal/text/caption_test.go +++ /dev/null @@ -1,82 +0,0 @@ -/* - GoToSocial - Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see . -*/ - -package text_test - -import ( - "testing" - - "github.com/stretchr/testify/suite" - "github.com/superseriousbusiness/gotosocial/internal/text" -) - -type CaptionTestSuite struct { - suite.Suite -} - -func (suite *CaptionTestSuite) TestSanitizeCaption1() { - dodgyCaption := "this is just a normal caption ;)" - sanitized := text.SanitizeCaption(dodgyCaption) - suite.Equal("this is just a normal caption ;)", sanitized) -} - -func (suite *CaptionTestSuite) TestSanitizeCaption2() { - dodgyCaption := "here's a LOUD caption" - sanitized := text.SanitizeCaption(dodgyCaption) - suite.Equal("here's a LOUD caption", sanitized) -} - -func (suite *CaptionTestSuite) TestSanitizeCaption3() { - dodgyCaption := "" - sanitized := text.SanitizeCaption(dodgyCaption) - suite.Equal("", sanitized) -} - -func (suite *CaptionTestSuite) TestSanitizeCaption4() { - dodgyCaption := ` - - -here is -a multi line -caption -with some newlines - - - -` - sanitized := text.SanitizeCaption(dodgyCaption) - suite.Equal("here is\na multi line\ncaption\nwith some newlines", sanitized) -} - -func (suite *CaptionTestSuite) TestSanitizeCaption5() { - // html-escaped: " hello world" - dodgyCaption := `<script>console.log('aha!')</script> hello world` - sanitized := text.SanitizeCaption(dodgyCaption) - suite.Equal("hello world", sanitized) -} - -func (suite *CaptionTestSuite) TestSanitizeCaption6() { - // html-encoded: " hello world" - dodgyCaption := `<script>console.log('aha!')</script> hello world` - sanitized := text.SanitizeCaption(dodgyCaption) - suite.Equal("hello world", sanitized) -} - -func TestCaptionTestSuite(t *testing.T) { - suite.Run(t, new(CaptionTestSuite)) -} diff --git a/internal/text/plain.go b/internal/text/plain.go index 4ef3b3715..bc10d1b67 100644 --- a/internal/text/plain.go +++ b/internal/text/plain.go @@ -35,7 +35,7 @@ func (f *formatter) FromPlain(ctx context.Context, plain string, mentions []*gts content := preformat(plain) // sanitize any html elements - content = RemoveHTML(content) + content = removeHTML(content) // format links nicely content = f.ReplaceLinks(ctx, content) diff --git a/internal/text/removehtml_test.go b/internal/text/removehtml_test.go new file mode 100644 index 000000000..0029b45a5 --- /dev/null +++ b/internal/text/removehtml_test.go @@ -0,0 +1,57 @@ +/* + GoToSocial + Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . +*/ + +package text + +import ( + "testing" + + "github.com/stretchr/testify/suite" +) + +const ( + test_removeHTML = `

Another test @foss_satan

#Hashtag

Text

` + test_removedHTML = `Another test @foss_satan#HashtagText` + test_withEscapedLiteral = `it\u0026amp;#39;s its it is` + test_withEscapedLiteralExpected = `it\u0026amp;#39;s its it is` + test_withEscaped = "it\u0026amp;#39;s its it is" + test_withEscapedExpected = "it&#39;s its it is" +) + +type RemoveHTMLTestSuite struct { + suite.Suite +} + +func (suite *RemoveHTMLTestSuite) TestSanitizeWithEscapedLiteral() { + s := removeHTML(test_withEscapedLiteral) + suite.Equal(test_withEscapedLiteralExpected, s) +} + +func (suite *RemoveHTMLTestSuite) TestSanitizeWithEscaped() { + s := removeHTML(test_withEscaped) + suite.Equal(test_withEscapedExpected, s) +} + +func (suite *RemoveHTMLTestSuite) TestRemoveHTML() { + s := removeHTML(test_removeHTML) + suite.Equal(test_removedHTML, s) +} + +func TestRemoveHTMLTestSuite(t *testing.T) { + suite.Run(t, &RemoveHTMLTestSuite{}) +} diff --git a/internal/text/sanitize.go b/internal/text/sanitize.go index 897dea34d..d4faabbb1 100644 --- a/internal/text/sanitize.go +++ b/internal/text/sanitize.go @@ -46,12 +46,20 @@ var regular *bluemonday.Policy = bluemonday.UGCPolicy(). // Source: https://github.com/microcosm-cc/bluemonday#usage var strict *bluemonday.Policy = bluemonday.StrictPolicy() -// SanitizeHTML cleans up HTML in the given string, allowing through only safe HTML elements. +// removeHTML strictly removes *all* recognized HTML elements from the given string. +func removeHTML(in string) string { + return strict.Sanitize(in) +} + +// SanitizeHTML sanitizes risky html elements from the given string, allowing only safe ones through. func SanitizeHTML(in string) string { return regular.Sanitize(in) } -// RemoveHTML removes all HTML from the given string. -func RemoveHTML(in string) string { - return strict.Sanitize(in) +// SanitizePlaintext runs text through basic sanitization. This removes +// any html elements that were in the string, and returns clean plaintext. +func SanitizePlaintext(in string) string { + content := preformat(in) + content = removeHTML(content) + return postformat(content) } diff --git a/internal/text/sanitize_test.go b/internal/text/sanitize_test.go index 4270e2602..eea5daadb 100644 --- a/internal/text/sanitize_test.go +++ b/internal/text/sanitize_test.go @@ -26,17 +26,8 @@ import ( ) const ( - removeHTML = `

Another test @foss_satan

#Hashtag

Text

` - removedHTML = `Another test @foss_satan#HashtagText` - - sanitizeHTML = `here's some naughty html: !!!` - sanitizedHTML = `here's some naughty html: !!!` - - withEscapedLiteral = `it\u0026amp;#39;s its it is` - withEscapedLiteralExpected = `it\u0026amp;#39;s its it is` - withEscaped = "it\u0026amp;#39;s its it is" - withEscapedExpected = "it&#39;s its it is" - + sanitizeHTML = `here's some naughty html: !!!` + sanitizedHTML = `here's some naughty html: !!!` sanitizeOutgoing = `

gotta test some fucking ''''''''' marks

` sanitizedOutgoing = `

gotta test some fucking ''''''''' marks

` ) @@ -45,11 +36,6 @@ type SanitizeTestSuite struct { suite.Suite } -func (suite *SanitizeTestSuite) TestRemoveHTML() { - s := text.RemoveHTML(removeHTML) - suite.Equal(removedHTML, s) -} - func (suite *SanitizeTestSuite) TestSanitizeOutgoing() { s := text.SanitizeHTML(sanitizeOutgoing) suite.Equal(sanitizedOutgoing, s) @@ -60,14 +46,52 @@ func (suite *SanitizeTestSuite) TestSanitizeHTML() { suite.Equal(sanitizedHTML, s) } -func (suite *SanitizeTestSuite) TestSanitizeWithEscapedLiteral() { - s := text.RemoveHTML(withEscapedLiteral) - suite.Equal(withEscapedLiteralExpected, s) +func (suite *SanitizeTestSuite) TestSanitizeCaption1() { + dodgyCaption := "this is just a normal caption ;)" + sanitized := text.SanitizePlaintext(dodgyCaption) + suite.Equal("this is just a normal caption ;)", sanitized) +} + +func (suite *SanitizeTestSuite) TestSanitizeCaption2() { + dodgyCaption := "here's a LOUD caption" + sanitized := text.SanitizePlaintext(dodgyCaption) + suite.Equal("here's a LOUD caption", sanitized) +} + +func (suite *SanitizeTestSuite) TestSanitizeCaption3() { + dodgyCaption := "" + sanitized := text.SanitizePlaintext(dodgyCaption) + suite.Equal("", sanitized) +} + +func (suite *SanitizeTestSuite) TestSanitizeCaption4() { + dodgyCaption := ` + + +here is +a multi line +caption +with some newlines + + + +` + sanitized := text.SanitizePlaintext(dodgyCaption) + suite.Equal("here is\na multi line\ncaption\nwith some newlines", sanitized) +} + +func (suite *SanitizeTestSuite) TestSanitizeCaption5() { + // html-escaped: " hello world" + dodgyCaption := `<script>console.log('aha!')</script> hello world` + sanitized := text.SanitizePlaintext(dodgyCaption) + suite.Equal("hello world", sanitized) } -func (suite *SanitizeTestSuite) TestSanitizeWithEscaped() { - s := text.RemoveHTML(withEscaped) - suite.Equal(withEscapedExpected, s) +func (suite *SanitizeTestSuite) TestSanitizeCaption6() { + // html-encoded: " hello world" + dodgyCaption := `<script>console.log('aha!')</script> hello world` + sanitized := text.SanitizePlaintext(dodgyCaption) + suite.Equal("hello world", sanitized) } func TestSanitizeTestSuite(t *testing.T) { -- cgit v1.2.3