diff options
Diffstat (limited to 'internal/text')
-rw-r--r-- | internal/text/caption.go | 29 | ||||
-rw-r--r-- | internal/text/caption_test.go | 82 | ||||
-rw-r--r-- | internal/text/plain.go | 2 | ||||
-rw-r--r-- | internal/text/removehtml_test.go | 57 | ||||
-rw-r--r-- | internal/text/sanitize.go | 16 | ||||
-rw-r--r-- | internal/text/sanitize_test.go | 68 |
6 files changed, 116 insertions, 138 deletions
diff --git a/internal/text/caption.go b/internal/text/caption.go deleted file mode 100644 index c3c86b0b1..000000000 --- a/internal/text/caption.go +++ /dev/null @@ -1,29 +0,0 @@ -/* - GoToSocial - Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -package text - -// SanitizeCaption runs image captions (or indeed any plain text) through basic sanitization. -// It returns plain text rather than HTML, in contrast to other functions in this package. -func SanitizeCaption(in string) string { - content := preformat(in) - - content = RemoveHTML(content) - - return postformat(content) -} diff --git a/internal/text/caption_test.go b/internal/text/caption_test.go deleted file mode 100644 index f1337df09..000000000 --- a/internal/text/caption_test.go +++ /dev/null @@ -1,82 +0,0 @@ -/* - GoToSocial - Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -package text_test - -import ( - "testing" - - "github.com/stretchr/testify/suite" - "github.com/superseriousbusiness/gotosocial/internal/text" -) - -type CaptionTestSuite struct { - suite.Suite -} - -func (suite *CaptionTestSuite) TestSanitizeCaption1() { - dodgyCaption := "<script>console.log('haha!')</script>this is just a normal caption ;)" - sanitized := text.SanitizeCaption(dodgyCaption) - suite.Equal("this is just a normal caption ;)", sanitized) -} - -func (suite *CaptionTestSuite) TestSanitizeCaption2() { - dodgyCaption := "<em>here's a LOUD caption</em>" - sanitized := text.SanitizeCaption(dodgyCaption) - suite.Equal("here's a LOUD caption", sanitized) -} - -func (suite *CaptionTestSuite) TestSanitizeCaption3() { - dodgyCaption := "" - sanitized := text.SanitizeCaption(dodgyCaption) - suite.Equal("", sanitized) -} - -func (suite *CaptionTestSuite) TestSanitizeCaption4() { - dodgyCaption := ` - - -here is -a multi line -caption -with some newlines - - - -` - sanitized := text.SanitizeCaption(dodgyCaption) - suite.Equal("here is\na multi line\ncaption\nwith some newlines", sanitized) -} - -func (suite *CaptionTestSuite) TestSanitizeCaption5() { - // html-escaped: "<script>console.log('aha!')</script> hello world" - dodgyCaption := `<script>console.log('aha!')</script> hello world` - sanitized := text.SanitizeCaption(dodgyCaption) - suite.Equal("hello world", sanitized) -} - -func (suite *CaptionTestSuite) TestSanitizeCaption6() { - // html-encoded: "<script>console.log('aha!')</script> hello world" - dodgyCaption := `<script>console.log('aha!')</script> hello world` - sanitized := text.SanitizeCaption(dodgyCaption) - suite.Equal("hello world", sanitized) -} - -func TestCaptionTestSuite(t *testing.T) { - suite.Run(t, new(CaptionTestSuite)) -} 
diff --git a/internal/text/plain.go b/internal/text/plain.go index 4ef3b3715..bc10d1b67 100644 --- a/internal/text/plain.go +++ b/internal/text/plain.go @@ -35,7 +35,7 @@ func (f *formatter) FromPlain(ctx context.Context, plain string, mentions []*gts content := preformat(plain) // sanitize any html elements - content = RemoveHTML(content) + content = removeHTML(content) // format links nicely content = f.ReplaceLinks(ctx, content) diff --git a/internal/text/removehtml_test.go b/internal/text/removehtml_test.go new file mode 100644 index 000000000..0029b45a5 --- /dev/null +++ b/internal/text/removehtml_test.go @@ -0,0 +1,57 @@ +/* + GoToSocial + Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +package text + +import ( + "testing" + + "github.com/stretchr/testify/suite" +) + +const ( + test_removeHTML = `<p>Another test <span class="h-card"><a href="http://fossbros-anonymous.io/@foss_satan" class="u-url mention" rel="nofollow noreferrer noopener" target="_blank">@<span>foss_satan</span></a></span><br/><br/><a href="http://localhost:8080/tags/Hashtag" class="mention hashtag" rel="tag nofollow noreferrer noopener" target="_blank">#<span>Hashtag</span></a><br/><br/>Text</p>` + test_removedHTML = `Another test @foss_satan#HashtagText` + test_withEscapedLiteral = `it\u0026amp;#39;s its it is` + test_withEscapedLiteralExpected = `it\u0026amp;#39;s its it is` + test_withEscaped = "it\u0026amp;#39;s its it is" + test_withEscapedExpected = "it&#39;s its it is" +) + +type RemoveHTMLTestSuite struct { + suite.Suite +} + +func (suite *RemoveHTMLTestSuite) TestSanitizeWithEscapedLiteral() { + s := removeHTML(test_withEscapedLiteral) + suite.Equal(test_withEscapedLiteralExpected, s) +} + +func (suite *RemoveHTMLTestSuite) TestSanitizeWithEscaped() { + s := removeHTML(test_withEscaped) + suite.Equal(test_withEscapedExpected, s) +} + +func (suite *RemoveHTMLTestSuite) TestRemoveHTML() { + s := removeHTML(test_removeHTML) + suite.Equal(test_removedHTML, s) +} + +func TestRemoveHTMLTestSuite(t *testing.T) { + suite.Run(t, &RemoveHTMLTestSuite{}) +} diff --git a/internal/text/sanitize.go b/internal/text/sanitize.go index 897dea34d..d4faabbb1 100644 --- a/internal/text/sanitize.go +++ b/internal/text/sanitize.go @@ -46,12 +46,20 @@ var regular *bluemonday.Policy = bluemonday.UGCPolicy(). // Source: https://github.com/microcosm-cc/bluemonday#usage var strict *bluemonday.Policy = bluemonday.StrictPolicy() -// SanitizeHTML cleans up HTML in the given string, allowing through only safe HTML elements. +// removeHTML strictly removes *all* recognized HTML elements from the given string. 
+func removeHTML(in string) string { + return strict.Sanitize(in) +} + +// SanitizeHTML sanitizes risky html elements from the given string, allowing only safe ones through. func SanitizeHTML(in string) string { return regular.Sanitize(in) } -// RemoveHTML removes all HTML from the given string. -func RemoveHTML(in string) string { - return strict.Sanitize(in) +// SanitizePlaintext runs text through basic sanitization. This removes +// any html elements that were in the string, and returns clean plaintext. +func SanitizePlaintext(in string) string { + content := preformat(in) + content = removeHTML(content) + return postformat(content) } diff --git a/internal/text/sanitize_test.go b/internal/text/sanitize_test.go index 4270e2602..eea5daadb 100644 --- a/internal/text/sanitize_test.go +++ b/internal/text/sanitize_test.go @@ -26,17 +26,8 @@ import ( ) const ( - removeHTML = `<p>Another test <span class="h-card"><a href="http://fossbros-anonymous.io/@foss_satan" class="u-url mention" rel="nofollow noreferrer noopener" target="_blank">@<span>foss_satan</span></a></span><br/><br/><a href="http://localhost:8080/tags/Hashtag" class="mention hashtag" rel="tag nofollow noreferrer noopener" target="_blank">#<span>Hashtag</span></a><br/><br/>Text</p>` - removedHTML = `Another test @foss_satan#HashtagText` - - sanitizeHTML = `here's some naughty html: <script>alert(ahhhh)</script> !!!` - sanitizedHTML = `here's some naughty html: !!!` - - withEscapedLiteral = `it\u0026amp;#39;s its it is` - withEscapedLiteralExpected = `it\u0026amp;#39;s its it is` - withEscaped = "it\u0026amp;#39;s its it is" - withEscapedExpected = "it&#39;s its it is" - + sanitizeHTML = `here's some naughty html: <script>alert(ahhhh)</script> !!!` + sanitizedHTML = `here's some naughty html: !!!` sanitizeOutgoing = `<p>gotta test some fucking ''''''''' marks</p>` sanitizedOutgoing = `<p>gotta test some fucking ''''''''' marks</p>` ) @@ -45,11 +36,6 @@ type SanitizeTestSuite struct { suite.Suite } -func 
(suite *SanitizeTestSuite) TestRemoveHTML() { - s := text.RemoveHTML(removeHTML) - suite.Equal(removedHTML, s) -} - func (suite *SanitizeTestSuite) TestSanitizeOutgoing() { s := text.SanitizeHTML(sanitizeOutgoing) suite.Equal(sanitizedOutgoing, s) @@ -60,14 +46,52 @@ func (suite *SanitizeTestSuite) TestSanitizeHTML() { suite.Equal(sanitizedHTML, s) } -func (suite *SanitizeTestSuite) TestSanitizeWithEscapedLiteral() { - s := text.RemoveHTML(withEscapedLiteral) - suite.Equal(withEscapedLiteralExpected, s) +func (suite *SanitizeTestSuite) TestSanitizeCaption1() { + dodgyCaption := "<script>console.log('haha!')</script>this is just a normal caption ;)" + sanitized := text.SanitizePlaintext(dodgyCaption) + suite.Equal("this is just a normal caption ;)", sanitized) +} + +func (suite *SanitizeTestSuite) TestSanitizeCaption2() { + dodgyCaption := "<em>here's a LOUD caption</em>" + sanitized := text.SanitizePlaintext(dodgyCaption) + suite.Equal("here's a LOUD caption", sanitized) +} + +func (suite *SanitizeTestSuite) TestSanitizeCaption3() { + dodgyCaption := "" + sanitized := text.SanitizePlaintext(dodgyCaption) + suite.Equal("", sanitized) +} + +func (suite *SanitizeTestSuite) TestSanitizeCaption4() { + dodgyCaption := ` + + +here is +a multi line +caption +with some newlines + + + +` + sanitized := text.SanitizePlaintext(dodgyCaption) + suite.Equal("here is\na multi line\ncaption\nwith some newlines", sanitized) +} + +func (suite *SanitizeTestSuite) TestSanitizeCaption5() { + // html-escaped: "<script>console.log('aha!')</script> hello world" + dodgyCaption := `<script>console.log('aha!')</script> hello world` + sanitized := text.SanitizePlaintext(dodgyCaption) + suite.Equal("hello world", sanitized) } -func (suite *SanitizeTestSuite) TestSanitizeWithEscaped() { - s := text.RemoveHTML(withEscaped) - suite.Equal(withEscapedExpected, s) +func (suite *SanitizeTestSuite) TestSanitizeCaption6() { + // html-encoded: "<script>console.log('aha!')</script> hello world" + 
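dodgyCaption := `&lt;&#115;&#99;&#114;&#105;&#112;&#116;&gt;&#99;&#111;&#110;&#115;&#111;&#108;&#101;&period;&#108;&#111;&#103;&lpar;&apos;&#97;&#104;&#97;&excl;&apos;&rpar;&lt;&sol;&#115;&#99;&#114;&#105;&#112;&#116;&gt;&#32;&#104;&#101;&#108;&#108;&#111;&#32;&#119;&#111;&#114;&#108;&#100;` + sanitized := text.SanitizePlaintext(dodgyCaption) + suite.Equal("hello world", sanitized) } func TestSanitizeTestSuite(t *testing.T) { |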