summaryrefslogtreecommitdiff
path: root/internal/text
diff options
context:
space:
mode:
Diffstat (limited to 'internal/text')
-rw-r--r--internal/text/caption.go29
-rw-r--r--internal/text/caption_test.go82
-rw-r--r--internal/text/plain.go2
-rw-r--r--internal/text/removehtml_test.go57
-rw-r--r--internal/text/sanitize.go16
-rw-r--r--internal/text/sanitize_test.go68
6 files changed, 116 insertions, 138 deletions
diff --git a/internal/text/caption.go b/internal/text/caption.go
deleted file mode 100644
index c3c86b0b1..000000000
--- a/internal/text/caption.go
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- GoToSocial
- Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-package text
-
-// SanitizeCaption runs image captions (or indeed any plain text) through basic sanitization.
-// It returns plain text rather than HTML, in contrast to other functions in this package.
-func SanitizeCaption(in string) string {
- content := preformat(in)
-
- content = RemoveHTML(content)
-
- return postformat(content)
-}
diff --git a/internal/text/caption_test.go b/internal/text/caption_test.go
deleted file mode 100644
index f1337df09..000000000
--- a/internal/text/caption_test.go
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- GoToSocial
- Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-package text_test
-
-import (
- "testing"
-
- "github.com/stretchr/testify/suite"
- "github.com/superseriousbusiness/gotosocial/internal/text"
-)
-
-type CaptionTestSuite struct {
- suite.Suite
-}
-
-func (suite *CaptionTestSuite) TestSanitizeCaption1() {
- dodgyCaption := "<script>console.log('haha!')</script>this is just a normal caption ;)"
- sanitized := text.SanitizeCaption(dodgyCaption)
- suite.Equal("this is just a normal caption ;)", sanitized)
-}
-
-func (suite *CaptionTestSuite) TestSanitizeCaption2() {
- dodgyCaption := "<em>here's a LOUD caption</em>"
- sanitized := text.SanitizeCaption(dodgyCaption)
- suite.Equal("here's a LOUD caption", sanitized)
-}
-
-func (suite *CaptionTestSuite) TestSanitizeCaption3() {
- dodgyCaption := ""
- sanitized := text.SanitizeCaption(dodgyCaption)
- suite.Equal("", sanitized)
-}
-
-func (suite *CaptionTestSuite) TestSanitizeCaption4() {
- dodgyCaption := `
-
-
-here is
-a multi line
-caption
-with some newlines
-
-
-
-`
- sanitized := text.SanitizeCaption(dodgyCaption)
- suite.Equal("here is\na multi line\ncaption\nwith some newlines", sanitized)
-}
-
-func (suite *CaptionTestSuite) TestSanitizeCaption5() {
- // html-escaped: "<script>console.log('aha!')</script> hello world"
- dodgyCaption := `&lt;script&gt;console.log(&apos;aha!&apos;)&lt;/script&gt; hello world`
- sanitized := text.SanitizeCaption(dodgyCaption)
- suite.Equal("hello world", sanitized)
-}
-
-func (suite *CaptionTestSuite) TestSanitizeCaption6() {
- // html-encoded: "<script>console.log('aha!')</script> hello world"
- dodgyCaption := `&lt;&#115;&#99;&#114;&#105;&#112;&#116;&gt;&#99;&#111;&#110;&#115;&#111;&#108;&#101;&period;&#108;&#111;&#103;&lpar;&apos;&#97;&#104;&#97;&excl;&apos;&rpar;&lt;&sol;&#115;&#99;&#114;&#105;&#112;&#116;&gt;&#32;&#104;&#101;&#108;&#108;&#111;&#32;&#119;&#111;&#114;&#108;&#100;`
- sanitized := text.SanitizeCaption(dodgyCaption)
- suite.Equal("hello world", sanitized)
-}
-
-func TestCaptionTestSuite(t *testing.T) {
- suite.Run(t, new(CaptionTestSuite))
-}
diff --git a/internal/text/plain.go b/internal/text/plain.go
index 4ef3b3715..bc10d1b67 100644
--- a/internal/text/plain.go
+++ b/internal/text/plain.go
@@ -35,7 +35,7 @@ func (f *formatter) FromPlain(ctx context.Context, plain string, mentions []*gts
content := preformat(plain)
// sanitize any html elements
- content = RemoveHTML(content)
+ content = removeHTML(content)
// format links nicely
content = f.ReplaceLinks(ctx, content)
diff --git a/internal/text/removehtml_test.go b/internal/text/removehtml_test.go
new file mode 100644
index 000000000..0029b45a5
--- /dev/null
+++ b/internal/text/removehtml_test.go
@@ -0,0 +1,57 @@
+/*
+ GoToSocial
+ Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package text
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/suite"
+)
+
+const (
+ test_removeHTML = `<p>Another test <span class="h-card"><a href="http://fossbros-anonymous.io/@foss_satan" class="u-url mention" rel="nofollow noreferrer noopener" target="_blank">@<span>foss_satan</span></a></span><br/><br/><a href="http://localhost:8080/tags/Hashtag" class="mention hashtag" rel="tag nofollow noreferrer noopener" target="_blank">#<span>Hashtag</span></a><br/><br/>Text</p>`
+ test_removedHTML = `Another test @foss_satan#HashtagText`
+ test_withEscapedLiteral = `it\u0026amp;#39;s its it is`
+ test_withEscapedLiteralExpected = `it\u0026amp;#39;s its it is`
+ test_withEscaped = "it\u0026amp;#39;s its it is"
+ test_withEscapedExpected = "it&amp;#39;s its it is"
+)
+
+type RemoveHTMLTestSuite struct {
+ suite.Suite
+}
+
+func (suite *RemoveHTMLTestSuite) TestSanitizeWithEscapedLiteral() {
+ s := removeHTML(test_withEscapedLiteral)
+ suite.Equal(test_withEscapedLiteralExpected, s)
+}
+
+func (suite *RemoveHTMLTestSuite) TestSanitizeWithEscaped() {
+ s := removeHTML(test_withEscaped)
+ suite.Equal(test_withEscapedExpected, s)
+}
+
+func (suite *RemoveHTMLTestSuite) TestRemoveHTML() {
+ s := removeHTML(test_removeHTML)
+ suite.Equal(test_removedHTML, s)
+}
+
+func TestRemoveHTMLTestSuite(t *testing.T) {
+ suite.Run(t, &RemoveHTMLTestSuite{})
+}
diff --git a/internal/text/sanitize.go b/internal/text/sanitize.go
index 897dea34d..d4faabbb1 100644
--- a/internal/text/sanitize.go
+++ b/internal/text/sanitize.go
@@ -46,12 +46,20 @@ var regular *bluemonday.Policy = bluemonday.UGCPolicy().
// Source: https://github.com/microcosm-cc/bluemonday#usage
var strict *bluemonday.Policy = bluemonday.StrictPolicy()
-// SanitizeHTML cleans up HTML in the given string, allowing through only safe HTML elements.
+// removeHTML strictly removes *all* recognized HTML elements from the given string.
+func removeHTML(in string) string {
+ return strict.Sanitize(in)
+}
+
+// SanitizeHTML sanitizes risky html elements from the given string, allowing only safe ones through.
func SanitizeHTML(in string) string {
return regular.Sanitize(in)
}
-// RemoveHTML removes all HTML from the given string.
-func RemoveHTML(in string) string {
- return strict.Sanitize(in)
+// SanitizePlaintext runs text through basic sanitization. This removes
+// any html elements that were in the string, and returns clean plaintext.
+func SanitizePlaintext(in string) string {
+ content := preformat(in)
+ content = removeHTML(content)
+ return postformat(content)
}
diff --git a/internal/text/sanitize_test.go b/internal/text/sanitize_test.go
index 4270e2602..eea5daadb 100644
--- a/internal/text/sanitize_test.go
+++ b/internal/text/sanitize_test.go
@@ -26,17 +26,8 @@ import (
)
const (
- removeHTML = `<p>Another test <span class="h-card"><a href="http://fossbros-anonymous.io/@foss_satan" class="u-url mention" rel="nofollow noreferrer noopener" target="_blank">@<span>foss_satan</span></a></span><br/><br/><a href="http://localhost:8080/tags/Hashtag" class="mention hashtag" rel="tag nofollow noreferrer noopener" target="_blank">#<span>Hashtag</span></a><br/><br/>Text</p>`
- removedHTML = `Another test @foss_satan#HashtagText`
-
- sanitizeHTML = `here's some naughty html: <script>alert(ahhhh)</script> !!!`
- sanitizedHTML = `here&#39;s some naughty html: !!!`
-
- withEscapedLiteral = `it\u0026amp;#39;s its it is`
- withEscapedLiteralExpected = `it\u0026amp;#39;s its it is`
- withEscaped = "it\u0026amp;#39;s its it is"
- withEscapedExpected = "it&amp;#39;s its it is"
-
+ sanitizeHTML = `here's some naughty html: <script>alert(ahhhh)</script> !!!`
+ sanitizedHTML = `here&#39;s some naughty html: !!!`
sanitizeOutgoing = `<p>gotta test some fucking &#39;&#39;&#39;&#39;&#39;&#39;&#39;&#39;&#39; marks</p>`
sanitizedOutgoing = `<p>gotta test some fucking &#39;&#39;&#39;&#39;&#39;&#39;&#39;&#39;&#39; marks</p>`
)
@@ -45,11 +36,6 @@ type SanitizeTestSuite struct {
suite.Suite
}
-func (suite *SanitizeTestSuite) TestRemoveHTML() {
- s := text.RemoveHTML(removeHTML)
- suite.Equal(removedHTML, s)
-}
-
func (suite *SanitizeTestSuite) TestSanitizeOutgoing() {
s := text.SanitizeHTML(sanitizeOutgoing)
suite.Equal(sanitizedOutgoing, s)
@@ -60,14 +46,52 @@ func (suite *SanitizeTestSuite) TestSanitizeHTML() {
suite.Equal(sanitizedHTML, s)
}
-func (suite *SanitizeTestSuite) TestSanitizeWithEscapedLiteral() {
- s := text.RemoveHTML(withEscapedLiteral)
- suite.Equal(withEscapedLiteralExpected, s)
+func (suite *SanitizeTestSuite) TestSanitizeCaption1() {
+ dodgyCaption := "<script>console.log('haha!')</script>this is just a normal caption ;)"
+ sanitized := text.SanitizePlaintext(dodgyCaption)
+ suite.Equal("this is just a normal caption ;)", sanitized)
+}
+
+func (suite *SanitizeTestSuite) TestSanitizeCaption2() {
+ dodgyCaption := "<em>here's a LOUD caption</em>"
+ sanitized := text.SanitizePlaintext(dodgyCaption)
+ suite.Equal("here's a LOUD caption", sanitized)
+}
+
+func (suite *SanitizeTestSuite) TestSanitizeCaption3() {
+ dodgyCaption := ""
+ sanitized := text.SanitizePlaintext(dodgyCaption)
+ suite.Equal("", sanitized)
+}
+
+func (suite *SanitizeTestSuite) TestSanitizeCaption4() {
+ dodgyCaption := `
+
+
+here is
+a multi line
+caption
+with some newlines
+
+
+
+`
+ sanitized := text.SanitizePlaintext(dodgyCaption)
+ suite.Equal("here is\na multi line\ncaption\nwith some newlines", sanitized)
+}
+
+func (suite *SanitizeTestSuite) TestSanitizeCaption5() {
+ // html-escaped: "<script>console.log('aha!')</script> hello world"
+ dodgyCaption := `&lt;script&gt;console.log(&apos;aha!&apos;)&lt;/script&gt; hello world`
+ sanitized := text.SanitizePlaintext(dodgyCaption)
+ suite.Equal("hello world", sanitized)
}
-func (suite *SanitizeTestSuite) TestSanitizeWithEscaped() {
- s := text.RemoveHTML(withEscaped)
- suite.Equal(withEscapedExpected, s)
+func (suite *SanitizeTestSuite) TestSanitizeCaption6() {
+ // html-encoded: "<script>console.log('aha!')</script> hello world"
+ dodgyCaption := `&lt;&#115;&#99;&#114;&#105;&#112;&#116;&gt;&#99;&#111;&#110;&#115;&#111;&#108;&#101;&period;&#108;&#111;&#103;&lpar;&apos;&#97;&#104;&#97;&excl;&apos;&rpar;&lt;&sol;&#115;&#99;&#114;&#105;&#112;&#116;&gt;&#32;&#104;&#101;&#108;&#108;&#111;&#32;&#119;&#111;&#114;&#108;&#100;`
+ sanitized := text.SanitizePlaintext(dodgyCaption)
+ suite.Equal("hello world", sanitized)
}
func TestSanitizeTestSuite(t *testing.T) {