From c84384e6608368a13a774d6d33a8cc32da7cf209 Mon Sep 17 00:00:00 2001 From: tobi <31960611+tsmethurst@users.noreply.github.com> Date: Tue, 19 Jul 2022 15:21:17 +0200 Subject: [bugfix] html escape special characters in text instead of totally removing them (#719) * remove minify dependency * tidy up some tests * remove pre + postformat funcs * rework sanitization + formatting * update tests * add some more markdown tests --- internal/text/common.go | 33 ------------------------------ internal/text/common_test.go | 46 ++++++++---------------------------------- internal/text/link_test.go | 20 +++++++++--------- internal/text/markdown.go | 6 ++---- internal/text/markdown_test.go | 32 ++++++++++++++++++++--------- internal/text/minify.go | 39 ----------------------------------- internal/text/plain.go | 10 +++++---- internal/text/plain_test.go | 36 +++++++++++++++------------------ internal/text/sanitize.go | 7 +++++-- 9 files changed, 70 insertions(+), 159 deletions(-) delete mode 100644 internal/text/minify.go (limited to 'internal/text') diff --git a/internal/text/common.go b/internal/text/common.go index 9ed3fb06f..005f9dfe1 100644 --- a/internal/text/common.go +++ b/internal/text/common.go @@ -21,7 +21,6 @@ package text import ( "bytes" "context" - "html" "strings" "unicode" @@ -30,38 +29,6 @@ import ( "github.com/superseriousbusiness/gotosocial/internal/regexes" ) -// preformat contains some common logic for making a string ready for formatting, which should be used for all user-input text. -func preformat(in string) string { - // do some preformatting of the text - - // 1. unescape everything that might be html escaped - s := html.UnescapeString(in) - - // 2. trim leading or trailing whitespace - s = strings.TrimSpace(s) - return s -} - -// postformat contains some common logic for html sanitization of text, wrapping elements, and trimming newlines and whitespace -func postformat(in string) string { - // do some postformatting of the text - - // 1. 
sanitize html to remove potentially dangerous elements - s := SanitizeHTML(in) - - // 2. the sanitize step tends to escape characters inside codeblocks, which is behavior we don't want, so unescape everything again - s = html.UnescapeString(s) - - // 3. minify html to remove any trailing newlines, spaces, unnecessary elements, etc etc - mini, err := MinifyHTML(s) - if err != nil { - // if the minify failed, just return what we have - return s - } - // return minified version of the html - return mini -} - func (f *formatter) ReplaceTags(ctx context.Context, in string, tags []*gtsmodel.Tag) string { return regexes.ReplaceAllStringFunc(regexes.HashtagFinder, in, func(match string, buf *bytes.Buffer) string { // we have a match diff --git a/internal/text/common_test.go b/internal/text/common_test.go index 5e8f05b30..48f5240d2 100644 --- a/internal/text/common_test.go +++ b/internal/text/common_test.go @@ -28,44 +28,14 @@ import ( ) const ( - replaceMentionsString = `Another test @foss_satan@fossbros-anonymous.io - -#Hashtag - -Text` - replaceMentionsExpected = `Another test @foss_satan - -#Hashtag - -Text` - - replaceHashtagsExpected = `Another test @foss_satan@fossbros-anonymous.io - -#Hashtag - -Text` - - replaceHashtagsAfterMentionsExpected = `Another test @foss_satan - -#Hashtag - -Text` - - replaceMentionsWithLinkString = `Another test @foss_satan@fossbros-anonymous.io - -http://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060` - - replaceMentionsWithLinkStringExpected = `Another test @foss_satan - -http://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060` - - replaceMentionsWithLinkSelfString = `Mentioning myself: @the_mighty_zork - -and linking to my own status: https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR` - - replaceMemtionsWithLinkSelfExpected = `Mentioning myself: @the_mighty_zork - -and linking to my own status: 
https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR` + replaceMentionsString = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText" + replaceMentionsExpected = "Another test @foss_satan\n\n#Hashtag\n\nText" + replaceHashtagsExpected = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText" + replaceHashtagsAfterMentionsExpected = "Another test @foss_satan\n\n#Hashtag\n\nText" + replaceMentionsWithLinkString = "Another test @foss_satan@fossbros-anonymous.io\n\nhttp://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060" + replaceMentionsWithLinkStringExpected = "Another test @foss_satan\n\nhttp://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060" + replaceMentionsWithLinkSelfString = "Mentioning myself: @the_mighty_zork\n\nand linking to my own status: https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR" + replaceMemtionsWithLinkSelfExpected = "Mentioning myself: @the_mighty_zork\n\nand linking to my own status: https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR" ) type CommonTestSuite struct { diff --git a/internal/text/link_test.go b/internal/text/link_test.go index 24484e02d..e50a8dd69 100644 --- a/internal/text/link_test.go +++ b/internal/text/link_test.go @@ -71,16 +71,16 @@ type LinkTestSuite struct { func (suite *LinkTestSuite) TestParseSimple() { f := suite.formatter.FromPlain(context.Background(), simple, nil, nil) - assert.Equal(suite.T(), simpleExpected, f) + suite.Equal(simpleExpected, f) } func (suite *LinkTestSuite) TestParseURLsFromText1() { urls := text.FindLinks(text1) - assert.Equal(suite.T(), "https://example.org/link/to/something#fragment", urls[0].String()) - assert.Equal(suite.T(), "http://test.example.org?q=bahhhhhhhhhhhh", urls[1].String()) - assert.Equal(suite.T(), "https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it", urls[2].String()) - assert.Equal(suite.T(), 
"https://example.orghttps://google.com", urls[3].String()) + suite.Equal("https://example.org/link/to/something#fragment", urls[0].String()) + suite.Equal("http://test.example.org?q=bahhhhhhhhhhhh", urls[1].String()) + suite.Equal("https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it", urls[2].String()) + suite.Equal("https://example.orghttps://google.com", urls[3].String()) } func (suite *LinkTestSuite) TestParseURLsFromText2() { @@ -99,7 +99,7 @@ func (suite *LinkTestSuite) TestParseURLsFromText3() { func (suite *LinkTestSuite) TestReplaceLinksFromText1() { replaced := suite.formatter.ReplaceLinks(context.Background(), text1) - assert.Equal(suite.T(), ` + suite.Equal(` This is a text with some links in it. Here's link number one: example.org/link/to/something#fragment Here's link number two: test.example.org?q=bahhhhhhhhhhhh @@ -114,7 +114,7 @@ really.cool.website <-- this one shouldn't be parsed as a link because it doesn' func (suite *LinkTestSuite) TestReplaceLinksFromText2() { replaced := suite.formatter.ReplaceLinks(context.Background(), text2) - assert.Equal(suite.T(), ` + suite.Equal(` this is one link: example.org this is the same link again: example.org @@ -126,14 +126,14 @@ these should be deduplicated func (suite *LinkTestSuite) TestReplaceLinksFromText3() { // we know mailto links won't be replaced with hrefs -- we only accept https and http replaced := suite.formatter.ReplaceLinks(context.Background(), text3) - assert.Equal(suite.T(), ` + suite.Equal(` here's a mailto link: mailto:whatever@test.org `, replaced) } func (suite *LinkTestSuite) TestReplaceLinksFromText4() { replaced := suite.formatter.ReplaceLinks(context.Background(), text4) - assert.Equal(suite.T(), ` + suite.Equal(` two similar links: example.org @@ -145,7 +145,7 @@ two similar links: func (suite *LinkTestSuite) TestReplaceLinksFromText5() { // we know this one doesn't work properly, which is why html should always be sanitized before being passed into the 
ReplaceLinks function replaced := suite.formatter.ReplaceLinks(context.Background(), text5) - assert.Equal(suite.T(), ` + suite.Equal(` what happens when we already have a link within an href? example.org">example.org diff --git a/internal/text/markdown.go b/internal/text/markdown.go index 01238954f..a5c62f23f 100644 --- a/internal/text/markdown.go +++ b/internal/text/markdown.go @@ -26,13 +26,11 @@ import ( ) func (f *formatter) FromMarkdown(ctx context.Context, md string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string { - content := preformat(md) - // do the markdown parsing *first* - contentBytes := blackfriday.Run([]byte(content)) + contentBytes := blackfriday.Run([]byte(md)) // format tags nicely - content = f.ReplaceTags(ctx, string(contentBytes), tags) + content := f.ReplaceTags(ctx, string(contentBytes), tags) // format mentions nicely content = f.ReplaceMentions(ctx, content, mentions) diff --git a/internal/text/markdown_test.go b/internal/text/markdown_test.go index 111cfe473..74a18a685 100644 --- a/internal/text/markdown_test.go +++ b/internal/text/markdown_test.go @@ -44,15 +44,19 @@ that was some JSON :) ` const ( - simpleMarkdown = "# Title\n\nHere's a simple text in markdown.\n\nHere's a [link](https://example.org)." - simpleMarkdownExpected = "
Here’s a simple text in markdown.
\n\nHere’s a link.
\n" - withCodeBlockExpected = "Below is some JSON.
\n\n{\n "key": "value",\n "another_key": [\n "value1",\n "value2"\n ]\n}\n
\n\nthat was some JSON :)
\n" - withInlineCode = "`Nobody tells you about theSECRET CODE
, do they?`"
- withInlineCodeExpected = "Nobody tells you about the <code><del>SECRET CODE</del></code>, do they?
, do they?`"
- withInlineCode2Expected = "Nobody tells you about the </code><del>SECRET CODE</del><code>, do they?
\n"
- withHashtag = "# Title\n\nhere's a simple status that uses hashtag #Hashtag!"
- withHashtagExpected = "Title
\n\nhere’s a simple status that uses hashtag #Hashtag!
\n"
+ simpleMarkdown = "# Title\n\nHere's a simple text in markdown.\n\nHere's a [link](https://example.org)."
+ simpleMarkdownExpected = "Title
\n\nHere’s a simple text in markdown.
\n\nHere’s a link.
\n"
+ withCodeBlockExpected = "Title
\n\nBelow is some JSON.
\n\n{\n "key": "value",\n "another_key": [\n "value1",\n "value2"\n ]\n}\n
\n\nthat was some JSON :)
\n"
+ withInlineCode = "`Nobody tells you about the SECRET CODE
, do they?`"
+ withInlineCodeExpected = "Nobody tells you about the <code><del>SECRET CODE</del></code>, do they?
\n"
+ withInlineCode2 = "`Nobody tells you about the
, do they?`"
+ withInlineCode2Expected = "Nobody tells you about the </code><del>SECRET CODE</del><code>, do they?
\n"
+ withHashtag = "# Title\n\nhere's a simple status that uses hashtag #Hashtag!"
+ withHashtagExpected = "Title
\n\nhere’s a simple status that uses hashtag #Hashtag!
\n"
+ mdWithHTML = "# Title\n\nHere's a simple text in markdown.\n\nHere's a link.\n\nHere's an image:
"
+ mdWithHTMLExpected = "Title
\n\nHere’s a simple text in markdown.
\n\nHere’s a link.
\n\nHere’s an image: 
\n"
+ mdWithCheekyHTML = "# Title\n\nHere's a simple text in markdown.\n\nHere's a cheeky little script: "
+ mdWithCheekyHTMLExpected = "Title
\n\nHere’s a simple text in markdown.
\n\nHere’s a cheeky little script:
\n"
)
type MarkdownTestSuite struct {
@@ -88,6 +92,16 @@ func (suite *MarkdownTestSuite) TestParseWithHashtag() {
suite.Equal(withHashtagExpected, s)
}
+func (suite *MarkdownTestSuite) TestParseWithHTML() {
+ s := suite.formatter.FromMarkdown(context.Background(), mdWithHTML, nil, nil)
+ suite.Equal(mdWithHTMLExpected, s)
+}
+
+func (suite *MarkdownTestSuite) TestParseWithCheekyHTML() {
+ s := suite.formatter.FromMarkdown(context.Background(), mdWithCheekyHTML, nil, nil)
+ suite.Equal(mdWithCheekyHTMLExpected, s)
+}
+
func TestMarkdownTestSuite(t *testing.T) {
suite.Run(t, new(MarkdownTestSuite))
}
diff --git a/internal/text/minify.go b/internal/text/minify.go
deleted file mode 100644
index e2515b9a4..000000000
--- a/internal/text/minify.go
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- GoToSocial
- Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-package text
-
-import (
- "github.com/tdewolff/minify/v2"
- "github.com/tdewolff/minify/v2/html"
-)
-
-var m *minify.M
-
-// MinifyHTML runs html through a minifier, reducing it in size.
-func MinifyHTML(in string) (string, error) {
- if m == nil {
- m = minify.New()
- m.Add("text/html", &html.Minifier{
- KeepQuotes: true,
- KeepEndTags: true,
- KeepDocumentTags: true,
- })
- }
- return m.String("text/html", in)
-}
diff --git a/internal/text/plain.go b/internal/text/plain.go
index bc10d1b67..3daea5686 100644
--- a/internal/text/plain.go
+++ b/internal/text/plain.go
@@ -20,6 +20,7 @@ package text
import (
"context"
+ "html"
"strings"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
@@ -32,10 +33,11 @@ var breakReplacer = strings.NewReplacer(
)
func (f *formatter) FromPlain(ctx context.Context, plain string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string {
- content := preformat(plain)
+ // trim any crap
+ content := strings.TrimSpace(plain)
- // sanitize any html elements
- content = removeHTML(content)
+ // clean 'er up
+ content = html.EscapeString(content)
// format links nicely
content = f.ReplaceLinks(ctx, content)
@@ -52,5 +54,5 @@ func (f *formatter) FromPlain(ctx context.Context, plain string, mentions []*gts
// wrap the whole thing in a pee
content = `` + content + `
`
- return postformat(content)
+ return SanitizeHTML(content)
}
diff --git a/internal/text/plain_test.go b/internal/text/plain_test.go
index 2b7b50d5e..cd82e0d1b 100644
--- a/internal/text/plain_test.go
+++ b/internal/text/plain_test.go
@@ -20,27 +20,21 @@ package text_test
import (
"context"
- "fmt"
"testing"
- "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)
const (
- simple = "this is a plain and simple status"
- simpleExpected = "this is a plain and simple status
"
-
- withTag = "here's a simple status that uses hashtag #welcome!"
- withTagExpected = "here's a simple status that uses hashtag #welcome!
"
-
- moreComplex = `Another test @foss_satan@fossbros-anonymous.io
-
-#Hashtag
-
-Text`
- moreComplexFull = "Another test @foss_satan
#Hashtag
Text
"
+ simple = "this is a plain and simple status"
+ simpleExpected = "this is a plain and simple status
"
+ withTag = "here's a simple status that uses hashtag #welcome!"
+ withTagExpected = "here's a simple status that uses hashtag #welcome!
"
+ withHTML = "blah this should just be html escaped blah"
+ withHTMLExpected = "<div>blah this should just be html escaped blah</div>
"
+ moreComplex = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText"
+ moreComplexFull = "Another test @foss_satan
#Hashtag
Text
"
)
type PlainTestSuite struct {
@@ -49,7 +43,7 @@ type PlainTestSuite struct {
func (suite *PlainTestSuite) TestParseSimple() {
f := suite.formatter.FromPlain(context.Background(), simple, nil, nil)
- assert.Equal(suite.T(), simpleExpected, f)
+ suite.Equal(simpleExpected, f)
}
func (suite *PlainTestSuite) TestParseWithTag() {
@@ -58,7 +52,12 @@ func (suite *PlainTestSuite) TestParseWithTag() {
}
f := suite.formatter.FromPlain(context.Background(), withTag, nil, foundTags)
- assert.Equal(suite.T(), withTagExpected, f)
+ suite.Equal(withTagExpected, f)
+}
+
+func (suite *PlainTestSuite) TestParseWithHTML() {
+ f := suite.formatter.FromPlain(context.Background(), withHTML, nil, nil)
+ suite.Equal(withHTMLExpected, f)
}
func (suite *PlainTestSuite) TestParseMoreComplex() {
@@ -71,10 +70,7 @@ func (suite *PlainTestSuite) TestParseMoreComplex() {
}
f := suite.formatter.FromPlain(context.Background(), moreComplex, foundMentions, foundTags)
-
- fmt.Println(f)
-
- assert.Equal(suite.T(), moreComplexFull, f)
+ suite.Equal(moreComplexFull, f)
}
func TestPlainTestSuite(t *testing.T) {
diff --git a/internal/text/sanitize.go b/internal/text/sanitize.go
index d4faabbb1..96b7ef994 100644
--- a/internal/text/sanitize.go
+++ b/internal/text/sanitize.go
@@ -19,7 +19,9 @@
package text
import (
+ "html"
"regexp"
+ "strings"
"github.com/microcosm-cc/bluemonday"
)
@@ -59,7 +61,8 @@ func SanitizeHTML(in string) string {
// SanitizePlaintext runs text through basic sanitization. This removes
// any html elements that were in the string, and returns clean plaintext.
func SanitizePlaintext(in string) string {
- content := preformat(in)
+ content := html.UnescapeString(in)
content = removeHTML(content)
- return postformat(content)
+ content = html.UnescapeString(content)
+ return strings.TrimSpace(content)
}
--
cgit v1.2.3