From ce190d867ca126001a1c0417b00810fc03c0b3ba Mon Sep 17 00:00:00 2001 From: Tobi Smethurst <31960611+tsmethurst@users.noreply.github.com> Date: Mon, 16 Aug 2021 19:17:56 +0200 Subject: Text/status parsing fixes (#141) * aaaaaa * vendor minify * update + test markdown parsing --- internal/api/client/status/statuscreate_test.go | 6 +- internal/processing/status/util_test.go | 6 +- internal/text/common.go | 33 ++++--- internal/text/markdown.go | 11 +-- internal/text/markdown_test.go | 116 ++++++++++++++++++++++++ internal/text/minify.go | 39 ++++++++ internal/text/plain.go | 7 ++ internal/text/plain_test.go | 8 +- internal/text/sanitize.go | 17 ++-- internal/text/sanitize_test.go | 75 +++++++++++++++ 10 files changed, 277 insertions(+), 41 deletions(-) create mode 100644 internal/text/markdown_test.go create mode 100644 internal/text/minify.go create mode 100644 internal/text/sanitize_test.go (limited to 'internal') diff --git a/internal/api/client/status/statuscreate_test.go b/internal/api/client/status/statuscreate_test.go index c175a54ec..33912397e 100644 --- a/internal/api/client/status/statuscreate_test.go +++ b/internal/api/client/status/statuscreate_test.go @@ -165,7 +165,7 @@ func (suite *StatusCreateTestSuite) TestPostAnotherNewStatus() { err = json.Unmarshal(b, statusReply) assert.NoError(suite.T(), err) - assert.Equal(suite.T(), "\u003cp\u003e\u003ca href=\"http://localhost:8080/tags/test\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\"\u003e#\u003cspan\u003etest\u003c/span\u003e\u003c/a\u003e alright, should be able to post \u003ca href=\"http://localhost:8080/tags/links\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\"\u003e#\u003cspan\u003elinks\u003c/span\u003e\u003c/a\u003e with fragments in them now, let\u0026#39;s see........\u003cbr/\u003e\u003cbr/\u003e\u003ca href=\"https://docs.gotosocial.org/en/latest/user_guide/posts/#links\" rel=\"noopener nofollow noreferrer\" target=\"_blank\"\u003edocs.gotosocial.org/en/latest/user_guide/posts/#links\u003c/a\u003e\u003cbr/\u003e\u003cbr/\u003e\u003ca href=\"http://localhost:8080/tags/gotosocial\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\"\u003e#\u003cspan\u003egotosocial\u003c/span\u003e\u003c/a\u003e\u003cbr/\u003e\u003cbr/\u003e(tobi remember to pull the docker image challenge)\u003c/p\u003e", statusReply.Content) + assert.Equal(suite.T(), "
#test alright, should be able to post #links with fragments in them now, let's see........
docs.gotosocial.org/en/latest/user_guide/posts/#links
#gotosocial
(tobi remember to pull the docker image challenge)
here is a rainbow emoji a few times! :rainbow: :rainbow: :rainbow:
here's an emoji that isn't in the db: :test_emoji:
here is a rainbow emoji a few times! :rainbow: :rainbow: :rainbow:
here's an emoji that isn't in the db: :test_emoji:
here's an image attachment
", statusResponse.Content) + assert.Equal(suite.T(), "here's an image attachment
", statusResponse.Content) assert.False(suite.T(), statusResponse.Sensitive) assert.Equal(suite.T(), model.VisibilityPublic, statusResponse.Visibility) diff --git a/internal/processing/status/util_test.go b/internal/processing/status/util_test.go index 9a4bd6515..4bf508848 100644 --- a/internal/processing/status/util_test.go +++ b/internal/processing/status/util_test.go @@ -17,8 +17,8 @@ const statusText1 = `Another test @foss_satan@fossbros-anonymous.io #Hashtag Text` -const statusText1ExpectedFull = `Another test @foss_satan
#Hashtag
Text
Another test @foss_satan
#Hashtag
Text
Another test @foss_satan
#Hashtag
Text
Another test @foss_satan
#Hashtag
Text
Another test @foss_satan
#Hashtag
#hashTAG
Another test @foss_satan
#Hashtag
#hashTAG
%s
`, s) - // 3. remove any cheeky newlines - s = strings.ReplaceAll(s, "\n", "") - // 4. remove any whitespace added as a result of the formatting - s = strings.TrimSpace(s) - return s + + // 1. sanitize html to remove potentially dangerous elements + s := SanitizeHTML(in) + + // 2. the sanitize step tends to escape characters inside codeblocks, which is behavior we don't want, so unescape everything again + s = html.UnescapeString(s) + + // 3. minify html to remove any trailing newlines, spaces, unnecessary elements, etc etc + mini, err := minifyHTML(s) + if err != nil { + // if the minify failed, just return what we have + return s + } + // return minified version of the html + return mini } func (f *formatter) ReplaceTags(in string, tags []*gtsmodel.Tag) string { diff --git a/internal/text/markdown.go b/internal/text/markdown.go index f9d12209a..5a7603615 100644 --- a/internal/text/markdown.go +++ b/internal/text/markdown.go @@ -23,21 +23,14 @@ import ( "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" ) -var bfExtensions = blackfriday.NoIntraEmphasis | - blackfriday.FencedCode | - blackfriday.Autolink | - blackfriday.Strikethrough | - blackfriday.SpaceHeadings | - blackfriday.BackslashLineBreak - func (f *formatter) FromMarkdown(md string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string { content := preformat(md) // do the markdown parsing *first* - content = string(blackfriday.Run([]byte(content), blackfriday.WithExtensions(bfExtensions))) + contentBytes := blackfriday.Run([]byte(content)) // format tags nicely - content = f.ReplaceTags(content, tags) + content = f.ReplaceTags(string(contentBytes), tags) // format mentions nicely content = f.ReplaceMentions(content, mentions) diff --git a/internal/text/markdown_test.go b/internal/text/markdown_test.go new file mode 100644 index 000000000..432e9a4ec --- /dev/null +++ b/internal/text/markdown_test.go @@ -0,0 +1,116 @@ +/* + GoToSocial + Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, seeHere’s a simple text in markdown.
Here’s a link.
" + + withCodeBlockExpected = "Below is some JSON.
{\n \"key\": \"value\",\n \"another_key\": [\n \"value1\",\n \"value2\"\n ]\n}\nthat was some JSON :)
" + + withHashtag = "# Title\n\nhere's a simple status that uses hashtag #Hashtag!" + withHashtagExpected = "here’s a simple status that uses hashtag #Hashtag!
" +) + +var ( + withCodeBlock = `# Title + +Below is some JSON. + +` + "```" + `json +{ + "key": "value", + "another_key": [ + "value1", + "value2" + ] +} +` + "```" + ` + +that was some JSON :) +` +) + +type MarkdownTestSuite struct { + TextStandardTestSuite +} + +func (suite *MarkdownTestSuite) SetupSuite() { + suite.testTokens = testrig.NewTestTokens() + suite.testClients = testrig.NewTestClients() + suite.testApplications = testrig.NewTestApplications() + suite.testUsers = testrig.NewTestUsers() + suite.testAccounts = testrig.NewTestAccounts() + suite.testAttachments = testrig.NewTestAttachments() + suite.testStatuses = testrig.NewTestStatuses() + suite.testTags = testrig.NewTestTags() + suite.testMentions = testrig.NewTestMentions() +} + +func (suite *MarkdownTestSuite) SetupTest() { + suite.config = testrig.NewTestConfig() + suite.db = testrig.NewTestDB() + suite.log = testrig.NewTestLog() + suite.formatter = text.NewFormatter(suite.config, suite.db, suite.log) + + testrig.StandardDBSetup(suite.db, suite.testAccounts) +} + +func (suite *MarkdownTestSuite) TearDownTest() { + testrig.StandardDBTeardown(suite.db) +} + +func (suite *MarkdownTestSuite) TestParseSimple() { + s := suite.formatter.FromMarkdown(simpleMarkdown, nil, nil) + suite.Equal(simpleMarkdownExpected, s) +} + +func (suite *MarkdownTestSuite) TestParseWithCodeBlock() { + fmt.Println(withCodeBlock) + s := suite.formatter.FromMarkdown(withCodeBlock, nil, nil) + suite.Equal(withCodeBlockExpected, s) +} + +func (suite *MarkdownTestSuite) TestParseWithHashtag() { + foundTags := []*gtsmodel.Tag{ + suite.testTags["Hashtag"], + } + + s := suite.formatter.FromMarkdown(withHashtag, nil, foundTags) + suite.Equal(withHashtagExpected, s) +} + +func TestMarkdownTestSuite(t *testing.T) { + suite.Run(t, new(MarkdownTestSuite)) +} diff --git a/internal/text/minify.go b/internal/text/minify.go new file mode 100644 index 000000000..c6d7b9bc1 --- /dev/null +++ b/internal/text/minify.go @@ -0,0 +1,39 @@ +/* + GoToSocial + Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see%s
`, content) + return postformat(content) } diff --git a/internal/text/plain_test.go b/internal/text/plain_test.go index 2f9eb3a29..33c95234c 100644 --- a/internal/text/plain_test.go +++ b/internal/text/plain_test.go @@ -33,15 +33,15 @@ const ( simple = "this is a plain and simple status" simpleExpected = "this is a plain and simple status
" - withTag = "this is a simple status that uses hashtag #welcome!" - withTagExpected = "this is a simple status that uses hashtag #welcome!
" + withTag = "here's a simple status that uses hashtag #welcome!" + withTagExpected = "here's a simple status that uses hashtag #welcome!
" moreComplex = `Another test @foss_satan@fossbros-anonymous.io #Hashtag Text` - moreComplexExpected = `Another test @foss_satan
#Hashtag
Text
Another test @foss_satan
#Hashtag
Text
Another test @foss_satan
#Hashtag
Text
gotta test some fucking ''''''''' marks
` + sanitizedOutgoing = `gotta test some fucking ''''''''' marks
` +) + +type SanitizeTestSuite struct { + suite.Suite +} + +func (suite *SanitizeTestSuite) TestRemoveHTML() { + s := text.RemoveHTML(removeHTML) + suite.Equal(removedHTML, s) +} + +func (suite *SanitizeTestSuite) TestSanitizeOutgoing() { + s := text.SanitizeHTML(sanitizeOutgoing) + suite.Equal(sanitizedOutgoing, s) +} + +func (suite *SanitizeTestSuite) TestSanitizeHTML() { + s := text.SanitizeHTML(sanitizeHTML) + suite.Equal(sanitizedHTML, s) +} + +func (suite *SanitizeTestSuite) TestSanitizeWithEscapedLiteral() { + s := text.RemoveHTML(withEscapedLiteral) + suite.Equal(withEscapedLiteralExpected, s) +} + +func (suite *SanitizeTestSuite) TestSanitizeWithEscaped() { + s := text.RemoveHTML(withEscaped) + suite.Equal(withEscapedExpected, s) +} + +func TestSanitizeTestSuite(t *testing.T) { + suite.Run(t, new(SanitizeTestSuite)) +} -- cgit v1.3