diff options
| author | 2023-02-03 10:58:58 +0000 | |
|---|---|---|
| committer | 2023-02-03 11:58:58 +0100 | |
| commit | 49beb17a8fbdbf3517c103a477a5459a3bba404d (patch) | |
| tree | 364c82d4089c75d3b95a5d78fd31b33d91b30b59 /internal/text | |
| parent | [bugfix] Read Bookwyrm Articles more thoroughly (#1410) (diff) | |
| download | gotosocial-49beb17a8fbdbf3517c103a477a5459a3bba404d.tar.xz | |
[chore] Text formatting overhaul (#1406)
* Implement goldmark debug print for hashtags and mentions
* Minify HTML in FromPlain
* Convert plaintext status parser to goldmark
* Move mention/tag/emoji finding logic into formatter
* Combine mention and hashtag boundary characters
* Normalize unicode when rendering hashtags
Diffstat (limited to 'internal/text')
| -rw-r--r-- | internal/text/common.go | 112 | ||||
| -rw-r--r-- | internal/text/common_test.go | 106 | ||||
| -rw-r--r-- | internal/text/emojionly.go | 71 | ||||
| -rw-r--r-- | internal/text/formatter.go | 24 | ||||
| -rw-r--r-- | internal/text/formatter_test.go | 22 | ||||
| -rw-r--r-- | internal/text/goldmark_extension.go (renamed from internal/text/markdownextension.go) | 159 | ||||
| -rw-r--r-- | internal/text/goldmark_plaintext.go | 64 | ||||
| -rw-r--r-- | internal/text/link.go | 86 | ||||
| -rw-r--r-- | internal/text/link_test.go | 157 | ||||
| -rw-r--r-- | internal/text/markdown.go | 54 | ||||
| -rw-r--r-- | internal/text/markdown_test.go | 117 | ||||
| -rw-r--r-- | internal/text/minify.go | 45 | ||||
| -rw-r--r-- | internal/text/plain.go | 68 | ||||
| -rw-r--r-- | internal/text/plain_test.go | 125 | ||||
| -rw-r--r-- | internal/text/replace.go | 141 | 
15 files changed, 705 insertions, 646 deletions
| diff --git a/internal/text/common.go b/internal/text/common.go deleted file mode 100644 index 2293ca3fe..000000000 --- a/internal/text/common.go +++ /dev/null @@ -1,112 +0,0 @@ -/* -   GoToSocial -   Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org - -   This program is free software: you can redistribute it and/or modify -   it under the terms of the GNU Affero General Public License as published by -   the Free Software Foundation, either version 3 of the License, or -   (at your option) any later version. - -   This program is distributed in the hope that it will be useful, -   but WITHOUT ANY WARRANTY; without even the implied warranty of -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -   GNU Affero General Public License for more details. - -   You should have received a copy of the GNU Affero General Public License -   along with this program.  If not, see <http://www.gnu.org/licenses/>. -*/ - -package text - -import ( -	"bytes" -	"context" -	"strings" -	"unicode" - -	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" -	"github.com/superseriousbusiness/gotosocial/internal/log" -	"github.com/superseriousbusiness/gotosocial/internal/regexes" -	"github.com/superseriousbusiness/gotosocial/internal/util" -) - -func (f *formatter) ReplaceTags(ctx context.Context, in string, tags []*gtsmodel.Tag) string { -	spans := util.FindHashtagSpansInText(in) - -	if len(spans) == 0 { -		return in -	} - -	var b strings.Builder -	i := 0 - -spans: -	for _, t := range spans { -		b.WriteString(in[i:t.First]) -		i = t.Second -		tagAsEntered := in[t.First+1 : t.Second] - -		for _, tag := range tags { -			if strings.EqualFold(tagAsEntered, tag.Name) { -				// replace the #tag with the formatted tag content -				// `<a href="tag.URL" class="mention hashtag" rel="tag">#<span>tagAsEntered</span></a> -				b.WriteString(`<a href="`) -				b.WriteString(tag.URL) -				b.WriteString(`" class="mention hashtag" rel="tag">#<span>`) -				b.WriteString(tagAsEntered) -				b.WriteString(`</span></a>`) -				continue spans -			} -		} - -		b.WriteString(in[t.First:t.Second]) -	} - -	// Get the last bits. -	i = spans[len(spans)-1].Second -	b.WriteString(in[i:]) - -	return b.String() -} - -func (f *formatter) ReplaceMentions(ctx context.Context, in string, mentions []*gtsmodel.Mention) string { -	return regexes.ReplaceAllStringFunc(regexes.MentionFinder, in, func(match string, buf *bytes.Buffer) string { -		// we have a match, trim any spaces -		matchTrimmed := strings.TrimSpace(match) - -		// check through mentions to find what we're matching -		for _, menchie := range mentions { -			if strings.EqualFold(matchTrimmed, menchie.NameString) { -				// make sure we have an account attached to this mention -				if menchie.TargetAccount == nil { -					a, err := f.db.GetAccountByID(ctx, menchie.TargetAccountID) -					if err != nil { -						log.Errorf("error getting account with id %s from the db: %s", menchie.TargetAccountID, err) -						return match -					} -					menchie.TargetAccount = a -				} - -				// The mention's target is our target -				targetAccount := menchie.TargetAccount - -				// Add any dropped space from match -				if unicode.IsSpace(rune(match[0])) { -					buf.WriteByte(match[0]) -				} - -				// replace the mention with the formatted mention content -				// <span class="h-card"><a href="targetAccount.URL" class="u-url mention">@<span>targetAccount.Username</span></a></span> -				buf.WriteString(`<span class="h-card"><a href="`) -				buf.WriteString(targetAccount.URL) -				buf.WriteString(`" class="u-url mention">@<span>`) -				buf.WriteString(targetAccount.Username) -				buf.WriteString(`</span></a></span>`) -				return buf.String() -			} -		} - -		// the match wasn't in the list of mentions for whatever reason, so just return the match as we found it so nothing changes -		return match -	}) -} diff --git a/internal/text/common_test.go b/internal/text/common_test.go deleted file mode 100644 index 3949226ca..000000000 --- a/internal/text/common_test.go +++ /dev/null @@ -1,106 +0,0 @@ -/* -   GoToSocial -   Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org - -   This program is free software: you can redistribute it and/or modify -   it under the terms of the GNU Affero General Public License as published by -   the Free Software Foundation, either version 3 of the License, or -   (at your option) any later version. - -   This program is distributed in the hope that it will be useful, -   but WITHOUT ANY WARRANTY; without even the implied warranty of -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -   GNU Affero General Public License for more details. - -   You should have received a copy of the GNU Affero General Public License -   along with this program.  If not, see <http://www.gnu.org/licenses/>. -*/ - -package text_test - -import ( -	"context" -	"testing" -	"time" - -	"github.com/stretchr/testify/suite" -	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" -) - -const ( -	replaceMentionsString                 = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText" -	replaceMentionsExpected               = "Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\">@<span>foss_satan</span></a></span>\n\n#Hashtag\n\nText" -	replaceHashtagsExpected               = "Another test @foss_satan@fossbros-anonymous.io\n\n<a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag\">#<span>Hashtag</span></a>\n\nText" -	replaceHashtagsAfterMentionsExpected  = "Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\">@<span>foss_satan</span></a></span>\n\n<a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag\">#<span>Hashtag</span></a>\n\nText" -	replaceMentionsWithLinkString         = "Another test @foss_satan@fossbros-anonymous.io\n\nhttp://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060" -	replaceMentionsWithLinkStringExpected = "Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\">@<span>foss_satan</span></a></span>\n\nhttp://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060" -	replaceMentionsWithLinkSelfString     = "Mentioning myself: @the_mighty_zork\n\nand linking to my own status: https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR" -	replaceMemtionsWithLinkSelfExpected   = "Mentioning myself: <span class=\"h-card\"><a href=\"http://localhost:8080/@the_mighty_zork\" class=\"u-url mention\">@<span>the_mighty_zork</span></a></span>\n\nand linking to my own status: https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR" -) - -type CommonTestSuite struct { -	TextStandardTestSuite -} - -func (suite *CommonTestSuite) TestReplaceMentions() { -	foundMentions := []*gtsmodel.Mention{ -		suite.testMentions["zork_mention_foss_satan"], -	} - -	f := suite.formatter.ReplaceMentions(context.Background(), replaceMentionsString, foundMentions) -	suite.Equal(replaceMentionsExpected, f) -} - -func (suite *CommonTestSuite) TestReplaceHashtags() { -	foundTags := []*gtsmodel.Tag{ -		suite.testTags["Hashtag"], -	} - -	f := suite.formatter.ReplaceTags(context.Background(), replaceMentionsString, foundTags) - -	suite.Equal(replaceHashtagsExpected, f) -} - -func (suite *CommonTestSuite) TestReplaceHashtagsAfterReplaceMentions() { -	foundTags := []*gtsmodel.Tag{ -		suite.testTags["Hashtag"], -	} - -	f := suite.formatter.ReplaceTags(context.Background(), replaceMentionsExpected, foundTags) - -	suite.Equal(replaceHashtagsAfterMentionsExpected, f) -} - -func (suite *CommonTestSuite) TestReplaceMentionsWithLink() { -	foundMentions := []*gtsmodel.Mention{ -		suite.testMentions["zork_mention_foss_satan"], -	} - -	f := suite.formatter.ReplaceMentions(context.Background(), replaceMentionsWithLinkString, foundMentions) -	suite.Equal(replaceMentionsWithLinkStringExpected, f) -} - -func (suite *CommonTestSuite) TestReplaceMentionsWithLinkSelf() { -	mentioningAccount := suite.testAccounts["local_account_1"] - -	foundMentions := []*gtsmodel.Mention{ -		{ -			ID:               "01FGXKN5F815DVFVD53PN9NYM6", -			CreatedAt:        time.Now(), -			UpdatedAt:        time.Now(), -			StatusID:         "01FGXKP0S5THQXFC1D9R141DDR", -			OriginAccountID:  mentioningAccount.ID, -			TargetAccountID:  mentioningAccount.ID, -			NameString:       "@the_mighty_zork", -			TargetAccountURI: mentioningAccount.URI, -			TargetAccountURL: mentioningAccount.URL, -		}, -	} - -	f := suite.formatter.ReplaceMentions(context.Background(), replaceMentionsWithLinkSelfString, foundMentions) -	suite.Equal(replaceMemtionsWithLinkSelfExpected, f) -} - -func TestCommonTestSuite(t *testing.T) { -	suite.Run(t, new(CommonTestSuite)) -} diff --git a/internal/text/emojionly.go b/internal/text/emojionly.go new file mode 100644 index 000000000..1a3c0e968 --- /dev/null +++ b/internal/text/emojionly.go @@ -0,0 +1,71 @@ +/* +   GoToSocial +   Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org + +   This program is free software: you can redistribute it and/or modify +   it under the terms of the GNU Affero General Public License as published by +   the Free Software Foundation, either version 3 of the License, or +   (at your option) any later version. + +   This program is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +   GNU Affero General Public License for more details. + +   You should have received a copy of the GNU Affero General Public License +   along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +package text + +import ( +	"bytes" +	"context" + +	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" +	"github.com/superseriousbusiness/gotosocial/internal/log" +	"github.com/yuin/goldmark" +	"github.com/yuin/goldmark/parser" +	"github.com/yuin/goldmark/renderer/html" +	"github.com/yuin/goldmark/util" +) + +func (f *formatter) FromPlainEmojiOnly(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult { +	result := &FormatResult{ +		Mentions: []*gtsmodel.Mention{}, +		Tags:     []*gtsmodel.Tag{}, +		Emojis:   []*gtsmodel.Emoji{}, +	} +	// parse markdown text into html, using custom renderer to add hashtag/mention links +	md := goldmark.New( +		goldmark.WithRendererOptions( +			html.WithXHTML(), +			html.WithHardWraps(), +		), +		goldmark.WithParser( +			parser.NewParser( +				parser.WithBlockParsers( +					util.Prioritized(newPlaintextParser(), 500), +				), +			), +		), +		goldmark.WithExtensions( +			&customRenderer{f, ctx, pmf, authorID, statusID, true, result}, +		), +	) + +	var htmlContentBytes bytes.Buffer +	err := md.Convert([]byte(plain), &htmlContentBytes) +	if err != nil { +		log.Errorf("error formatting plaintext to HTML: %s", err) +	} +	result.HTML = htmlContentBytes.String() + +	// clean anything dangerous out of the HTML +	result.HTML = SanitizeHTML(result.HTML) + +	// shrink ray +	result.HTML = minifyHTML(result.HTML) + +	return result +} diff --git a/internal/text/formatter.go b/internal/text/formatter.go index cb4de402b..bdad6c0f8 100644 --- a/internal/text/formatter.go +++ b/internal/text/formatter.go @@ -26,20 +26,19 @@ import (  )  // Formatter wraps some logic and functions for parsing statuses and other text input into nice html. +// Each of the member functions returns a struct containing the formatted HTML and any tags, mentions, and +// emoji that were found in the text.  type Formatter interface {  	// FromPlain parses an HTML text from a plaintext. -	FromPlain(ctx context.Context, plain string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string +	FromPlain(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult  	// FromMarkdown parses an HTML text from a markdown-formatted text. -	FromMarkdown(ctx context.Context, md string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag, emojis []*gtsmodel.Emoji) string - -	// ReplaceTags takes a piece of text and a slice of tags, and returns the same text with the tags nicely formatted as hrefs. -	ReplaceTags(ctx context.Context, in string, tags []*gtsmodel.Tag) string -	// ReplaceMentions takes a piece of text and a slice of mentions, and returns the same text with the mentions nicely formatted as hrefs. -	ReplaceMentions(ctx context.Context, in string, mentions []*gtsmodel.Mention) string -	// ReplaceLinks takes a piece of text, finds all recognizable links in that text, and replaces them with hrefs. -	ReplaceLinks(ctx context.Context, in string) string +	FromMarkdown(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, md string) *FormatResult +	// FromPlainEmojiOnly parses an HTML text from a plaintext, only parsing emojis and not mentions etc. +	FromPlainEmojiOnly(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult  } +type FormatFunc func(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, text string) *FormatResult +  type formatter struct {  	db db.DB  } @@ -50,3 +49,10 @@ func NewFormatter(db db.DB) Formatter {  		db: db,  	}  } + +type FormatResult struct { +	HTML     string +	Mentions []*gtsmodel.Mention +	Tags     []*gtsmodel.Tag +	Emojis   []*gtsmodel.Emoji +} diff --git a/internal/text/formatter_test.go b/internal/text/formatter_test.go index 438a69c78..32ae74488 100644 --- a/internal/text/formatter_test.go +++ b/internal/text/formatter_test.go @@ -19,9 +19,13 @@  package text_test  import ( +	"context"  	"github.com/stretchr/testify/suite" +	"github.com/superseriousbusiness/gotosocial/internal/concurrency"  	"github.com/superseriousbusiness/gotosocial/internal/db"  	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" +	"github.com/superseriousbusiness/gotosocial/internal/messages" +	"github.com/superseriousbusiness/gotosocial/internal/processing"  	"github.com/superseriousbusiness/gotosocial/internal/text"  	"github.com/superseriousbusiness/gotosocial/testrig"  ) @@ -29,7 +33,8 @@ import (  type TextStandardTestSuite struct {  	// standard suite interfaces  	suite.Suite -	db db.DB +	db           db.DB +	parseMention gtsmodel.ParseMentionFunc  	// standard suite models  	testTokens       map[string]*gtsmodel.Token @@ -41,6 +46,7 @@ type TextStandardTestSuite struct {  	testStatuses     map[string]*gtsmodel.Status  	testTags         map[string]*gtsmodel.Tag  	testMentions     map[string]*gtsmodel.Mention +	testEmojis       map[string]*gtsmodel.Emoji  	// module being tested  	formatter text.Formatter @@ -56,6 +62,7 @@ func (suite *TextStandardTestSuite) SetupSuite() {  	suite.testStatuses = testrig.NewTestStatuses()  	suite.testTags = testrig.NewTestTags()  	suite.testMentions = testrig.NewTestMentions() +	suite.testEmojis = testrig.NewTestEmojis()  }  func (suite *TextStandardTestSuite) SetupTest() { @@ -63,6 +70,11 @@ func (suite *TextStandardTestSuite) SetupTest() {  	testrig.InitTestConfig()  	suite.db = testrig.NewTestDB() + +	fedWorker := concurrency.NewWorkerPool[messages.FromFederator](-1, -1) +	federator := testrig.NewTestFederator(suite.db, testrig.NewTestTransportController(testrig.NewMockHTTPClient(nil, "../../testrig/media"), suite.db, fedWorker), nil, nil, fedWorker) +	suite.parseMention = processing.GetParseMentionFunc(suite.db, federator) +  	suite.formatter = text.NewFormatter(suite.db)  	testrig.StandardDBSetup(suite.db, nil) @@ -71,3 +83,11 @@ func (suite *TextStandardTestSuite) SetupTest() {  func (suite *TextStandardTestSuite) TearDownTest() {  	testrig.StandardDBTeardown(suite.db)  } + +func (suite *TextStandardTestSuite) FromMarkdown(text string) *text.FormatResult { +	return suite.formatter.FromMarkdown(context.Background(), suite.parseMention, suite.testAccounts["local_account_1"].ID, "status_ID", text) +} + +func (suite *TextStandardTestSuite) FromPlain(text string) *text.FormatResult { +	return suite.formatter.FromPlain(context.Background(), suite.parseMention, suite.testAccounts["local_account_1"].ID, "status_ID", text) +} diff --git a/internal/text/markdownextension.go b/internal/text/goldmark_extension.go index 2d8eae907..11e4fde28 100644 --- a/internal/text/markdownextension.go +++ b/internal/text/goldmark_extension.go @@ -17,8 +17,10 @@ package text  import (  	"context" -	"unicode" +	"fmt" +	"strings" +	"github.com/superseriousbusiness/gotosocial/internal/db"  	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"  	"github.com/superseriousbusiness/gotosocial/internal/log"  	"github.com/superseriousbusiness/gotosocial/internal/regexes" @@ -46,8 +48,14 @@ type hashtag struct {  	Segment text.Segment  } +type emoji struct { +	ast.BaseInline +	Segment text.Segment +} +  var kindMention = ast.NewNodeKind("Mention")  var kindHashtag = ast.NewNodeKind("Hashtag") +var kindEmoji = ast.NewNodeKind("Emoji")  func (n *mention) Kind() ast.NodeKind {  	return kindMention @@ -57,14 +65,21 @@ func (n *hashtag) Kind() ast.NodeKind {  	return kindHashtag  } -// Dump is used by goldmark for debugging. It is implemented only minimally because -// it is not used in our code. +func (n *emoji) Kind() ast.NodeKind { +	return kindEmoji +} + +// Dump can be used for debugging.  func (n *mention) Dump(source []byte, level int) { -	ast.DumpHelper(n, source, level, nil, nil) +	fmt.Printf("%sMention: %s\n", strings.Repeat("    ", level), string(n.Segment.Value(source)))  }  func (n *hashtag) Dump(source []byte, level int) { -	ast.DumpHelper(n, source, level, nil, nil) +	fmt.Printf("%sHashtag: %s\n", strings.Repeat("    ", level), string(n.Segment.Value(source))) +} + +func (n *emoji) Dump(source []byte, level int) { +	fmt.Printf("%sEmoji: %s\n", strings.Repeat("    ", level), string(n.Segment.Value(source)))  }  // newMention and newHashtag create a goldmark ast.Node from a goldmark text.Segment. @@ -83,6 +98,13 @@ func newHashtag(s text.Segment) *hashtag {  	}  } +func newEmoji(s text.Segment) *emoji { +	return &emoji{ +		BaseInline: ast.BaseInline{}, +		Segment:    s, +	} +} +  // mentionParser and hashtagParser fulfil the goldmark parser.InlineParser interface.  type mentionParser struct {  } @@ -90,6 +112,9 @@ type mentionParser struct {  type hashtagParser struct {  } +type emojiParser struct { +} +  func (p *mentionParser) Trigger() []byte {  	return []byte{'@'}  } @@ -98,11 +123,15 @@ func (p *hashtagParser) Trigger() []byte {  	return []byte{'#'}  } +func (p *emojiParser) Trigger() []byte { +	return []byte{':'} +} +  func (p *mentionParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {  	before := block.PrecendingCharacter()  	line, segment := block.PeekLine() -	if !unicode.IsSpace(before) { +	if !util.IsMentionOrHashtagBoundary(before) {  		return nil  	} @@ -124,59 +153,88 @@ func (p *hashtagParser) Parse(parent ast.Node, block text.Reader, pc parser.Cont  	line, segment := block.PeekLine()  	s := string(line) -	if !util.IsHashtagBoundary(before) { +	if !util.IsMentionOrHashtagBoundary(before) || len(s) == 1 {  		return nil  	}  	for i, r := range s {  		switch {  		case r == '#' && i == 0: +			// ignore initial #  			continue -		case !util.IsPermittedInHashtag(r) && !util.IsHashtagBoundary(r): +		case !util.IsPlausiblyInHashtag(r) && !util.IsMentionOrHashtagBoundary(r):  			// Fake hashtag, don't trust it  			return nil -		case util.IsHashtagBoundary(r): +		case util.IsMentionOrHashtagBoundary(r): +			if i <= 1 { +				// empty +				return nil +			}  			// End of hashtag  			block.Advance(i)  			return newHashtag(segment.WithStop(segment.Start + i))  		}  	} -	// If we don't find invalid characters before the end of the line then it's good -	block.Advance(len(s)) +	// If we don't find invalid characters before the end of the line then it's all hashtag, babey +	block.Advance(segment.Len())  	return newHashtag(segment)  } +func (p *emojiParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node { +	line, segment := block.PeekLine() + +	// unideal for performance but makes use of existing regex +	loc := regexes.EmojiFinder.FindIndex(line) +	switch { +	case loc == nil: +		fallthrough +	case loc[0] != 0: // fail if not found at start +		return nil +	default: +		block.Advance(loc[1]) +		return newEmoji(segment.WithStop(segment.Start + loc[1])) +	} +} +  // customRenderer fulfils both the renderer.NodeRenderer and goldmark.Extender interfaces. -// It is created in FromMarkdown to be used a goldmark extension, and the fields are used -// when rendering mentions and tags. +// It is created in FromMarkdown and FromPlain to be used as a goldmark extension, and the +// fields are used to report tags and mentions to the caller for use as metadata.  type customRenderer struct { -	f        *formatter -	ctx      context.Context -	mentions []*gtsmodel.Mention -	tags     []*gtsmodel.Tag +	f            *formatter +	ctx          context.Context +	parseMention gtsmodel.ParseMentionFunc +	accountID    string +	statusID     string +	emojiOnly    bool +	result       *FormatResult  }  func (r *customRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {  	reg.Register(kindMention, r.renderMention)  	reg.Register(kindHashtag, r.renderHashtag) +	reg.Register(kindEmoji, r.renderEmoji)  }  func (r *customRenderer) Extend(m goldmark.Markdown) { +	// 1000 is set as the lowest priority, but it's arbitrary  	m.Parser().AddOptions(parser.WithInlineParsers( -		// 500 is pretty arbitrary here, it was copied from example goldmark extension code. -		// https://github.com/yuin/goldmark/blob/75d8cce5b78c7e1d5d9c4ca32c1164f0a1e57b53/extension/strikethrough.go#L111 -		mdutil.Prioritized(&mentionParser{}, 500), -		mdutil.Prioritized(&hashtagParser{}, 500), +		mdutil.Prioritized(&emojiParser{}, 1000),  	)) +	if !r.emojiOnly { +		m.Parser().AddOptions(parser.WithInlineParsers( +			mdutil.Prioritized(&mentionParser{}, 1000), +			mdutil.Prioritized(&hashtagParser{}, 1000), +		)) +	}  	m.Renderer().AddOptions(renderer.WithNodeRenderers( -		mdutil.Prioritized(r, 500), +		mdutil.Prioritized(r, 1000),  	))  }  // renderMention and renderHashtag take a mention or a hashtag ast.Node and render it as HTML.  func (r *customRenderer) renderMention(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {  	if !entering { -		return ast.WalkContinue, nil +		return ast.WalkSkipChildren, nil  	}  	n, ok := node.(*mention) // this function is only registered for kindMention @@ -185,18 +243,18 @@ func (r *customRenderer) renderMention(w mdutil.BufWriter, source []byte, node a  	}  	text := string(n.Segment.Value(source)) -	html := r.f.ReplaceMentions(r.ctx, text, r.mentions) +	html := r.replaceMention(text)  	// we don't have much recourse if this fails  	if _, err := w.WriteString(html); err != nil { -		log.Errorf("error outputting markdown text: %s", err) +		log.Errorf("error writing HTML: %s", err)  	} -	return ast.WalkContinue, nil +	return ast.WalkSkipChildren, nil  }  func (r *customRenderer) renderHashtag(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {  	if !entering { -		return ast.WalkContinue, nil +		return ast.WalkSkipChildren, nil  	}  	n, ok := node.(*hashtag) // this function is only registered for kindHashtag @@ -205,11 +263,50 @@ func (r *customRenderer) renderHashtag(w mdutil.BufWriter, source []byte, node a  	}  	text := string(n.Segment.Value(source)) -	html := r.f.ReplaceTags(r.ctx, text, r.tags) +	html := r.replaceHashtag(text) +	_, err := w.WriteString(html)  	// we don't have much recourse if this fails -	if _, err := w.WriteString(html); err != nil { -		log.Errorf("error outputting markdown text: %s", err) +	if err != nil { +		log.Errorf("error writing HTML: %s", err) +	} +	return ast.WalkSkipChildren, nil +} + +// renderEmoji doesn't turn an emoji into HTML, but adds it to the metadata. +func (r *customRenderer) renderEmoji(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { +	if !entering { +		return ast.WalkSkipChildren, nil +	} + +	n, ok := node.(*emoji) // this function is only registered for kindEmoji +	if !ok { +		log.Errorf("type assertion failed") +	} +	text := string(n.Segment.Value(source)) +	shortcode := text[1 : len(text)-1] + +	emoji, err := r.f.db.GetEmojiByShortcodeDomain(r.ctx, shortcode, "") +	if err != nil { +		if err != db.ErrNoEntries { +			log.Errorf("error getting local emoji with shortcode %s: %s", shortcode, err) +		} +	} else if *emoji.VisibleInPicker && !*emoji.Disabled { +		listed := false +		for _, e := range r.result.Emojis { +			if e.Shortcode == emoji.Shortcode { +				listed = true +				break +			} +		} +		if !listed { +			r.result.Emojis = append(r.result.Emojis, emoji) +		} +	} + +	// we don't have much recourse if this fails +	if _, err := w.WriteString(text); err != nil { +		log.Errorf("error writing HTML: %s", err)  	} -	return ast.WalkContinue, nil +	return ast.WalkSkipChildren, nil  } diff --git a/internal/text/goldmark_plaintext.go b/internal/text/goldmark_plaintext.go new file mode 100644 index 000000000..84916b1d1 --- /dev/null +++ b/internal/text/goldmark_plaintext.go @@ -0,0 +1,64 @@ +/* +   GoToSocial +   Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org + +   This program is free software: you can redistribute it and/or modify +   it under the terms of the GNU Affero General Public License as published by +   the Free Software Foundation, either version 3 of the License, or +   (at your option) any later version. + +   This program is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +   GNU Affero General Public License for more details. + +   You should have received a copy of the GNU Affero General Public License +   along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +package text + +import ( +	"github.com/yuin/goldmark/ast" +	"github.com/yuin/goldmark/parser" +	"github.com/yuin/goldmark/text" +) + +// plaintextParser implements goldmark.parser.BlockParser +type plaintextParser struct { +} + +var defaultPlaintextParser = &plaintextParser{} + +func newPlaintextParser() parser.BlockParser { +	return defaultPlaintextParser +} + +func (b *plaintextParser) Trigger() []byte { +	return nil +} + +func (b *plaintextParser) Open(parent ast.Node, reader text.Reader, pc parser.Context) (ast.Node, parser.State) { +	_, segment := reader.PeekLine() +	node := ast.NewParagraph() +	node.Lines().Append(segment) +	reader.Advance(segment.Len() - 1) +	return node, parser.NoChildren +} + +func (b *plaintextParser) Continue(node ast.Node, reader text.Reader, pc parser.Context) parser.State { +	_, segment := reader.PeekLine() +	node.Lines().Append(segment) +	reader.Advance(segment.Len() - 1) +	return parser.Continue | parser.NoChildren +} + +func (b *plaintextParser) Close(node ast.Node, reader text.Reader, pc parser.Context) {} + +func (b *plaintextParser) CanInterruptParagraph() bool { +	return false +} + +func (b *plaintextParser) CanAcceptIndentedLine() bool { +	return true +} diff --git a/internal/text/link.go b/internal/text/link.go deleted file mode 100644 index 2b2b45e73..000000000 --- a/internal/text/link.go +++ /dev/null @@ -1,86 +0,0 @@ -/* -   GoToSocial -   Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org - -   This program is free software: you can redistribute it and/or modify -   it under the terms of the GNU Affero General Public License as published by -   the Free Software Foundation, either version 3 of the License, or -   (at your option) any later version. - -   This program is distributed in the hope that it will be useful, -   but WITHOUT ANY WARRANTY; without even the implied warranty of -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -   GNU Affero General Public License for more details. - -   You should have received a copy of the GNU Affero General Public License -   along with this program.  If not, see <http://www.gnu.org/licenses/>. -*/ - -package text - -import ( -	"bytes" -	"context" -	"net/url" -	"strings" - -	"github.com/superseriousbusiness/gotosocial/internal/regexes" -) - -// FindLinks parses the given string looking for recognizable URLs (including scheme). -// It returns a list of those URLs, without changing the string, or an error if something goes wrong. -// If no URLs are found within the given string, an empty slice and nil will be returned. -func FindLinks(in string) []*url.URL { -	var urls []*url.URL - -	// bail already if we don't find anything -	found := regexes.LinkScheme.FindAllString(in, -1) -	if len(found) == 0 { -		return nil -	} - -	urlmap := map[string]struct{}{} - -	// for each string we find, we want to parse it into a URL if we can -	// if we fail to parse it, just ignore this match and continue -	for _, f := range found { -		u, err := url.Parse(f) -		if err != nil { -			continue -		} - -		// Calculate string -		ustr := u.String() - -		if _, ok := urlmap[ustr]; !ok { -			// Has not been encountered yet -			urls = append(urls, u) -			urlmap[ustr] = struct{}{} -		} -	} - -	return urls -} - -// ReplaceLinks replaces all detected links in a piece of text with their HTML (href) equivalents. -// Note: because Go doesn't allow negative lookbehinds in regex, it's possible that an already-formatted -// href will end up double-formatted, if the text you pass here contains one or more hrefs already. -// To avoid this, you should sanitize any HTML out of text before you pass it into this function. -func (f *formatter) ReplaceLinks(ctx context.Context, in string) string { -	return regexes.ReplaceAllStringFunc(regexes.LinkScheme, in, func(urlString string, buf *bytes.Buffer) string { -		thisURL, err := url.Parse(urlString) -		if err != nil { -			return urlString // we can't parse it as a URL so don't replace it -		} -		// <a href="thisURL.String()" rel="noopener">urlString</a> -		urlString = thisURL.String() -		buf.WriteString(`<a href="`) -		buf.WriteString(thisURL.String()) -		buf.WriteString(`" rel="noopener">`) -		urlString = strings.TrimPrefix(urlString, thisURL.Scheme) -		urlString = strings.TrimPrefix(urlString, "://") -		buf.WriteString(urlString) -		buf.WriteString(`</a>`) -		return buf.String() -	}) -} diff --git a/internal/text/link_test.go b/internal/text/link_test.go deleted file mode 100644 index dfb4656b8..000000000 --- a/internal/text/link_test.go +++ /dev/null @@ -1,157 +0,0 @@ -/* -   GoToSocial -   Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org - -   This program is free software: you can redistribute it and/or modify -   it under the terms of the GNU Affero General Public License as published by -   the Free Software Foundation, either version 3 of the License, or -   (at your option) any later version. - -   This program is distributed in the hope that it will be useful, -   but WITHOUT ANY WARRANTY; without even the implied warranty of -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -   GNU Affero General Public License for more details. - -   You should have received a copy of the GNU Affero General Public License -   along with this program.  If not, see <http://www.gnu.org/licenses/>. -*/ - -package text_test - -import ( -	"context" -	"testing" - -	"github.com/stretchr/testify/assert" -	"github.com/stretchr/testify/suite" -	"github.com/superseriousbusiness/gotosocial/internal/text" -) - -const text1 = ` -This is a text with some links in it. Here's link number one: https://example.org/link/to/something#fragment - -Here's link number two: http://test.example.org?q=bahhhhhhhhhhhh - -https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it - -really.cool.website <-- this one shouldn't be parsed as a link because it doesn't contain the scheme - -https://example.orghttps://google.com <-- this shouldn't work either, but it does?! OK -` - -const text2 = ` -this is one link: https://example.org - -this is the same link again: https://example.org - -these should be deduplicated -` - -const text3 = ` -here's a mailto link: mailto:whatever@test.org -` - -const text4 = ` -two similar links: - -https://example.org - -https://example.org/test -` - -const text5 = ` -what happens when we already have a link within an href? - -<a href="https://example.org">https://example.org</a> -` - -type LinkTestSuite struct { -	TextStandardTestSuite -} - -func (suite *LinkTestSuite) TestParseSimple() { -	f := suite.formatter.FromPlain(context.Background(), simple, nil, nil) -	suite.Equal(simpleExpected, f) -} - -func (suite *LinkTestSuite) TestParseURLsFromText1() { -	urls := text.FindLinks(text1) - -	suite.Equal("https://example.org/link/to/something#fragment", urls[0].String()) -	suite.Equal("http://test.example.org?q=bahhhhhhhhhhhh", urls[1].String()) -	suite.Equal("https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it", urls[2].String()) -	suite.Equal("https://example.orghttps://google.com", urls[3].String()) -} - -func (suite *LinkTestSuite) TestParseURLsFromText2() { -	urls := text.FindLinks(text2) - -	// assert length 1 because the found links will be deduplicated -	assert.Len(suite.T(), urls, 1) -} - -func (suite *LinkTestSuite) TestParseURLsFromText3() { -	urls := text.FindLinks(text3) - -	// assert length 0 because `mailto:` isn't accepted -	assert.Len(suite.T(), urls, 0) -} - -func (suite *LinkTestSuite) TestReplaceLinksFromText1() { -	replaced := suite.formatter.ReplaceLinks(context.Background(), text1) -	suite.Equal(` -This is a text with some links in it. Here's link number one: <a href="https://example.org/link/to/something#fragment" rel="noopener">example.org/link/to/something#fragment</a> - -Here's link number two: <a href="http://test.example.org?q=bahhhhhhhhhhhh" rel="noopener">test.example.org?q=bahhhhhhhhhhhh</a> - -<a href="https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it" rel="noopener">another.link.example.org/with/a/pretty/long/path/at/the/end/of/it</a> - -really.cool.website <-- this one shouldn't be parsed as a link because it doesn't contain the scheme - -<a href="https://example.orghttps://google.com" rel="noopener">example.orghttps://google.com</a> <-- this shouldn't work either, but it does?! OK -`, replaced) -} - -func (suite *LinkTestSuite) TestReplaceLinksFromText2() { -	replaced := suite.formatter.ReplaceLinks(context.Background(), text2) -	suite.Equal(` -this is one link: <a href="https://example.org" rel="noopener">example.org</a> - -this is the same link again: <a href="https://example.org" rel="noopener">example.org</a> - -these should be deduplicated -`, replaced) -} - -func (suite *LinkTestSuite) TestReplaceLinksFromText3() { -	// we know mailto links won't be replaced with hrefs -- we only accept https and http -	replaced := suite.formatter.ReplaceLinks(context.Background(), text3) -	suite.Equal(` -here's a mailto link: mailto:whatever@test.org -`, replaced) -} - -func (suite *LinkTestSuite) TestReplaceLinksFromText4() { -	replaced := suite.formatter.ReplaceLinks(context.Background(), text4) -	suite.Equal(` -two similar links: - -<a href="https://example.org" rel="noopener">example.org</a> - -<a href="https://example.org/test" rel="noopener">example.org/test</a> -`, replaced) -} - -func (suite *LinkTestSuite) TestReplaceLinksFromText5() { -	// we know this one doesn't work properly, which is why html should always be sanitized before being passed into the ReplaceLinks function -	replaced := suite.formatter.ReplaceLinks(context.Background(), text5) -	suite.Equal(` -what happens when we already have a link within an href? - -<a href="<a href="https://example.org" rel="noopener">example.org</a>"><a href="https://example.org" rel="noopener">example.org</a></a> -`, replaced) -} - -func TestLinkTestSuite(t *testing.T) { -	suite.Run(t, new(LinkTestSuite)) -} diff --git a/internal/text/markdown.go b/internal/text/markdown.go index dbe86d110..232f0f723 100644 --- a/internal/text/markdown.go +++ b/internal/text/markdown.go @@ -21,32 +21,19 @@ package text  import (  	"bytes"  	"context" -	"strings"  	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"  	"github.com/superseriousbusiness/gotosocial/internal/log" -	"github.com/tdewolff/minify/v2" -	minifyHtml "github.com/tdewolff/minify/v2/html"  	"github.com/yuin/goldmark"  	"github.com/yuin/goldmark/extension"  	"github.com/yuin/goldmark/renderer/html"  ) -var ( -	m *minify.M -) - -func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag, emojis []*gtsmodel.Emoji) string { - -	// Temporarily replace all found emoji shortcodes in the markdown text with -	// their ID so that they're not parsed as anything by the markdown parser - -	// this fixes cases where emojis with some underscores in them are parsed as -	// words with emphasis, eg `:_some_emoji:` becomes `:<em>some</em>emoji:` -	// -	// Since the IDs of the emojis are just uppercase letters + numbers they should -	// be safe to pass through the markdown parser without unexpected effects. -	for _, e := range emojis { -		markdownText = strings.ReplaceAll(markdownText, ":"+e.Shortcode+":", ":"+e.ID+":") +func (f *formatter) FromMarkdown(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, markdownText string) *FormatResult { +	result := &FormatResult{ +		Mentions: []*gtsmodel.Mention{}, +		Tags:     []*gtsmodel.Tag{}, +		Emojis:   []*gtsmodel.Emoji{},  	}  	// parse markdown text into html, using custom renderer to add hashtag/mention links @@ -57,7 +44,7 @@ func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, menti  			html.WithUnsafe(), // allows raw HTML  		),  		goldmark.WithExtensions( -			&customRenderer{f, ctx, mentions, tags}, +			&customRenderer{f, ctx, pmf, authorID, statusID, false, result},  			extension.Linkify, // turns URLs into links  			extension.Strikethrough,  		), @@ -66,30 +53,15 @@ func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, menti  	var htmlContentBytes bytes.Buffer  	err := md.Convert([]byte(markdownText), &htmlContentBytes)  	if err != nil { -		log.Errorf("error rendering markdown to HTML: %s", err) -	} -	htmlContent := htmlContentBytes.String() - -	// Replace emoji IDs in the parsed html content with their shortcodes again -	for _, e := range emojis { -		htmlContent = strings.ReplaceAll(htmlContent, ":"+e.ID+":", ":"+e.Shortcode+":") +		log.Errorf("error formatting markdown to HTML: %s", err)  	} +	result.HTML = htmlContentBytes.String() -	// clean anything dangerous out of the html -	htmlContent = SanitizeHTML(htmlContent) +	// clean anything dangerous out of the HTML +	result.HTML = SanitizeHTML(result.HTML) -	if m == nil { -		m = minify.New() -		m.Add("text/html", &minifyHtml.Minifier{ -			KeepEndTags: true, -			KeepQuotes:  true, -		}) -	} - -	minified, err := m.String("text/html", htmlContent) -	if err != nil { -		log.Errorf("error minifying markdown text: %s", err) -	} +	// shrink ray +	result.HTML = minifyHTML(result.HTML) -	return minified +	return result  } diff --git a/internal/text/markdown_test.go b/internal/text/markdown_test.go index 384f4389c..80547f8b3 100644 --- a/internal/text/markdown_test.go +++ b/internal/text/markdown_test.go @@ -19,11 +19,9 @@  package text_test  import ( -	"context"  	"testing"  	"github.com/stretchr/testify/suite" -	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"  )  var withCodeBlock = `# Title @@ -77,6 +75,16 @@ const (  	mdWithStrikethroughExpected     = "<p>I have <del>mdae</del> made an error</p>"  	mdWithLink                      = "Check out this code, i heard it was written by a sloth https://github.com/superseriousbusiness/gotosocial"  	mdWithLinkExpected              = "<p>Check out this code, i heard it was written by a sloth <a href=\"https://github.com/superseriousbusiness/gotosocial\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">https://github.com/superseriousbusiness/gotosocial</a></p>" +	mdObjectInCodeBlock             = "@foss_satan@fossbros-anonymous.io this is how to mention a user\n```\n@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you've been writing lately! :rainbow:\n```\nhope that helps" +	mdObjectInCodeBlockExpected     = "<p><span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span> this is how to mention a user</p><pre><code>@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you've been writing lately! :rainbow:\n</code></pre><p>hope that helps</p>" +	mdItalicHashtag                 = "_#hashtag_" +	mdItalicHashtagExpected         = "<p><em><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a></em></p>" +	mdItalicHashtags                = "_#hashtag #hashtag #hashtag_" +	mdItalicHashtagsExpected        = "<p><em><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a> <a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a> <a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a></em></p>" +	// BEWARE: sneaky unicode business going on. +	// the first ö is one rune, the second ö is an o with a combining diacritic. +	mdUnnormalizedHashtag         = "#hellöthere #hellöthere" +	mdUnnormalizedHashtagExpected = "<p><a href=\"http://localhost:8080/tags/hell%C3%B6there\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hellöthere</span></a> <a href=\"http://localhost:8080/tags/hell%C3%B6there\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hellöthere</span></a></p>"  )  type MarkdownTestSuite struct { @@ -84,101 +92,112 @@ type MarkdownTestSuite struct {  }  func (suite *MarkdownTestSuite) TestParseSimple() { -	s := suite.formatter.FromMarkdown(context.Background(), simpleMarkdown, nil, nil, nil) -	suite.Equal(simpleMarkdownExpected, s) +	formatted := suite.FromMarkdown(simpleMarkdown) +	suite.Equal(simpleMarkdownExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseWithCodeBlock() { -	s := suite.formatter.FromMarkdown(context.Background(), withCodeBlock, nil, nil, nil) -	suite.Equal(withCodeBlockExpected, s) +	formatted := suite.FromMarkdown(withCodeBlock) +	suite.Equal(withCodeBlockExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseWithInlineCode() { -	s := suite.formatter.FromMarkdown(context.Background(), withInlineCode, nil, nil, nil) -	suite.Equal(withInlineCodeExpected, s) +	formatted := suite.FromMarkdown(withInlineCode) +	suite.Equal(withInlineCodeExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseWithInlineCode2() { -	s := suite.formatter.FromMarkdown(context.Background(), withInlineCode2, nil, nil, nil) -	suite.Equal(withInlineCode2Expected, s) +	formatted := suite.FromMarkdown(withInlineCode2) +	suite.Equal(withInlineCode2Expected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseWithHashtag() { -	foundTags := []*gtsmodel.Tag{ -		suite.testTags["Hashtag"], -	} - -	s := suite.formatter.FromMarkdown(context.Background(), withHashtag, nil, foundTags, nil) -	suite.Equal(withHashtagExpected, s) +	formatted := suite.FromMarkdown(withHashtag) +	suite.Equal(withHashtagExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseWithHTML() { -	s := suite.formatter.FromMarkdown(context.Background(), mdWithHTML, nil, nil, nil) -	suite.Equal(mdWithHTMLExpected, s) +	formatted := suite.FromMarkdown(mdWithHTML) +	suite.Equal(mdWithHTMLExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseWithCheekyHTML() { -	s := suite.formatter.FromMarkdown(context.Background(), mdWithCheekyHTML, nil, nil, nil) -	suite.Equal(mdWithCheekyHTMLExpected, s) +	formatted := suite.FromMarkdown(mdWithCheekyHTML) +	suite.Equal(mdWithCheekyHTMLExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseWithHashtagInitial() { -	s := suite.formatter.FromMarkdown(context.Background(), mdWithHashtagInitial, nil, []*gtsmodel.Tag{ -		suite.testTags["Hashtag"], -		suite.testTags["welcome"], -	}, nil) -	suite.Equal(mdWithHashtagInitialExpected, s) +	formatted := suite.FromMarkdown(mdWithHashtagInitial) +	suite.Equal(mdWithHashtagInitialExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseCodeBlockWithNewlines() { -	s := suite.formatter.FromMarkdown(context.Background(), mdCodeBlockWithNewlines, nil, nil, nil) -	suite.Equal(mdCodeBlockWithNewlinesExpected, s) +	formatted := suite.FromMarkdown(mdCodeBlockWithNewlines) +	suite.Equal(mdCodeBlockWithNewlinesExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseWithFootnote() { -	s := suite.formatter.FromMarkdown(context.Background(), mdWithFootnote, nil, nil, nil) -	suite.Equal(mdWithFootnoteExpected, s) +	formatted := suite.FromMarkdown(mdWithFootnote) +	suite.Equal(mdWithFootnoteExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseWithBlockquote() { -	s := suite.formatter.FromMarkdown(context.Background(), mdWithBlockQuote, nil, nil, nil) -	suite.Equal(mdWithBlockQuoteExpected, s) +	formatted := suite.FromMarkdown(mdWithBlockQuote) +	suite.Equal(mdWithBlockQuoteExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseHashtagWithCodeBlock() { -	s := suite.formatter.FromMarkdown(context.Background(), mdHashtagAndCodeBlock, nil, []*gtsmodel.Tag{ -		suite.testTags["Hashtag"], -	}, nil) -	suite.Equal(mdHashtagAndCodeBlockExpected, s) +	formatted := suite.FromMarkdown(mdHashtagAndCodeBlock) +	suite.Equal(mdHashtagAndCodeBlockExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseMentionWithCodeBlock() { -	s := suite.formatter.FromMarkdown(context.Background(), mdMentionAndCodeBlock, []*gtsmodel.Mention{ -		suite.testMentions["local_user_2_mention_zork"], -	}, nil, nil) -	suite.Equal(mdMentionAndCodeBlockExpected, s) +	formatted := suite.FromMarkdown(mdMentionAndCodeBlock) +	suite.Equal(mdMentionAndCodeBlockExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseSmartypants() { -	s := suite.formatter.FromMarkdown(context.Background(), mdWithSmartypants, []*gtsmodel.Mention{ -		suite.testMentions["local_user_2_mention_zork"], -	}, nil, nil) -	suite.Equal(mdWithSmartypantsExpected, s) +	formatted := suite.FromMarkdown(mdWithSmartypants) +	suite.Equal(mdWithSmartypantsExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseAsciiHeart() { -	s := suite.formatter.FromMarkdown(context.Background(), mdWithAsciiHeart, nil, nil, nil) -	suite.Equal(mdWithAsciiHeartExpected, s) +	formatted := suite.FromMarkdown(mdWithAsciiHeart) +	suite.Equal(mdWithAsciiHeartExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseStrikethrough() { -	s := suite.formatter.FromMarkdown(context.Background(), mdWithStrikethrough, nil, nil, nil) -	suite.Equal(mdWithStrikethroughExpected, s) +	formatted := suite.FromMarkdown(mdWithStrikethrough) +	suite.Equal(mdWithStrikethroughExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseLink() { -	s := suite.formatter.FromMarkdown(context.Background(), mdWithLink, nil, nil, nil) -	suite.Equal(mdWithLinkExpected, s) +	formatted := suite.FromMarkdown(mdWithLink) +	suite.Equal(mdWithLinkExpected, formatted.HTML) +} + +func (suite *MarkdownTestSuite) TestParseObjectInCodeBlock() { +	formatted := suite.FromMarkdown(mdObjectInCodeBlock) +	suite.Equal(mdObjectInCodeBlockExpected, formatted.HTML) +	suite.Len(formatted.Mentions, 1) +	suite.Equal("@foss_satan@fossbros-anonymous.io", formatted.Mentions[0].NameString) +	suite.Empty(formatted.Tags) +	suite.Empty(formatted.Emojis) +} + +func (suite *MarkdownTestSuite) TestParseItalicHashtag() { +	formatted := suite.FromMarkdown(mdItalicHashtag) +	suite.Equal(mdItalicHashtagExpected, formatted.HTML) +} + +func (suite *MarkdownTestSuite) TestParseItalicHashtags() { +	formatted := suite.FromMarkdown(mdItalicHashtags) +	suite.Equal(mdItalicHashtagsExpected, formatted.HTML) +} + +func (suite *MarkdownTestSuite) TestParseUnnormalizedHashtag() { +	formatted := suite.FromMarkdown(mdUnnormalizedHashtag) +	suite.Equal(mdUnnormalizedHashtagExpected, formatted.HTML)  }  func TestMarkdownTestSuite(t *testing.T) { diff --git a/internal/text/minify.go b/internal/text/minify.go new file mode 100644 index 000000000..62562c7ca --- /dev/null +++ b/internal/text/minify.go @@ -0,0 +1,45 @@ +/* +   GoToSocial +   Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org + +   This program is free software: you can redistribute it and/or modify +   it under the terms of the GNU Affero General Public License as published by +   the Free Software Foundation, either version 3 of the License, or +   (at your option) any later version. + +   This program is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +   GNU Affero General Public License for more details. + +   You should have received a copy of the GNU Affero General Public License +   along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +package text + +import ( +	"github.com/superseriousbusiness/gotosocial/internal/log" +	"github.com/tdewolff/minify/v2" +	"github.com/tdewolff/minify/v2/html" +) + +var ( +	m *minify.M +) + +func minifyHTML(content string) string { +	if m == nil { +		m = minify.New() +		m.Add("text/html", &html.Minifier{ +			KeepEndTags: true, +			KeepQuotes:  true, +		}) +	} + +	minified, err := m.String("text/html", content) +	if err != nil { +		log.Errorf("error minifying HTML: %s", err) +	} +	return minified +} diff --git a/internal/text/plain.go b/internal/text/plain.go index a64a14f06..3549200c6 100644 --- a/internal/text/plain.go +++ b/internal/text/plain.go @@ -19,40 +19,56 @@  package text  import ( +	"bytes"  	"context" -	"html" -	"strings"  	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" +	"github.com/superseriousbusiness/gotosocial/internal/log" +	"github.com/yuin/goldmark" +	"github.com/yuin/goldmark/extension" +	"github.com/yuin/goldmark/parser" +	"github.com/yuin/goldmark/renderer/html" +	"github.com/yuin/goldmark/util"  ) -// breakReplacer replaces new-lines with HTML breaks. -var breakReplacer = strings.NewReplacer( -	"\r\n", "<br/>", -	"\n", "<br/>", -) - -func (f *formatter) FromPlain(ctx context.Context, plain string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string { -	// trim any crap -	content := strings.TrimSpace(plain) - -	// clean 'er up -	content = html.EscapeString(content) - -	// format links nicely -	content = f.ReplaceLinks(ctx, content) +func (f *formatter) FromPlain(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult { +	result := &FormatResult{ +		Mentions: []*gtsmodel.Mention{}, +		Tags:     []*gtsmodel.Tag{}, +		Emojis:   []*gtsmodel.Emoji{}, +	} -	// format tags nicely -	content = f.ReplaceTags(ctx, content, tags) +	// parse markdown text into html, using custom renderer to add hashtag/mention links +	md := goldmark.New( +		goldmark.WithRendererOptions( +			html.WithXHTML(), +			html.WithHardWraps(), +		), +		goldmark.WithParser( +			parser.NewParser( +				parser.WithBlockParsers( +					util.Prioritized(newPlaintextParser(), 500), +				), +			), +		), +		goldmark.WithExtensions( +			&customRenderer{f, ctx, pmf, authorID, statusID, false, result}, +			extension.Linkify, // turns URLs into links +		), +	) -	// format mentions nicely -	content = f.ReplaceMentions(ctx, content, mentions) +	var htmlContentBytes bytes.Buffer +	err := md.Convert([]byte(plain), &htmlContentBytes) +	if err != nil { +		log.Errorf("error formatting plaintext to HTML: %s", err) +	} +	result.HTML = htmlContentBytes.String() -	// replace newlines with breaks -	content = breakReplacer.Replace(content) +	// clean anything dangerous out of the HTML +	result.HTML = SanitizeHTML(result.HTML) -	// wrap the whole thing in a pee -	content = `<p>` + content + `</p>` +	// shrink ray +	result.HTML = minifyHTML(result.HTML) -	return SanitizeHTML(content) +	return result  } diff --git a/internal/text/plain_test.go b/internal/text/plain_test.go index 6b850cb45..3693ada9a 100644 --- a/internal/text/plain_test.go +++ b/internal/text/plain_test.go @@ -19,22 +19,21 @@  package text_test  import ( -	"context"  	"testing" +	"github.com/stretchr/testify/assert"  	"github.com/stretchr/testify/suite" -	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"  )  const ( -	simple           = "this is a plain and simple status" -	simpleExpected   = "<p>this is a plain and simple status</p>" -	withTag          = "here's a simple status that uses hashtag #welcome!" -	withTagExpected  = "<p>here's a simple status that uses hashtag <a href=\"http://localhost:8080/tags/welcome\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>welcome</span></a>!</p>" -	withHTML         = "<div>blah this should just be html escaped blah</div>" -	withHTMLExpected = "<p><div>blah this should just be html escaped blah</div></p>" -	moreComplex      = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText" -	moreComplexFull  = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br/><br/><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br/><br/>Text</p>" +	simple              = "this is a plain and simple status" +	simpleExpected      = "<p>this is a plain and simple status</p>" +	withTag             = "here's a simple status that uses hashtag #welcome!" +	withTagExpected     = "<p>here's a simple status that uses hashtag <a href=\"http://localhost:8080/tags/welcome\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>welcome</span></a>!</p>" +	withHTML            = "<div>blah this should just be html escaped blah</div>" +	withHTMLExpected    = "<p><div>blah this should just be html escaped blah</div></p>" +	moreComplex         = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText\n\n:rainbow:" +	moreComplexExpected = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br><br><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br><br>Text<br><br>:rainbow:</p>"  )  type PlainTestSuite struct { @@ -42,35 +41,105 @@ type PlainTestSuite struct {  }  func (suite *PlainTestSuite) TestParseSimple() { -	f := suite.formatter.FromPlain(context.Background(), simple, nil, nil) -	suite.Equal(simpleExpected, f) +	formatted := suite.FromPlain(simple) +	suite.Equal(simpleExpected, formatted.HTML)  }  func (suite *PlainTestSuite) TestParseWithTag() { -	foundTags := []*gtsmodel.Tag{ -		suite.testTags["welcome"], -	} - -	f := suite.formatter.FromPlain(context.Background(), withTag, nil, foundTags) -	suite.Equal(withTagExpected, f) +	formatted := suite.FromPlain(withTag) +	suite.Equal(withTagExpected, formatted.HTML)  }  func (suite *PlainTestSuite) TestParseWithHTML() { -	f := suite.formatter.FromPlain(context.Background(), withHTML, nil, nil) -	suite.Equal(withHTMLExpected, f) +	formatted := suite.FromPlain(withHTML) +	suite.Equal(withHTMLExpected, formatted.HTML)  }  func (suite *PlainTestSuite) TestParseMoreComplex() { -	foundTags := []*gtsmodel.Tag{ -		suite.testTags["Hashtag"], -	} +	formatted := suite.FromPlain(moreComplex) +	suite.Equal(moreComplexExpected, formatted.HTML) +} + +func (suite *PlainTestSuite) TestLinkNoMention() { +	statusText := `here's a link to a post by zork + +https://example.com/@the_mighty_zork/statuses/01FGVP55XMF2K6316MQRX6PFG1 + +that link shouldn't come out formatted as a mention!` + +	menchies := suite.FromPlain(statusText).Mentions +	suite.Empty(menchies) +} + +func (suite *PlainTestSuite) TestDeriveMentionsEmpty() { +	statusText := `` +	menchies := suite.FromPlain(statusText).Mentions +	assert.Len(suite.T(), menchies, 0) +} + +func (suite *PlainTestSuite) TestDeriveHashtagsOK() { +	statusText := `weeeeeeee #testing123 #also testing + +# testing this one shouldn't work + +			#thisshouldwork #dupe #dupe!! #dupe + +	here's a link with a fragment: https://example.org/whatever#ahhh +	here's another link with a fragment: https://example.org/whatever/#ahhh -	foundMentions := []*gtsmodel.Mention{ -		suite.testMentions["zork_mention_foss_satan"], -	} +(#ThisShouldAlsoWork) #this_should_be_split + +#111111 thisalsoshouldn'twork#### ## + +#alimentación, #saúde, #lävistää, #ö, #네 +#ThisOneIsThirtyOneCharactersLon...  ...ng +#ThisOneIsThirteyCharactersLong +` + +	tags := suite.FromPlain(statusText).Tags +	assert.Len(suite.T(), tags, 13) +	assert.Equal(suite.T(), "testing123", tags[0].Name) +	assert.Equal(suite.T(), "also", tags[1].Name) +	assert.Equal(suite.T(), "thisshouldwork", tags[2].Name) +	assert.Equal(suite.T(), "dupe", tags[3].Name) +	assert.Equal(suite.T(), "ThisShouldAlsoWork", tags[4].Name) +	assert.Equal(suite.T(), "this", tags[5].Name) +	assert.Equal(suite.T(), "111111", tags[6].Name) +	assert.Equal(suite.T(), "alimentación", tags[7].Name) +	assert.Equal(suite.T(), "saúde", tags[8].Name) +	assert.Equal(suite.T(), "lävistää", tags[9].Name) +	assert.Equal(suite.T(), "ö", tags[10].Name) +	assert.Equal(suite.T(), "네", tags[11].Name) +	assert.Equal(suite.T(), "ThisOneIsThirteyCharactersLong", tags[12].Name) + +	statusText = `#올빼미 hej` +	tags = suite.FromPlain(statusText).Tags +	assert.Equal(suite.T(), "올빼미", tags[0].Name) +} + +func (suite *PlainTestSuite) TestDeriveMultiple() { +	statusText := `Another test @foss_satan@fossbros-anonymous.io + +	#Hashtag + +	Text` + +	f := suite.FromPlain(statusText) + +	assert.Len(suite.T(), f.Mentions, 1) +	assert.Equal(suite.T(), "@foss_satan@fossbros-anonymous.io", f.Mentions[0].NameString) + +	assert.Len(suite.T(), f.Tags, 1) +	assert.Equal(suite.T(), "Hashtag", f.Tags[0].Name) + +	assert.Len(suite.T(), f.Emojis, 0) +} -	f := suite.formatter.FromPlain(context.Background(), moreComplex, foundMentions, foundTags) -	suite.Equal(moreComplexFull, f) +func (suite *PlainTestSuite) TestZalgoHashtag() { +	statusText := `yo who else loves #praying to #z̸͉̅a̸͚͋l̵͈̊g̸̫͌ỏ̷̪?` +	f := suite.FromPlain(statusText) +	assert.Len(suite.T(), f.Tags, 1) +	assert.Equal(suite.T(), "praying", f.Tags[0].Name)  }  func TestPlainTestSuite(t *testing.T) { diff --git a/internal/text/replace.go b/internal/text/replace.go new file mode 100644 index 000000000..5deab5d4d --- /dev/null +++ b/internal/text/replace.go @@ -0,0 +1,141 @@ +/* +   GoToSocial +   Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org + +   This program is free software: you can redistribute it and/or modify +   it under the terms of the GNU Affero General Public License as published by +   the Free Software Foundation, either version 3 of the License, or +   (at your option) any later version. + +   This program is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +   GNU Affero General Public License for more details. + +   You should have received a copy of the GNU Affero General Public License +   along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +package text + +import ( +	"errors" +	"github.com/superseriousbusiness/gotosocial/internal/db" +	"github.com/superseriousbusiness/gotosocial/internal/log" +	"github.com/superseriousbusiness/gotosocial/internal/util" +	"golang.org/x/text/unicode/norm" +	"strings" +) + +const ( +	maximumHashtagLength = 30 +) + +// given a mention or a hashtag string, the methods in this file will attempt to parse it, +// add it to the database, and render it as HTML. If any of these steps fails, the method +// will just return the original string and log an error. + +// replaceMention takes a string in the form @username@domain.com or @localusername +func (r *customRenderer) replaceMention(text string) string { +	menchie, err := r.parseMention(r.ctx, text, r.accountID, r.statusID) +	if err != nil { +		log.Errorf("error parsing mention %s from status: %s", text, err) +		return text +	} + +	if r.statusID != "" { +		if err := r.f.db.Put(r.ctx, menchie); err != nil { +			log.Errorf("error putting mention in db: %s", err) +			return text +		} +	} + +	// only append if it's not been listed yet +	listed := false +	for _, m := range r.result.Mentions { +		if menchie.ID == m.ID { +			listed = true +			break +		} +	} +	if !listed { +		r.result.Mentions = append(r.result.Mentions, menchie) +	} + +	// make sure we have an account attached to this mention +	if menchie.TargetAccount == nil { +		a, err := r.f.db.GetAccountByID(r.ctx, menchie.TargetAccountID) +		if err != nil { +			log.Errorf("error getting account with id %s from the db: %s", menchie.TargetAccountID, err) +			return text +		} +		menchie.TargetAccount = a +	} + +	// The mention's target is our target +	targetAccount := menchie.TargetAccount + +	var b strings.Builder + +	// replace the mention with the formatted mention content +	// <span class="h-card"><a href="targetAccount.URL" class="u-url mention">@<span>targetAccount.Username</span></a></span> +	b.WriteString(`<span class="h-card"><a href="`) +	b.WriteString(targetAccount.URL) +	b.WriteString(`" class="u-url mention">@<span>`) +	b.WriteString(targetAccount.Username) +	b.WriteString(`</span></a></span>`) +	return b.String() +} + +// replaceMention takes a string in the form #HashedTag, and will normalize it before +// adding it to the db and turning it into HTML. +func (r *customRenderer) replaceHashtag(text string) string { +	// this normalization is specifically to avoid cases where visually-identical +	// hashtags are stored with different unicode representations (e.g. with combining +	// diacritics). It allows a tasteful number of combining diacritics to be used, +	// as long as they can be combined with parent characters to form regular letter +	// symbols. +	normalized := norm.NFC.String(text[1:]) + +	for i, r := range normalized { +		if i >= maximumHashtagLength || !util.IsPermittedInHashtag(r) { +			return text +		} +	} + +	tag, err := r.f.db.TagStringToTag(r.ctx, normalized, r.accountID) +	if err != nil { +		log.Errorf("error generating hashtags from status: %s", err) +		return text +	} + +	// only append if it's not been listed yet +	listed := false +	for _, t := range r.result.Tags { +		if tag.ID == t.ID { +			listed = true +			break +		} +	} +	if !listed { +		err = r.f.db.Put(r.ctx, tag) +		if err != nil { +			if !errors.Is(err, db.ErrAlreadyExists) { +				log.Errorf("error putting tags in db: %s", err) +				return text +			} +		} +		r.result.Tags = append(r.result.Tags, tag) +	} + +	var b strings.Builder +	// replace the #tag with the formatted tag content +	// `<a href="tag.URL" class="mention hashtag" rel="tag">#<span>tagAsEntered</span></a> +	b.WriteString(`<a href="`) +	b.WriteString(tag.URL) +	b.WriteString(`" class="mention hashtag" rel="tag">#<span>`) +	b.WriteString(normalized) +	b.WriteString(`</span></a>`) + +	return b.String() +} | 
