diff options
Diffstat (limited to 'internal/text')
-rw-r--r-- | internal/text/markdown.go | 72 | ||||
-rw-r--r-- | internal/text/markdown_test.go | 21 | ||||
-rw-r--r-- | internal/text/markdownextension.go | 215 |
3 files changed, 261 insertions, 47 deletions
diff --git a/internal/text/markdown.go b/internal/text/markdown.go index 837f213e1..56edf2d01 100644 --- a/internal/text/markdown.go +++ b/internal/text/markdown.go @@ -21,62 +21,23 @@ package text import ( "bytes" "context" - "io" "strings" - "github.com/russross/blackfriday/v2" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/log" "github.com/tdewolff/minify/v2" - "github.com/tdewolff/minify/v2/html" + minifyHtml "github.com/tdewolff/minify/v2/html" + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/extension" + "github.com/yuin/goldmark/renderer/html" ) var ( - bfExtensions = blackfriday.NoIntraEmphasis | blackfriday.FencedCode | blackfriday.Autolink | blackfriday.Strikethrough | blackfriday.SpaceHeadings | blackfriday.HardLineBreak - m *minify.M + m *minify.M ) -type renderer struct { - f *formatter - ctx context.Context - mentions []*gtsmodel.Mention - tags []*gtsmodel.Tag - blackfriday.HTMLRenderer -} - -func (r *renderer) RenderNode(w io.Writer, node *blackfriday.Node, entering bool) blackfriday.WalkStatus { - if node.Type == blackfriday.Text { - // call RenderNode to do the html escaping - var buff bytes.Buffer - status := r.HTMLRenderer.RenderNode(&buff, node, entering) - - html := buff.String() - html = r.f.ReplaceTags(r.ctx, html, r.tags) - html = r.f.ReplaceMentions(r.ctx, html, r.mentions) - - // we don't have much recourse if this fails - if _, err := io.WriteString(w, html); err != nil { - log.Errorf("error outputting markdown text: %s", err) - } - return status - } - return r.HTMLRenderer.RenderNode(w, node, entering) -} - func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag, emojis []*gtsmodel.Emoji) string { - renderer := &renderer{ - f: f, - ctx: ctx, - mentions: mentions, - tags: tags, - HTMLRenderer: *blackfriday.NewHTMLRenderer(blackfriday.HTMLRendererParameters{ - // same as blackfriday.CommonHTMLFlags, but with Smartypants disabled - // ref: https://github.com/superseriousbusiness/gotosocial/issues/1028 - Flags: blackfriday.UseXHTML, - }), - } - // Temporarily replace all found emoji shortcodes in the markdown text with // their ID so that they're not parsed as anything by the markdown parser - // this fixes cases where emojis with some underscores in them are parsed as @@ -89,8 +50,25 @@ func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, menti } // parse markdown text into html, using custom renderer to add hashtag/mention links - htmlContentBytes := blackfriday.Run([]byte(markdownText), blackfriday.WithExtensions(bfExtensions), blackfriday.WithRenderer(renderer)) - htmlContent := string(htmlContentBytes) + md := goldmark.New( + goldmark.WithRendererOptions( + html.WithXHTML(), + html.WithHardWraps(), + html.WithUnsafe(), // allows raw HTML + ), + goldmark.WithExtensions( + &customRenderer{f, ctx, mentions, tags}, + extension.Linkify, // turns URLs into links + extension.Strikethrough, + ), + ) + + var htmlContentBytes bytes.Buffer + err := md.Convert([]byte(markdownText), &htmlContentBytes) + if err != nil { + log.Errorf("error rendering markdown to HTML: %s", err) + } + htmlContent := htmlContentBytes.String() // Replace emoji IDs in the parsed html content with their shortcodes again for _, e := range emojis { @@ -102,7 +80,7 @@ func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, menti if m == nil { m = minify.New() - m.Add("text/html", &html.Minifier{ + m.Add("text/html", &minifyHtml.Minifier{ KeepEndTags: true, KeepQuotes: true, }) diff --git a/internal/text/markdown_test.go b/internal/text/markdown_test.go index 6b7c1032f..3a67218d4 100644 --- a/internal/text/markdown_test.go +++ b/internal/text/markdown_test.go @@ -71,6 +71,12 @@ const ( mdMentionAndCodeBlockExpected = "<p><span class=\"h-card\"><a href=\"http://localhost:8080/@the_mighty_zork\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>the_mighty_zork</span></a></span></p><pre><code>@the_mighty_zork\n</code></pre>" mdWithSmartypants = "\"you have to quargle the bleepflorp\" they said with 1/2 of nominal speed and 1/3 of the usual glumping" mdWithSmartypantsExpected = "<p>\"you have to quargle the bleepflorp\" they said with 1/2 of nominal speed and 1/3 of the usual glumping</p>" + mdWithAsciiHeart = "hello <3 old friend <3 i loved u </3 :(( you stole my heart" + mdWithAsciiHeartExpected = "<p>hello <3 old friend <3 i loved u </3 :(( you stole my heart</p>" + mdWithStrikethrough = "I have ~~mdae~~ made an error" + mdWithStrikethroughExpected = "<p>I have <del>mdae</del> made an error</p>" + mdWithLink = "Check out this code, i heard it was written by a sloth https://github.com/superseriousbusiness/gotosocial" + mdWithLinkExpected = "<p>Check out this code, i heard it was written by a sloth <a href=\"https://github.com/superseriousbusiness/gotosocial\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">https://github.com/superseriousbusiness/gotosocial</a></p>" ) type MarkdownTestSuite struct { @@ -160,6 +166,21 @@ func (suite *MarkdownTestSuite) TestParseSmartypants() { suite.Equal(mdWithSmartypantsExpected, s) } +func (suite *MarkdownTestSuite) TestParseAsciiHeart() { + s := suite.formatter.FromMarkdown(context.Background(), mdWithAsciiHeart, nil, nil, nil) + suite.Equal(mdWithAsciiHeartExpected, s) +} + +func (suite *MarkdownTestSuite) TestParseStrikethrough() { + s := suite.formatter.FromMarkdown(context.Background(), mdWithStrikethrough, nil, nil, nil) + suite.Equal(mdWithStrikethroughExpected, s) +} + +func (suite *MarkdownTestSuite) TestParseLink() { + s := suite.formatter.FromMarkdown(context.Background(), mdWithLink, nil, nil, nil) + suite.Equal(mdWithLinkExpected, s) +} + func TestMarkdownTestSuite(t *testing.T) { suite.Run(t, new(MarkdownTestSuite)) } diff --git a/internal/text/markdownextension.go b/internal/text/markdownextension.go new file mode 100644 index 000000000..1be1e99d6 --- /dev/null +++ b/internal/text/markdownextension.go @@ -0,0 +1,215 @@ +/* + GoToSocial + Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +package text + +import ( + "context" + "unicode" + + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/log" + "github.com/superseriousbusiness/gotosocial/internal/regexes" + "github.com/superseriousbusiness/gotosocial/internal/util" + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer" + "github.com/yuin/goldmark/text" + mdutil "github.com/yuin/goldmark/util" +) + +// A goldmark extension that parses potential mentions and hashtags separately from regular +// text, so that they stay as one contiguous text fragment in the AST, and then renders +// them separately too, to avoid scanning normal text for mentions and tags. + +// mention and hashtag fulfil the goldmark ast.Node interface. +type mention struct { + ast.BaseInline + Segment text.Segment +} + +type hashtag struct { + ast.BaseInline + Segment text.Segment +} + +var kindMention = ast.NewNodeKind("Mention") +var kindHashtag = ast.NewNodeKind("Hashtag") + +func (n *mention) Kind() ast.NodeKind { + return kindMention +} + +func (n *hashtag) Kind() ast.NodeKind { + return kindHashtag +} + +// Dump is used by goldmark for debugging. It is implemented only minimally because +// it is not used in our code. +func (n *mention) Dump(source []byte, level int) { + ast.DumpHelper(n, source, level, nil, nil) +} + +func (n *hashtag) Dump(source []byte, level int) { + ast.DumpHelper(n, source, level, nil, nil) +} + +// newMention and newHashtag create a goldmark ast.Node from a goldmark text.Segment. +// The contained segment is used in rendering. +func newMention(s text.Segment) *mention { + return &mention{ + BaseInline: ast.BaseInline{}, + Segment: s, + } +} + +func newHashtag(s text.Segment) *hashtag { + return &hashtag{ + BaseInline: ast.BaseInline{}, + Segment: s, + } +} + +// mentionParser and hashtagParser fulfil the goldmark parser.InlineParser interface. +type mentionParser struct { +} + +type hashtagParser struct { +} + +func (p *mentionParser) Trigger() []byte { + return []byte{'@'} +} + +func (p *hashtagParser) Trigger() []byte { + return []byte{'#'} +} + +func (p *mentionParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node { + before := block.PrecendingCharacter() + line, segment := block.PeekLine() + + if !unicode.IsSpace(before) { + return nil + } + + // unideal for performance but makes use of existing regex + loc := regexes.MentionFinder.FindIndex(line) + switch { + case loc == nil: + fallthrough + case loc[0] != 0: // fail if not found at start + return nil + default: + block.Advance(loc[1]) + return newMention(segment.WithStop(segment.Start + loc[1])) + } +} + +func (p *hashtagParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node { + before := block.PrecendingCharacter() + line, segment := block.PeekLine() + s := string(line) + + if !util.IsHashtagBoundary(before) { + return nil + } + + for i, r := range s { + switch { + case r == '#' && i == 0: + continue + case !util.IsPermittedInHashtag(r) && !util.IsHashtagBoundary(r): + // Fake hashtag, don't trust it + return nil + case util.IsHashtagBoundary(r): + // End of hashtag + block.Advance(i) + return newHashtag(segment.WithStop(segment.Start + i)) + } + } + // If we don't find invalid characters before the end of the line then it's good + block.Advance(len(s)) + return newHashtag(segment) +} + +// customRenderer fulfils both the renderer.NodeRenderer and goldmark.Extender interfaces. +// It is created in FromMarkdown to be used a goldmark extension, and the fields are used +// when rendering mentions and tags. +type customRenderer struct { + f *formatter + ctx context.Context + mentions []*gtsmodel.Mention + tags []*gtsmodel.Tag +} + +func (r *customRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) { + reg.Register(kindMention, r.renderMention) + reg.Register(kindHashtag, r.renderHashtag) +} + +func (r *customRenderer) Extend(m goldmark.Markdown) { + m.Parser().AddOptions(parser.WithInlineParsers( + // 500 is pretty arbitrary here, it was copied from example goldmark extension code. + // https://github.com/yuin/goldmark/blob/75d8cce5b78c7e1d5d9c4ca32c1164f0a1e57b53/extension/strikethrough.go#L111 + mdutil.Prioritized(&mentionParser{}, 500), + mdutil.Prioritized(&hashtagParser{}, 500), + )) + m.Renderer().AddOptions(renderer.WithNodeRenderers( + mdutil.Prioritized(r, 500), + )) +} + +// renderMention and renderHashtag take a mention or a hashtag ast.Node and render it as HTML. +func (r *customRenderer) renderMention(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkContinue, nil + } + + n, ok := node.(*mention) // this function is only registered for kindMention + if !ok { + log.Errorf("type assertion failed") + } + text := string(n.Segment.Value(source)) + + html := r.f.ReplaceMentions(r.ctx, text, r.mentions) + + // we don't have much recourse if this fails + if _, err := w.WriteString(html); err != nil { + log.Errorf("error outputting markdown text: %s", err) + } + return ast.WalkContinue, nil +} + +func (r *customRenderer) renderHashtag(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkContinue, nil + } + + n, ok := node.(*hashtag) // this function is only registered for kindHashtag + if !ok { + log.Errorf("type assertion failed") + } + text := string(n.Segment.Value(source)) + + html := r.f.ReplaceTags(r.ctx, text, r.tags) + + // we don't have much recourse if this fails + if _, err := w.WriteString(html); err != nil { + log.Errorf("error outputting markdown text: %s", err) + } + return ast.WalkContinue, nil +} |