summary | refs | log | tree | commit | diff
path: root/internal/text
diff options
context:
space:
mode:
Diffstat (limited to 'internal/text')
-rw-r--r-- internal/text/markdown.go 72
-rw-r--r-- internal/text/markdown_test.go 21
-rw-r--r-- internal/text/markdownextension.go 215
3 files changed, 261 insertions, 47 deletions
diff --git a/internal/text/markdown.go b/internal/text/markdown.go
index 837f213e1..56edf2d01 100644
--- a/internal/text/markdown.go
+++ b/internal/text/markdown.go
@@ -21,62 +21,23 @@ package text
import (
"bytes"
"context"
- "io"
"strings"
- "github.com/russross/blackfriday/v2"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/tdewolff/minify/v2"
- "github.com/tdewolff/minify/v2/html"
+ minifyHtml "github.com/tdewolff/minify/v2/html"
+ "github.com/yuin/goldmark"
+ "github.com/yuin/goldmark/extension"
+ "github.com/yuin/goldmark/renderer/html"
)
var (
- bfExtensions = blackfriday.NoIntraEmphasis | blackfriday.FencedCode | blackfriday.Autolink | blackfriday.Strikethrough | blackfriday.SpaceHeadings | blackfriday.HardLineBreak
- m *minify.M
+ m *minify.M
)
-type renderer struct {
- f *formatter
- ctx context.Context
- mentions []*gtsmodel.Mention
- tags []*gtsmodel.Tag
- blackfriday.HTMLRenderer
-}
-
-func (r *renderer) RenderNode(w io.Writer, node *blackfriday.Node, entering bool) blackfriday.WalkStatus {
- if node.Type == blackfriday.Text {
- // call RenderNode to do the html escaping
- var buff bytes.Buffer
- status := r.HTMLRenderer.RenderNode(&buff, node, entering)
-
- html := buff.String()
- html = r.f.ReplaceTags(r.ctx, html, r.tags)
- html = r.f.ReplaceMentions(r.ctx, html, r.mentions)
-
- // we don't have much recourse if this fails
- if _, err := io.WriteString(w, html); err != nil {
- log.Errorf("error outputting markdown text: %s", err)
- }
- return status
- }
- return r.HTMLRenderer.RenderNode(w, node, entering)
-}
-
func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag, emojis []*gtsmodel.Emoji) string {
- renderer := &renderer{
- f: f,
- ctx: ctx,
- mentions: mentions,
- tags: tags,
- HTMLRenderer: *blackfriday.NewHTMLRenderer(blackfriday.HTMLRendererParameters{
- // same as blackfriday.CommonHTMLFlags, but with Smartypants disabled
- // ref: https://github.com/superseriousbusiness/gotosocial/issues/1028
- Flags: blackfriday.UseXHTML,
- }),
- }
-
// Temporarily replace all found emoji shortcodes in the markdown text with
// their ID so that they're not parsed as anything by the markdown parser -
// this fixes cases where emojis with some underscores in them are parsed as
@@ -89,8 +50,25 @@ func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, menti
}
// parse markdown text into html, using custom renderer to add hashtag/mention links
- htmlContentBytes := blackfriday.Run([]byte(markdownText), blackfriday.WithExtensions(bfExtensions), blackfriday.WithRenderer(renderer))
- htmlContent := string(htmlContentBytes)
+ md := goldmark.New(
+ goldmark.WithRendererOptions(
+ html.WithXHTML(),
+ html.WithHardWraps(),
+ html.WithUnsafe(), // allows raw HTML
+ ),
+ goldmark.WithExtensions(
+ &customRenderer{f, ctx, mentions, tags},
+ extension.Linkify, // turns URLs into links
+ extension.Strikethrough,
+ ),
+ )
+
+ var htmlContentBytes bytes.Buffer
+ err := md.Convert([]byte(markdownText), &htmlContentBytes)
+ if err != nil {
+ log.Errorf("error rendering markdown to HTML: %s", err)
+ }
+ htmlContent := htmlContentBytes.String()
// Replace emoji IDs in the parsed html content with their shortcodes again
for _, e := range emojis {
@@ -102,7 +80,7 @@ func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, menti
if m == nil {
m = minify.New()
- m.Add("text/html", &html.Minifier{
+ m.Add("text/html", &minifyHtml.Minifier{
KeepEndTags: true,
KeepQuotes: true,
})
diff --git a/internal/text/markdown_test.go b/internal/text/markdown_test.go
index 6b7c1032f..3a67218d4 100644
--- a/internal/text/markdown_test.go
+++ b/internal/text/markdown_test.go
@@ -71,6 +71,12 @@ const (
mdMentionAndCodeBlockExpected = "<p><span class=\"h-card\"><a href=\"http://localhost:8080/@the_mighty_zork\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>the_mighty_zork</span></a></span></p><pre><code>@the_mighty_zork\n</code></pre>"
mdWithSmartypants = "\"you have to quargle the bleepflorp\" they said with 1/2 of nominal speed and 1/3 of the usual glumping"
mdWithSmartypantsExpected = "<p>\"you have to quargle the bleepflorp\" they said with 1/2 of nominal speed and 1/3 of the usual glumping</p>"
+ mdWithAsciiHeart = "hello <3 old friend <3 i loved u </3 :(( you stole my heart"
+ mdWithAsciiHeartExpected = "<p>hello &lt;3 old friend &lt;3 i loved u &lt;/3 :(( you stole my heart</p>"
+ mdWithStrikethrough = "I have ~~mdae~~ made an error"
+ mdWithStrikethroughExpected = "<p>I have <del>mdae</del> made an error</p>"
+ mdWithLink = "Check out this code, i heard it was written by a sloth https://github.com/superseriousbusiness/gotosocial"
+ mdWithLinkExpected = "<p>Check out this code, i heard it was written by a sloth <a href=\"https://github.com/superseriousbusiness/gotosocial\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">https://github.com/superseriousbusiness/gotosocial</a></p>"
)
type MarkdownTestSuite struct {
@@ -160,6 +166,21 @@ func (suite *MarkdownTestSuite) TestParseSmartypants() {
suite.Equal(mdWithSmartypantsExpected, s)
}
+// TestParseAsciiHeart checks that the "<3" and "</3" sequences in the input
+// appear as "&lt;3" and "&lt;/3" in the rendered paragraph.
+func (suite *MarkdownTestSuite) TestParseAsciiHeart() {
+	ctx := context.Background()
+	got := suite.formatter.FromMarkdown(ctx, mdWithAsciiHeart, nil, nil, nil)
+	suite.Equal(mdWithAsciiHeartExpected, got)
+}
+
+// TestParseStrikethrough checks that text wrapped in "~~" is rendered inside
+// a <del> element.
+func (suite *MarkdownTestSuite) TestParseStrikethrough() {
+	ctx := context.Background()
+	got := suite.formatter.FromMarkdown(ctx, mdWithStrikethrough, nil, nil, nil)
+	suite.Equal(mdWithStrikethroughExpected, got)
+}
+
+// TestParseLink checks that a bare URL in the input is rendered as an anchor
+// element carrying the expected rel and target attributes.
+func (suite *MarkdownTestSuite) TestParseLink() {
+	ctx := context.Background()
+	got := suite.formatter.FromMarkdown(ctx, mdWithLink, nil, nil, nil)
+	suite.Equal(mdWithLinkExpected, got)
+}
+
func TestMarkdownTestSuite(t *testing.T) {
suite.Run(t, new(MarkdownTestSuite))
}
diff --git a/internal/text/markdownextension.go b/internal/text/markdownextension.go
new file mode 100644
index 000000000..1be1e99d6
--- /dev/null
+++ b/internal/text/markdownextension.go
@@ -0,0 +1,215 @@
+/*
+ GoToSocial
+ Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package text
+
+import (
+ "context"
+ "unicode"
+
+ "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+ "github.com/superseriousbusiness/gotosocial/internal/log"
+ "github.com/superseriousbusiness/gotosocial/internal/regexes"
+ "github.com/superseriousbusiness/gotosocial/internal/util"
+ "github.com/yuin/goldmark"
+ "github.com/yuin/goldmark/ast"
+ "github.com/yuin/goldmark/parser"
+ "github.com/yuin/goldmark/renderer"
+ "github.com/yuin/goldmark/text"
+ mdutil "github.com/yuin/goldmark/util"
+)
+
+// A goldmark extension that parses potential mentions and hashtags separately from regular
+// text, so that they stay as one contiguous text fragment in the AST, and then renders
+// them separately too, to avoid scanning normal text for mentions and tags.
+
+// mention and hashtag are the custom inline AST nodes of this extension. They
+// embed ast.BaseInline and, together with their Kind and Dump methods, fulfil
+// the goldmark ast.Node interface.
+type (
+	// mention records the source segment matched as a potential mention.
+	mention struct {
+		ast.BaseInline
+		Segment text.Segment
+	}
+
+	// hashtag records the source segment matched as a potential hashtag.
+	hashtag struct {
+		ast.BaseInline
+		Segment text.Segment
+	}
+)
+
+// Node kinds for the two custom inline node types, created once at package
+// initialisation.
+var (
+	kindMention = ast.NewNodeKind("Mention")
+	kindHashtag = ast.NewNodeKind("Hashtag")
+)
+
+// Kind returns kindMention, the node kind of every mention node.
+func (n *mention) Kind() ast.NodeKind { return kindMention }
+
+// Kind returns kindHashtag, the node kind of every hashtag node.
+func (n *hashtag) Kind() ast.NodeKind { return kindHashtag }
+
+// Dump is part of the goldmark node interface and exists for debugging. Our
+// code never calls it, so the implementation is minimal: it delegates to
+// ast.DumpHelper and supplies neither extra key/value pairs nor a callback.
+func (n *mention) Dump(source []byte, level int) {
+	ast.DumpHelper(
+		n,
+		source,
+		level,
+		nil,
+		nil,
+	)
+}
+
+// Dump is the hashtag counterpart of (*mention).Dump, kept equally minimal.
+func (n *hashtag) Dump(source []byte, level int) {
+	ast.DumpHelper(
+		n,
+		source,
+		level,
+		nil,
+		nil,
+	)
+}
+
+// newMention builds a mention node around the given goldmark text.Segment.
+// The segment is kept on the node because rendering reads the matched source
+// text back out of it.
+func newMention(s text.Segment) *mention {
+	node := new(mention)
+	node.Segment = s
+	return node
+}
+
+// newHashtag builds a hashtag node around the given goldmark text.Segment.
+// The segment is kept on the node because rendering reads the matched source
+// text back out of it.
+func newHashtag(s text.Segment) *hashtag {
+	node := new(hashtag)
+	node.Segment = s
+	return node
+}
+
+// mentionParser and hashtagParser fulfil the goldmark parser.InlineParser
+// interface. Neither carries any state, hence the empty structs.
+type (
+	mentionParser struct{}
+	hashtagParser struct{}
+)
+
+// Trigger returns the byte that starts a mention: '@'.
+func (p *mentionParser) Trigger() []byte {
+	return []byte("@")
+}
+
+// Trigger returns the byte that starts a hashtag: '#'.
+func (p *hashtagParser) Trigger() []byte {
+	return []byte("#")
+}
+
+// Parse tries to read a mention at the reader's current position. On success
+// it advances the reader past the match and returns a mention node covering
+// it; otherwise it returns nil and leaves the reader where it was.
+func (p *mentionParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
+	// Only consider an '@' that directly follows whitespace.
+	// (PrecendingCharacter is the method's spelling in goldmark's API.)
+	if prev := block.PrecendingCharacter(); !unicode.IsSpace(prev) {
+		return nil
+	}
+
+	rest, seg := block.PeekLine()
+
+	// unideal for performance but makes use of existing regex
+	match := regexes.MentionFinder.FindIndex(rest)
+	if match == nil || match[0] != 0 {
+		// Nothing found, or the match does not begin at the current position.
+		return nil
+	}
+
+	end := match[1]
+	block.Advance(end)
+	return newMention(seg.WithStop(seg.Start + end))
+}
+
+// Parse tries to read a hashtag at the reader's current position. On success
+// it advances the reader past the tag and returns a hashtag node covering it;
+// otherwise it returns nil and leaves the reader where it was.
+func (p *hashtagParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
+	// Only consider a '#' that directly follows a hashtag boundary character.
+	// (PrecendingCharacter is the method's spelling in goldmark's API.)
+	if !util.IsHashtagBoundary(block.PrecendingCharacter()) {
+		return nil
+	}
+
+	raw, seg := block.PeekLine()
+	rest := string(raw)
+
+	for offset, char := range rest {
+		// Skip over the leading '#' itself.
+		if offset == 0 && char == '#' {
+			continue
+		}
+
+		// A boundary character ends the hashtag just before it.
+		if util.IsHashtagBoundary(char) {
+			block.Advance(offset)
+			return newHashtag(seg.WithStop(seg.Start + offset))
+		}
+
+		// Fake hashtag, don't trust it
+		if !util.IsPermittedInHashtag(char) {
+			return nil
+		}
+	}
+
+	// No boundary or invalid character before the end of the line, so the
+	// whole remainder of the line is the hashtag.
+	block.Advance(len(rest))
+	return newHashtag(seg)
+}
+
+// customRenderer fulfils both the renderer.NodeRenderer and goldmark.Extender interfaces.
+// It is created in FromMarkdown to be used as a goldmark extension, and its fields are
+// read when rendering mentions and tags.
+type customRenderer struct {
+ f *formatter // provides ReplaceMentions and ReplaceTags
+ ctx context.Context // passed through to ReplaceMentions and ReplaceTags
+ mentions []*gtsmodel.Mention // handed to ReplaceMentions by renderMention
+ tags []*gtsmodel.Tag // handed to ReplaceTags by renderHashtag
+}
+
+// RegisterFuncs is the renderer.NodeRenderer half of customRenderer. It
+// registers one render function for each of the two custom node kinds.
+func (r *customRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
+	// Mention nodes are rendered by renderMention.
+	reg.Register(kindMention, r.renderMention)
+
+	// Hashtag nodes are rendered by renderHashtag.
+	reg.Register(kindHashtag, r.renderHashtag)
+}
+
+func (r *customRenderer) Extend(m goldmark.Markdown) {
+ m.Parser().AddOptions(parser.WithInlineParsers(
+ // 500 is pretty arbitrary here, it was copied from example goldmark extension code.
+ // https://github.com/yuin/goldmark/blob/75d8cce5b78c7e1d5d9c4ca32c1164f0a1e57b53/extension/strikethrough.go#L111
+ mdutil.Prioritized(&mentionParser{}, 500),
+ mdutil.Prioritized(&hashtagParser{}, 500),
+ ))
+ m.Renderer().AddOptions(renderer.WithNodeRenderers(
+ mdutil.Prioritized(r, 500),
+ ))
+}
+
+// renderMention takes a mention ast.Node and renders it as HTML.
+//
+// Only the entering visit produces output. The text covered by the node's
+// segment is passed to the formatter's ReplaceMentions together with the
+// renderer's mentions, and the result is written to w. The function always
+// returns ast.WalkContinue and a nil error; failures are logged.
+func (r *customRenderer) renderMention(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	if !entering {
+		return ast.WalkContinue, nil
+	}
+
+	n, ok := node.(*mention) // this function is only registered for kindMention
+	if !ok {
+		// n is nil when the assertion fails; stop here instead of
+		// dereferencing it below, which would panic.
+		log.Errorf("type assertion failed")
+		return ast.WalkContinue, nil
+	}
+
+	// Named raw/rendered rather than text/html so the imported goldmark
+	// text package is not shadowed.
+	raw := string(n.Segment.Value(source))
+	rendered := r.f.ReplaceMentions(r.ctx, raw, r.mentions)
+
+	// we don't have much recourse if this fails
+	if _, err := w.WriteString(rendered); err != nil {
+		log.Errorf("error outputting markdown text: %s", err)
+	}
+	return ast.WalkContinue, nil
+}
+
+// renderHashtag takes a hashtag ast.Node and renders it as HTML.
+//
+// Only the entering visit produces output. The text covered by the node's
+// segment is passed to the formatter's ReplaceTags together with the
+// renderer's tags, and the result is written to w. The function always
+// returns ast.WalkContinue and a nil error; failures are logged.
+func (r *customRenderer) renderHashtag(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
+	if !entering {
+		return ast.WalkContinue, nil
+	}
+
+	n, ok := node.(*hashtag) // this function is only registered for kindHashtag
+	if !ok {
+		// n is nil when the assertion fails; stop here instead of
+		// dereferencing it below, which would panic.
+		log.Errorf("type assertion failed")
+		return ast.WalkContinue, nil
+	}
+
+	// Named raw/rendered rather than text/html so the imported goldmark
+	// text package is not shadowed.
+	raw := string(n.Segment.Value(source))
+	rendered := r.f.ReplaceTags(r.ctx, raw, r.tags)
+
+	// we don't have much recourse if this fails
+	if _, err := w.WriteString(rendered); err != nil {
+		log.Errorf("error outputting markdown text: %s", err)
+	}
+	return ast.WalkContinue, nil
+}