1 files changed, 281 insertions, 0 deletions
diff --git a/internal/text/goldmark_parsers.go b/internal/text/goldmark_parsers.go
new file mode 100644
index 000000000..b7cf4f9e9
--- /dev/null
+++ b/internal/text/goldmark_parsers.go
@@ -0,0 +1,281 @@
+// GoToSocial
+// Copyright (C) GoToSocial Authors admin@gotosocial.org
+// SPDX-License-Identifier: AGPL-3.0-or-later
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+package text
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/superseriousbusiness/gotosocial/internal/regexes"
+	"github.com/yuin/goldmark/ast"
+	"github.com/yuin/goldmark/parser"
+	"github.com/yuin/goldmark/text"
+)
+
+/*
+	MENTION PARSER STUFF
+*/
+
+// mention is an inline AST node recording the span of
+// source text that mentionParser recognised as a mention.
+type mention struct {
+	ast.BaseInline
+	Segment text.Segment // span of the mention in the source
+}
+
+// kindMention is the node kind reported by mention nodes.
+var kindMention = ast.NewNodeKind("Mention")
+
+// Kind returns kindMention.
+func (m *mention) Kind() ast.NodeKind { return kindMention }
+
+// Dump prints the mention's source text to stdout,
+// indented by four spaces per nesting level.
+func (m *mention) Dump(source []byte, level int) {
+	indent := strings.Repeat("    ", level)
+	fmt.Printf("%sMention: %s\n", indent, string(m.Segment.Value(source)))
+}
+
+// newMention builds a mention node around the
+// given segment, which is stored on the node
+// unchanged; BaseInline keeps its zero value.
+func newMention(s text.Segment) *mention {
+	return &mention{Segment: s}
+}
+
+// mentionParser is a stateless implementation of
+// goldmark's parser.InlineParser for mentions.
+type mentionParser struct{}
+
+// Trigger reports '@', the character that opens a
+// mention, as the byte that starts mention parsing.
+func (p *mentionParser) Trigger() []byte {
+	return []byte("@")
+}
+
+// Parse attempts to read a mention at the reader's current
+// position. On a match it advances the reader past the
+// mention and returns a mention node spanning the matched
+// text; otherwise it returns nil without calling Advance.
+// The parent node and parser context are not used.
+func (p *mentionParser) Parse(
+	_ ast.Node,
+	block text.Reader,
+	_ parser.Context,
+) ast.Node {
+	// A mention may only begin directly
+	// after a mention boundary character.
+	prev := block.PrecendingCharacter()
+	if !isMentionBoundary(prev) {
+		return nil
+	}
+
+	// Search the rest of the current line; a match
+	// only counts if it starts at offset 0, i.e. at
+	// the trigger character itself.
+	line, segment := block.PeekLine()
+	match := regexes.MentionFinder.FindIndex(line)
+	if match == nil || match[0] != 0 {
+		return nil
+	}
+	end := match[1]
+
+	// Consume the matched bytes.
+	block.Advance(end)
+
+	// The node's segment starts where the peeked
+	// line starts and stops at the end of the match.
+	span := segment.WithStop(segment.Start + end)
+	return newMention(span)
+}
+
+/*
+	HASHTAG PARSER STUFF
+*/
+
+// hashtag is an inline AST node recording the span of
+// source text that hashtagParser recognised as a hashtag.
+type hashtag struct {
+	ast.BaseInline
+	Segment text.Segment // span of the hashtag in the source
+}
+
+// kindHashtag is the node kind reported by hashtag nodes.
+var kindHashtag = ast.NewNodeKind("Hashtag")
+
+// Kind returns kindHashtag.
+func (h *hashtag) Kind() ast.NodeKind { return kindHashtag }
+
+// Dump prints the hashtag's source text to stdout,
+// indented by four spaces per nesting level.
+func (h *hashtag) Dump(source []byte, level int) {
+	indent := strings.Repeat("    ", level)
+	fmt.Printf("%sHashtag: %s\n", indent, string(h.Segment.Value(source)))
+}
+
+// newHashtag builds a hashtag node around the
+// given segment, which is stored on the node
+// unchanged; BaseInline keeps its zero value.
+func newHashtag(s text.Segment) *hashtag {
+	return &hashtag{Segment: s}
+}
+
+// hashtagParser is a goldmark parser.InlineParser for hashtags.
+type hashtagParser struct{}
+
+// Trigger reports '#', the character that opens a
+// hashtag, as the byte that starts hashtag parsing.
+func (p *hashtagParser) Trigger() []byte {
+	return []byte("#")
+}
+
+func (p *hashtagParser) Parse(
+	_ ast.Node,
+	block text.Reader,
+	_ parser.Context,
+) ast.Node {
+	// If preceding character is not a valid boundary
+	// character, then this cannot be a valid hashtag.
+	if !isHashtagBoundary(block.PrecendingCharacter()) {
+		return nil
+	}
+
+	var (
+		line, segment = block.PeekLine()
+		lineStr       = string(line)
+		lineStrLen    = len(lineStr)
+	)
+
+	if lineStrLen <= 1 {
+		// This is probably just
+		// a lonely '#' char.
+		return nil
+	}
+
+	// Iterate through the runes in the detected
+	// hashtag string until we reach either:
+	//   - A weird character (bad).
+	//   - The end of the hashtag (ok).
+	//   - The end of the string (also ok).
+	for i, r := range lineStr {
+		switch {
+		case r == '#' && i == 0:
+			// Ignore initial '#'.
+			continue
+
+		case !isPlausiblyInHashtag(r) &&
+			!isHashtagBoundary(r):
+			// Weird non-boundary character
+			// in the hashtag. Don't trust it.
+			return nil
+
+		case isHashtagBoundary(r):
+			// Reached closing hashtag
+			// boundary. Advance block
+			// to the end of the hashtag.
+			block.Advance(i)
+
+			// hashtag ast.Node spans from
+			// the beginning of this segment
+			// up to the boundary character.
+			return newHashtag(
+				segment.WithStop(
+					segment.Start + i,
+				),
+			)
+		}
+	}
+
+	// No invalid or boundary characters before the
+	// end of the line: it's all hashtag, baby 😎
+	//
+	// Advance block to the end of the segment.
+	block.Advance(segment.Len())
+
+	// hashtag ast.Node spans
+	// the entire segment.
+	return newHashtag(segment)
+}
+
+/*
+	EMOJI PARSER STUFF
+*/
+
+// emoji is an inline AST node recording the span of
+// source text that emojiParser recognised as an emoji.
+type emoji struct {
+	ast.BaseInline
+	Segment text.Segment // span of the emoji in the source
+}
+
+// kindEmoji is the node kind reported by emoji nodes.
+var kindEmoji = ast.NewNodeKind("Emoji")
+
+// Kind returns kindEmoji.
+func (e *emoji) Kind() ast.NodeKind { return kindEmoji }
+
+// Dump prints the emoji's source text to stdout,
+// indented by four spaces per nesting level.
+func (e *emoji) Dump(source []byte, level int) {
+	indent := strings.Repeat("    ", level)
+	fmt.Printf("%sEmoji: %s\n", indent, string(e.Segment.Value(source)))
+}
+
+// newEmoji builds an emoji node around the
+// given segment, which is stored on the node
+// unchanged; BaseInline keeps its zero value.
+func newEmoji(s text.Segment) *emoji {
+	return &emoji{Segment: s}
+}
+
+// emojiParser is a goldmark parser.InlineParser for emoji.
+type emojiParser struct{}
+
+// Trigger reports ':', the character that opens an
+// emoji, as the byte that starts emoji parsing.
+func (p *emojiParser) Trigger() []byte {
+	return []byte(":")
+}
+
+// Parse attempts to read an emoji at the reader's current
+// position. On a match it advances the reader past the emoji
+// and returns an emoji node; otherwise it returns nil without
+// calling Advance.
+// Unlike the mention and hashtag parsers, the character
+// preceding the trigger is not checked.
+func (p *emojiParser) Parse(
+	_ ast.Node,
+	block text.Reader,
+	_ parser.Context,
+) ast.Node {
+	// Search the rest of the current line; a match
+	// only counts if it starts at offset 0, i.e. at
+	// the trigger character itself.
+	line, segment := block.PeekLine()
+	match := regexes.EmojiFinder.FindIndex(line)
+	if match == nil || match[0] != 0 {
+		return nil
+	}
+	end := match[1]
+
+	// Consume the matched bytes.
+	block.Advance(end)
+
+	// The node's segment starts where the peeked
+	// line starts and stops at the end of the match.
+	span := segment.WithStop(segment.Start + end)
+	return newEmoji(span)
+}