diff options
Diffstat (limited to 'internal/text/goldmark_parsers.go')
-rw-r--r-- | internal/text/goldmark_parsers.go | 281 |
1 files changed, 281 insertions, 0 deletions
diff --git a/internal/text/goldmark_parsers.go b/internal/text/goldmark_parsers.go new file mode 100644 index 000000000..b7cf4f9e9 --- /dev/null +++ b/internal/text/goldmark_parsers.go @@ -0,0 +1,281 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +package text + +import ( + "fmt" + "strings" + + "github.com/superseriousbusiness/gotosocial/internal/regexes" + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/text" +) + +/* + MENTION PARSER STUFF +*/ + +// mention fulfils the goldmark +// ast.Node interface. +type mention struct { + ast.BaseInline + Segment text.Segment +} + +var kindMention = ast.NewNodeKind("Mention") + +func (n *mention) Kind() ast.NodeKind { + return kindMention +} + +func (n *mention) Dump(source []byte, level int) { + fmt.Printf("%sMention: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source))) +} + +// newMention creates a goldmark ast.Node +// from a text.Segment. The contained segment +// is used in rendering. +func newMention(s text.Segment) *mention { + return &mention{ + BaseInline: ast.BaseInline{}, + Segment: s, + } +} + +// mentionParser fulfils the goldmark +// parser.InlineParser interface. +type mentionParser struct{} + +// Mention parsing is triggered by the `@` symbol +// which appears at the beginning of a mention. +func (p *mentionParser) Trigger() []byte { + return []byte{'@'} +} + +func (p *mentionParser) Parse( + _ ast.Node, + block text.Reader, + _ parser.Context, +) ast.Node { + // If preceding character is not a valid boundary + // character, then this cannot be a valid mention. + if !isMentionBoundary(block.PrecendingCharacter()) { + return nil + } + + line, segment := block.PeekLine() + + // Ascertain location of mention in the line + // that starts with the trigger character. + loc := regexes.MentionFinder.FindIndex(line) + if loc == nil || loc[0] != 0 { + // Noop if not found or + // not found at start. + return nil + } + + // Advance the block to + // the end of the mention. + block.Advance(loc[1]) + + // mention ast.Node spans from the + // beginning of this segment up to + // the last character of the mention. + return newMention( + segment.WithStop( + segment.Start + loc[1], + ), + ) +} + +/* + HASHTAG PARSER STUFF +*/ + +// hashtag fulfils the goldmark +// ast.Node interface. +type hashtag struct { + ast.BaseInline + Segment text.Segment +} + +var kindHashtag = ast.NewNodeKind("Hashtag") + +func (n *hashtag) Kind() ast.NodeKind { + return kindHashtag +} + +func (n *hashtag) Dump(source []byte, level int) { + fmt.Printf("%sHashtag: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source))) +} + +// newHashtag creates a goldmark ast.Node +// from a text.Segment. The contained segment +// is used in rendering. +func newHashtag(s text.Segment) *hashtag { + return &hashtag{ + BaseInline: ast.BaseInline{}, + Segment: s, + } +} + +type hashtagParser struct{} + +// Hashtag parsing is triggered by a '#' symbol +// which appears at the beginning of a hashtag. +func (p *hashtagParser) Trigger() []byte { + return []byte{'#'} +} + +func (p *hashtagParser) Parse( + _ ast.Node, + block text.Reader, + _ parser.Context, +) ast.Node { + // If preceding character is not a valid boundary + // character, then this cannot be a valid hashtag. + if !isHashtagBoundary(block.PrecendingCharacter()) { + return nil + } + + var ( + line, segment = block.PeekLine() + lineStr = string(line) + lineStrLen = len(lineStr) + ) + + if lineStrLen <= 1 { + // This is probably just + // a lonely '#' char. + return nil + } + + // Iterate through the runes in the detected + // hashtag string until we reach either: + // - A weird character (bad). + // - The end of the hashtag (ok). + // - The end of the string (also ok). + for i, r := range lineStr { + switch { + case r == '#' && i == 0: + // Ignore initial '#'. + continue + + case !isPlausiblyInHashtag(r) && + !isHashtagBoundary(r): + // Weird non-boundary character + // in the hashtag. Don't trust it. + return nil + + case isHashtagBoundary(r): + // Reached closing hashtag + // boundary. Advance block + // to the end of the hashtag. + block.Advance(i) + + // hashtag ast.Node spans from + // the beginning of this segment + // up to the boundary character. + return newHashtag( + segment.WithStop( + segment.Start + i, + ), + ) + } + } + + // No invalid or boundary characters before the + // end of the line: it's all hashtag, baby 😎 + // + // Advance block to the end of the segment. + block.Advance(segment.Len()) + + // hashtag ast.Node spans + // the entire segment. + return newHashtag(segment) +} + +/* + EMOJI PARSER STUFF +*/ + +// emoji fulfils the goldmark +// ast.Node interface. +type emoji struct { + ast.BaseInline + Segment text.Segment +} + +var kindEmoji = ast.NewNodeKind("Emoji") + +func (n *emoji) Kind() ast.NodeKind { + return kindEmoji +} + +func (n *emoji) Dump(source []byte, level int) { + fmt.Printf("%sEmoji: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source))) +} + +// newEmoji creates a goldmark ast.Node +// from a text.Segment. The contained +// segment is used in rendering. +func newEmoji(s text.Segment) *emoji { + return &emoji{ + BaseInline: ast.BaseInline{}, + Segment: s, + } +} + +type emojiParser struct{} + +// Emoji parsing is triggered by a ':' char +// which appears at the start of the emoji. +func (p *emojiParser) Trigger() []byte { + return []byte{':'} +} + +func (p *emojiParser) Parse( + _ ast.Node, + block text.Reader, + _ parser.Context, +) ast.Node { + line, segment := block.PeekLine() + + // Ascertain location of emoji in the line + // that starts with the trigger character. + loc := regexes.EmojiFinder.FindIndex(line) + if loc == nil || loc[0] != 0 { + // Noop if not found or + // not found at start. + return nil + } + + // Advance the block to + // the end of the emoji. + block.Advance(loc[1]) + + // emoji ast.Node spans from the + // beginning of this segment up to + // the last character of the emoji. + return newEmoji( + segment.WithStop( + segment.Start + loc[1], + ), + ) +} |