summaryrefslogtreecommitdiff
path: root/internal/text
diff options
context:
space:
mode:
authorLibravatar Tobi Smethurst <31960611+tsmethurst@users.noreply.github.com>2021-07-26 20:25:54 +0200
committerLibravatar GitHub <noreply@github.com>2021-07-26 20:25:54 +0200
commitad0e26dc04008feec8de0603c88fbd63f87c18ec (patch)
treefb8402a9d881b6480eba0a2402f05f7b39f7435c /internal/text
parentadd trusted proxy for parsing client IPs (#115) (diff)
downloadgotosocial-ad0e26dc04008feec8de0603c88fbd63f87c18ec.tar.xz
Markdown Statuses (#116)
* parse markdown statuses if desired * add some preliminary docs for writing posts
Diffstat (limited to 'internal/text')
-rw-r--r--internal/text/common.go46
-rw-r--r--internal/text/formatter.go49
-rw-r--r--internal/text/markdown.go58
-rw-r--r--internal/text/plain.go50
-rw-r--r--internal/text/sanitize.go50
5 files changed, 253 insertions, 0 deletions
diff --git a/internal/text/common.go b/internal/text/common.go
new file mode 100644
index 000000000..0165af630
--- /dev/null
+++ b/internal/text/common.go
@@ -0,0 +1,46 @@
+/*
+ GoToSocial
+ Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package text
+
+import (
+ "fmt"
+ "strings"
+)
+
+// preformat applies the normalisation shared by every formatter to raw user input, before any
+// markup is generated. At present that is a single step: stripping leading and trailing
+// whitespace. All user-supplied text should be passed through here first.
+func preformat(in string) string {
+	return strings.TrimSpace(in)
+}
+
+// postformat finishes off generated HTML so that it is ready to be stored or served: the input
+// is run through SanitizeHTML, wrapped in a single <p> element, stripped of every newline
+// character, and finally trimmed of any surrounding whitespace.
+func postformat(in string) string {
+	// sanitize first, so the paragraph wrapper goes around html that is already clean
+	wrapped := fmt.Sprintf(`<p>%s</p>`, SanitizeHTML(in))
+	// drop every newline, then trim whatever whitespace is left at the edges
+	return strings.TrimSpace(strings.ReplaceAll(wrapped, "\n", ""))
+}
diff --git a/internal/text/formatter.go b/internal/text/formatter.go
new file mode 100644
index 000000000..f8cca6675
--- /dev/null
+++ b/internal/text/formatter.go
@@ -0,0 +1,49 @@
+/*
+ GoToSocial
+ Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package text
+
+import (
+ "github.com/sirupsen/logrus"
+ "github.com/superseriousbusiness/gotosocial/internal/config"
+ "github.com/superseriousbusiness/gotosocial/internal/db"
+ "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+)
+
+// Formatter wraps some logic and functions for parsing statuses and other text input into nice html.
+// Both methods take the raw text, together with the mentions and tags to render as links within it,
+// and return the resulting HTML as a string.
+type Formatter interface {
+ // FromMarkdown converts markdown-formatted text into HTML.
+ FromMarkdown(md string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string
+ // FromPlain converts plaintext into HTML.
+ FromPlain(plain string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string
+}
+
+// formatter is the Formatter implementation returned by NewFormatter.
+type formatter struct {
+ // cfg is the configuration handed to NewFormatter.
+ cfg *config.Config
+ // db is used to look up the target accounts of mentions while formatting.
+ db db.DB
+ // log is the logger handed to NewFormatter.
+ log *logrus.Logger
+}
+
+// NewFormatter builds a Formatter, for turning statuses and other text input into nice html,
+// out of the given config, database, and logger.
+func NewFormatter(cfg *config.Config, db db.DB, log *logrus.Logger) Formatter {
+	f := new(formatter)
+	f.cfg = cfg
+	f.db = db
+	f.log = log
+	return f
+}
diff --git a/internal/text/markdown.go b/internal/text/markdown.go
new file mode 100644
index 000000000..d1309f389
--- /dev/null
+++ b/internal/text/markdown.go
@@ -0,0 +1,58 @@
+/*
+ GoToSocial
+ Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package text
+
+import (
+ "fmt"
+ "strings"
+
+ "github.com/russross/blackfriday/v2"
+ "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+)
+
+// bfExtensions is the set of blackfriday parser extensions that FromMarkdown passes to
+// blackfriday.Run: no emphasis inside words, fenced code blocks, automatic linking,
+// strikethrough, space-separated headings, and backslash line breaks.
+// See the blackfriday documentation for the exact effect of each flag.
+var bfExtensions = blackfriday.NoIntraEmphasis |
+ blackfriday.FencedCode |
+ blackfriday.Autolink |
+ blackfriday.Strikethrough |
+ blackfriday.SpaceHeadings |
+ blackfriday.BackslashLineBreak
+
+// FromMarkdown converts markdown-formatted text into sanitized HTML.
+//
+// The input is trimmed by preformat, rendered by blackfriday using bfExtensions, and then has
+// every occurrence of each mention and hashtag swapped for a link, before the result is passed
+// through postformat. A mention whose target account cannot be fetched from the database is
+// left exactly as the user wrote it (the failure is logged).
+//
+// All substitutions happen in one pass, with longer search strings taking priority. This means
+// "@user" can never clobber part of "@user@example.org" (nor "#go" part of "#golang"), and HTML
+// inserted for one mention or tag is never rescanned and rewritten on behalf of another.
+func (f *formatter) FromMarkdown(md string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string {
+	content := preformat(md)
+
+	// do the markdown parsing *first*
+	content = string(blackfriday.Run([]byte(content), blackfriday.WithExtensions(bfExtensions)))
+
+	// pairs holds alternating (search string, replacement html) entries, kept ordered from the
+	// longest search string to the shortest: strings.NewReplacer gives priority to earlier pairs,
+	// so this ordering makes the longest possible match win at any given position.
+	pairs := make([]string, 0, 2*(len(mentions)+len(tags)))
+	addPair := func(old string, html string) {
+		if old == "" {
+			// an empty search string would match between every character of the content
+			return
+		}
+		// find the first entry with a strictly shorter search string, and insert in front of it;
+		// entries of equal length keep their insertion order, so the first one added wins
+		i := 0
+		for i < len(pairs) && len(pairs[i]) >= len(old) {
+			i += 2
+		}
+		pairs = append(pairs, "", "")
+		copy(pairs[i+2:], pairs[i:])
+		pairs[i], pairs[i+1] = old, html
+	}
+
+	// format mentions nicely
+	for _, menchie := range mentions {
+		targetAccount := &gtsmodel.Account{}
+		if err := f.db.GetByID(menchie.TargetAccountID, targetAccount); err != nil {
+			// best effort: the mention stays as plain text, but the failure shouldn't vanish silently
+			if f.log != nil {
+				f.log.Errorf("error getting account %v for mention %v: %v", menchie.TargetAccountID, menchie.NameString, err)
+			}
+			continue
+		}
+		mentionContent := fmt.Sprintf(`<span class="h-card"><a href="%s" class="u-url mention">@<span>%s</span></a></span>`, targetAccount.URL, targetAccount.Username)
+		addPair(menchie.NameString, mentionContent)
+	}
+
+	// format tags nicely
+	for _, tag := range tags {
+		hashtag := fmt.Sprintf("#%s", tag.Name)
+		if hashtag == "#" {
+			// a tag without a name would turn every bare '#' in the content into a link
+			continue
+		}
+		tagContent := fmt.Sprintf(`<a href="%s" class="mention hashtag" rel="tag">#<span>%s</span></a>`, tag.URL, tag.Name)
+		addPair(hashtag, tagContent)
+	}
+
+	// apply every substitution in a single pass over the content
+	content = strings.NewReplacer(pairs...).Replace(content)
+
+	return postformat(content)
+}
diff --git a/internal/text/plain.go b/internal/text/plain.go
new file mode 100644
index 000000000..24ef16f8e
--- /dev/null
+++ b/internal/text/plain.go
@@ -0,0 +1,50 @@
+/*
+ GoToSocial
+ Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package text
+
+import (
+ "fmt"
+ "strings"
+
+ "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+)
+
+// FromPlain converts plaintext into sanitized HTML.
+//
+// The input is trimmed by preformat, has every occurrence of each mention and hashtag swapped
+// for a link, has its newlines turned into <br /> elements, and is then passed through
+// postformat. A mention whose target account cannot be fetched from the database is left
+// exactly as the user wrote it (the failure is logged).
+//
+// All mention and tag substitutions happen in one pass, with longer search strings taking
+// priority. This means "@user" can never clobber part of "@user@example.org" (nor "#go" part
+// of "#golang"), and HTML inserted for one mention or tag is never rescanned and rewritten on
+// behalf of another.
+//
+// NOTE(review): the input is not HTML-escaped here, so any markup typed by the user is left to
+// the sanitizer in postformat rather than being shown literally -- confirm this is intended.
+func (f *formatter) FromPlain(plain string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string {
+	content := preformat(plain)
+
+	// pairs holds alternating (search string, replacement html) entries, kept ordered from the
+	// longest search string to the shortest: strings.NewReplacer gives priority to earlier pairs,
+	// so this ordering makes the longest possible match win at any given position.
+	pairs := make([]string, 0, 2*(len(mentions)+len(tags)))
+	addPair := func(old string, html string) {
+		if old == "" {
+			// an empty search string would match between every character of the content
+			return
+		}
+		// find the first entry with a strictly shorter search string, and insert in front of it;
+		// entries of equal length keep their insertion order, so the first one added wins
+		i := 0
+		for i < len(pairs) && len(pairs[i]) >= len(old) {
+			i += 2
+		}
+		pairs = append(pairs, "", "")
+		copy(pairs[i+2:], pairs[i:])
+		pairs[i], pairs[i+1] = old, html
+	}
+
+	// format mentions nicely
+	for _, menchie := range mentions {
+		targetAccount := &gtsmodel.Account{}
+		if err := f.db.GetByID(menchie.TargetAccountID, targetAccount); err != nil {
+			// best effort: the mention stays as plain text, but the failure shouldn't vanish silently
+			if f.log != nil {
+				f.log.Errorf("error getting account %v for mention %v: %v", menchie.TargetAccountID, menchie.NameString, err)
+			}
+			continue
+		}
+		mentionContent := fmt.Sprintf(`<span class="h-card"><a href="%s" class="u-url mention">@<span>%s</span></a></span>`, targetAccount.URL, targetAccount.Username)
+		addPair(menchie.NameString, mentionContent)
+	}
+
+	// format tags nicely
+	for _, tag := range tags {
+		hashtag := fmt.Sprintf("#%s", tag.Name)
+		if hashtag == "#" {
+			// a tag without a name would turn every bare '#' in the content into a link
+			continue
+		}
+		tagContent := fmt.Sprintf(`<a href="%s" class="mention hashtag" rel="tag">#<span>%s</span></a>`, tag.URL, tag.Name)
+		addPair(hashtag, tagContent)
+	}
+
+	// apply every substitution in a single pass over the content
+	content = strings.NewReplacer(pairs...).Replace(content)
+
+	// replace newlines with breaks; this has to happen before postformat, which removes newlines
+	content = strings.ReplaceAll(content, "\n", "<br />")
+
+	return postformat(content)
+}
diff --git a/internal/text/sanitize.go b/internal/text/sanitize.go
new file mode 100644
index 000000000..aac9d8aab
--- /dev/null
+++ b/internal/text/sanitize.go
@@ -0,0 +1,50 @@
+/*
+ GoToSocial
+ Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package text
+
+import (
+ "github.com/microcosm-cc/bluemonday"
+)
+
+// regular is the sanitization policy used by SanitizeHTML. It is built on bluemonday's
+// UGCPolicy, which upstream describes like this:
+//
+// '[A]llows a broad selection of HTML elements and attributes that are safe for user generated content.
+// Note that this policy does not allow iframes, object, embed, styles, script, etc.
+// An example usage scenario would be blog post bodies where a variety of formatting is expected along with the potential for TABLEs and IMGs.'
+//
+// Source: https://github.com/microcosm-cc/bluemonday#usage
+//
+// On top of that base, the no-referrer, no-follow, and cross-origin-anonymous requirements are switched on.
+var regular = bluemonday.UGCPolicy().
+	RequireNoReferrerOnLinks(true).
+	RequireNoFollowOnLinks(true).
+	RequireCrossOriginAnonymous(true)
+
+// strict is the sanitization policy used by RemoveHTML. It is bluemonday's StrictPolicy,
+// which upstream describes like this:
+//
+// '[C]an be thought of as equivalent to stripping all HTML elements and their attributes as it has nothing on its allowlist.
+// An example usage scenario would be blog post titles where HTML tags are not expected at all
+// and if they are then the elements and the content of the elements should be stripped. This is a very strict policy.'
+//
+// Source: https://github.com/microcosm-cc/bluemonday#usage
+var strict = bluemonday.StrictPolicy()
+
+// SanitizeHTML runs the given string through the regular policy and returns the result,
+// so that only the HTML elements and attributes allowed by that policy are kept.
+func SanitizeHTML(in string) string {
+	sanitized := regular.Sanitize(in)
+	return sanitized
+}
+
+// RemoveHTML runs the given string through the strict policy and returns the result,
+// which leaves no HTML in the output.
+func RemoveHTML(in string) string {
+	stripped := strict.Sanitize(in)
+	return stripped
+}