diff options
Diffstat (limited to 'internal/text')
-rw-r--r-- | internal/text/common.go | 46 | ||||
-rw-r--r-- | internal/text/formatter.go | 49 | ||||
-rw-r--r-- | internal/text/markdown.go | 58 | ||||
-rw-r--r-- | internal/text/plain.go | 50 | ||||
-rw-r--r-- | internal/text/sanitize.go | 50 |
5 files changed, 253 insertions, 0 deletions
diff --git a/internal/text/common.go b/internal/text/common.go
new file mode 100644
index 000000000..0165af630
--- /dev/null
+++ b/internal/text/common.go
@@ -0,0 +1,46 @@
+/*
+   GoToSocial
+   Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Affero General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU Affero General Public License for more details.
+
+   You should have received a copy of the GNU Affero General Public License
+   along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package text
+
+import (
+	"fmt"
+	"strings"
+)
+
+// preformat contains some common logic for making a string ready for formatting, which should be used for all user-input text.
+func preformat(in string) string {
+	// do some preformatting of the text
+	// 1. Trim all the whitespace
+	s := strings.TrimSpace(in)
+	return s
+}
+
+// postformat contains some common logic for html sanitization of text, wrapping elements, and trimming newlines and whitespace
+func postformat(in string) string {
+	// do some postformatting of the text
+	// 1. sanitize html to remove any dodgy scripts or other disallowed elements
+	s := SanitizeHTML(in)
+	// 2. wrap the whole thing in a paragraph
+	s = fmt.Sprintf(`<p>%s</p>`, s)
+	// 3. remove any cheeky newlines
+	s = strings.ReplaceAll(s, "\n", "")
+	// 4. remove any whitespace added as a result of the formatting
+	s = strings.TrimSpace(s)
+	return s
+}
diff --git a/internal/text/formatter.go b/internal/text/formatter.go
new file mode 100644
index 000000000..f8cca6675
--- /dev/null
+++ b/internal/text/formatter.go
@@ -0,0 +1,49 @@
+/*
+   GoToSocial
+   Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Affero General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU Affero General Public License for more details.
+
+   You should have received a copy of the GNU Affero General Public License
+   along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package text
+
+import (
+	"github.com/sirupsen/logrus"
+	"github.com/superseriousbusiness/gotosocial/internal/config"
+	"github.com/superseriousbusiness/gotosocial/internal/db"
+	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+)
+
+// Formatter wraps some logic and functions for parsing statuses and other text input into nice html.
+type Formatter interface {
+	// FromMarkdown parses an HTML text from a markdown-formatted text.
+	FromMarkdown(md string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string
+	// FromPlain parses an HTML text from a plaintext.
+	FromPlain(plain string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string
+}
+
+type formatter struct {
+	cfg *config.Config
+	db  db.DB
+	log *logrus.Logger
+}
+
+// NewFormatter returns a new Formatter interface for parsing statuses and other text input into nice html.
+func NewFormatter(cfg *config.Config, db db.DB, log *logrus.Logger) Formatter {
+	return &formatter{
+		cfg: cfg,
+		db:  db,
+		log: log,
+	}
+}
diff --git a/internal/text/markdown.go b/internal/text/markdown.go
new file mode 100644
index 000000000..d1309f389
--- /dev/null
+++ b/internal/text/markdown.go
@@ -0,0 +1,58 @@
+/*
+   GoToSocial
+   Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Affero General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU Affero General Public License for more details.
+
+   You should have received a copy of the GNU Affero General Public License
+   along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package text
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/russross/blackfriday/v2"
+	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+)
+
+var bfExtensions = blackfriday.NoIntraEmphasis |
+	blackfriday.FencedCode |
+	blackfriday.Autolink |
+	blackfriday.Strikethrough |
+	blackfriday.SpaceHeadings |
+	blackfriday.BackslashLineBreak
+
+func (f *formatter) FromMarkdown(md string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string {
+	content := preformat(md)
+
+	// do the markdown parsing *first*
+	content = string(blackfriday.Run([]byte(content), blackfriday.WithExtensions(bfExtensions)))
+
+	// format mentions nicely
+	for _, menchie := range mentions {
+		targetAccount := &gtsmodel.Account{}
+		if err := f.db.GetByID(menchie.TargetAccountID, targetAccount); err == nil {
+			mentionContent := fmt.Sprintf(`<span class="h-card"><a href="%s" class="u-url mention">@<span>%s</span></a></span>`, targetAccount.URL, targetAccount.Username)
+			content = strings.ReplaceAll(content, menchie.NameString, mentionContent)
+		}
+	}
+
+	// format tags nicely
+	for _, tag := range tags {
+		tagContent := fmt.Sprintf(`<a href="%s" class="mention hashtag" rel="tag">#<span>%s</span></a>`, tag.URL, tag.Name)
+		content = strings.ReplaceAll(content, fmt.Sprintf("#%s", tag.Name), tagContent)
+	}
+
+	return postformat(content)
+}
diff --git a/internal/text/plain.go b/internal/text/plain.go
new file mode 100644
index 000000000..24ef16f8e
--- /dev/null
+++ b/internal/text/plain.go
@@ -0,0 +1,50 @@
+/*
+   GoToSocial
+   Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Affero General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU Affero General Public License for more details.
+
+   You should have received a copy of the GNU Affero General Public License
+   along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package text
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+)
+
+func (f *formatter) FromPlain(plain string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string {
+	content := preformat(plain)
+
+	// format mentions nicely
+	for _, menchie := range mentions {
+		targetAccount := &gtsmodel.Account{}
+		if err := f.db.GetByID(menchie.TargetAccountID, targetAccount); err == nil {
+			mentionContent := fmt.Sprintf(`<span class="h-card"><a href="%s" class="u-url mention">@<span>%s</span></a></span>`, targetAccount.URL, targetAccount.Username)
+			content = strings.ReplaceAll(content, menchie.NameString, mentionContent)
+		}
+	}
+
+	// format tags nicely
+	for _, tag := range tags {
+		tagContent := fmt.Sprintf(`<a href="%s" class="mention hashtag" rel="tag">#<span>%s</span></a>`, tag.URL, tag.Name)
+		content = strings.ReplaceAll(content, fmt.Sprintf("#%s", tag.Name), tagContent)
+	}
+
+	// replace newlines with breaks
+	content = strings.ReplaceAll(content, "\n", "<br />")
+
+	return postformat(content)
+}
diff --git a/internal/text/sanitize.go b/internal/text/sanitize.go
new file mode 100644
index 000000000..aac9d8aab
--- /dev/null
+++ b/internal/text/sanitize.go
@@ -0,0 +1,50 @@
+/*
+   GoToSocial
+   Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Affero General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU Affero General Public License for more details.
+
+   You should have received a copy of the GNU Affero General Public License
+   along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package text
+
+import (
+	"github.com/microcosm-cc/bluemonday"
+)
+
+// '[A]llows a broad selection of HTML elements and attributes that are safe for user generated content.
+// Note that this policy does not allow iframes, object, embed, styles, script, etc.
+// An example usage scenario would be blog post bodies where a variety of formatting is expected along with the potential for TABLEs and IMGs.'
+//
+// Source: https://github.com/microcosm-cc/bluemonday#usage
+var regular *bluemonday.Policy = bluemonday.UGCPolicy().
+	RequireNoReferrerOnLinks(true).
+	RequireNoFollowOnLinks(true).
+	RequireCrossOriginAnonymous(true)
+
+// '[C]an be thought of as equivalent to stripping all HTML elements and their attributes as it has nothing on its allowlist.
+// An example usage scenario would be blog post titles where HTML tags are not expected at all
+// and if they are then the elements and the content of the elements should be stripped. This is a very strict policy.'
+//
+// Source: https://github.com/microcosm-cc/bluemonday#usage
+var strict *bluemonday.Policy = bluemonday.StrictPolicy()
+
+// SanitizeHTML cleans up HTML in the given string, allowing through only safe HTML elements.
+func SanitizeHTML(in string) string {
+	return regular.Sanitize(in)
+}
+
+// RemoveHTML removes all HTML from the given string.
+func RemoveHTML(in string) string {
+	return strict.Sanitize(in)
+}