| 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
 | /*
   GoToSocial
   Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU Affero General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Affero General Public License for more details.
   You should have received a copy of the GNU Affero General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
package text
import (
	"html"
	"regexp"
	"strings"
	"github.com/microcosm-cc/bluemonday"
)
// regular is the policy used by SanitizeHTML. It is built on top of
// bluemonday's UGCPolicy, which the bluemonday documentation describes as:
//
// '[A]llows a broad selection of HTML elements and attributes that are safe for user generated content.
// Note that this policy does not allow iframes, object, embed, styles, script, etc.
// An example usage scenario would be blog post bodies where a variety of formatting is expected along with the potential for TABLEs and IMGs.'
//
// Source: https://github.com/microcosm-cc/bluemonday#usage
//
// The base policy is then adjusted, in this order:
//   - the no-referrer, no-follow, cross-origin-anonymous and
//     target-blank-on-fully-qualified-links options are switched on;
//   - "class", "href" and "rel" attributes are allowed on <a>;
//   - "class" is allowed on <span>;
//   - "class" is allowed on <code> only when it has the form "language-<alphanumerics>";
//   - "code" and "pre" are registered via SkipElementsContent.
var regular = func() *bluemonday.Policy {
	// Only class values such as "language-go" are accepted on <code>.
	languageClass := regexp.MustCompile("^language-[a-zA-Z0-9]+$")

	policy := bluemonday.UGCPolicy()

	// Link handling options.
	policy.RequireNoReferrerOnLinks(true)
	policy.RequireNoFollowOnLinks(true)
	policy.RequireCrossOriginAnonymous(true)
	policy.AddTargetBlankToFullyQualifiedLinks(true)

	// Per-element attribute allowances.
	policy.AllowAttrs("class", "href", "rel").OnElements("a")
	policy.AllowAttrs("class").OnElements("span")
	policy.AllowAttrs("class").Matching(languageClass).OnElements("code")

	policy.SkipElementsContent("code", "pre")

	return policy
}()
// strict is the policy used by removeHTML. It is bluemonday's StrictPolicy
// with no further customisation; the bluemonday documentation describes it as:
//
// '[C]an be thought of as equivalent to stripping all HTML elements and their attributes as it has nothing on its allowlist.
// An example usage scenario would be blog post titles where HTML tags are not expected at all
// and if they are then the elements and the content of the elements should be stripped. This is a very strict policy.'
//
// Source: https://github.com/microcosm-cc/bluemonday#usage
var strict = bluemonday.StrictPolicy()
// removeHTML passes the given string through the strict policy and returns
// the sanitized result, so that no recognized HTML elements remain in it.
func removeHTML(in string) string {
	stripped := strict.Sanitize(in)
	return stripped
}
// SanitizeHTML passes the given string through the regular policy and returns
// the result: markup permitted by that policy is kept, everything else is
// sanitized away.
func SanitizeHTML(in string) string {
	cleaned := regular.Sanitize(in)
	return cleaned
}
// SanitizePlaintext reduces the given string to plain text. HTML entities are
// decoded, all HTML elements are removed, entities are decoded once more, and
// leading and trailing whitespace is trimmed from the result.
func SanitizePlaintext(in string) string {
	// Decode entities before stripping so that markup which arrived in
	// escaped form goes through removeHTML as real markup.
	stripped := removeHTML(html.UnescapeString(in))

	// Decode again after stripping; presumably the sanitizer escapes
	// special characters in its output (verify against bluemonday).
	return strings.TrimSpace(html.UnescapeString(stripped))
}
 |