From 5668ce1ec701ed12eb099020e8a322de08e6f810 Mon Sep 17 00:00:00 2001 From: tobi <31960611+tsmethurst@users.noreply.github.com> Date: Thu, 26 May 2022 11:37:13 +0200 Subject: [bugfix] Fix HTML escaping in instance title (#607) * move caption sanitization -> sanitize.go * use sanitizeplaintext rather than removehtml * rename sanitizecaption to sanitizeplaintext * avoid removing html twice from statuses * unexport remoteHTML it's no longer used outside the text package so this makes it less confusing * test instance PATCH --- internal/text/sanitize.go | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'internal/text/sanitize.go') diff --git a/internal/text/sanitize.go b/internal/text/sanitize.go index 897dea34d..d4faabbb1 100644 --- a/internal/text/sanitize.go +++ b/internal/text/sanitize.go @@ -46,12 +46,20 @@ var regular *bluemonday.Policy = bluemonday.UGCPolicy(). // Source: https://github.com/microcosm-cc/bluemonday#usage var strict *bluemonday.Policy = bluemonday.StrictPolicy() -// SanitizeHTML cleans up HTML in the given string, allowing through only safe HTML elements. +// removeHTML strictly removes *all* recognized HTML elements from the given string. +func removeHTML(in string) string { + return strict.Sanitize(in) +} + +// SanitizeHTML sanitizes risky html elements from the given string, allowing only safe ones through. func SanitizeHTML(in string) string { return regular.Sanitize(in) } -// RemoveHTML removes all HTML from the given string. -func RemoveHTML(in string) string { - return strict.Sanitize(in) +// SanitizePlaintext runs text through basic sanitization. This removes +// any html elements that were in the string, and returns clean plaintext. +func SanitizePlaintext(in string) string { + content := preformat(in) + content = removeHTML(content) + return postformat(content) } -- cgit v1.2.3