From 5668ce1ec701ed12eb099020e8a322de08e6f810 Mon Sep 17 00:00:00 2001
From: tobi <31960611+tsmethurst@users.noreply.github.com>
Date: Thu, 26 May 2022 11:37:13 +0200
Subject: [bugfix] Fix HTML escaping in instance title (#607)

* move caption sanitization -> sanitize.go

* use sanitizeplaintext rather than removehtml

* rename sanitizecaption to sanitizeplaintext

* avoid removing html twice from statuses

* unexport RemoveHTML
it's no longer used outside the text package so this
makes it less confusing

* test instance PATCH
---
 internal/text/sanitize.go | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'internal/text/sanitize.go')

diff --git a/internal/text/sanitize.go b/internal/text/sanitize.go
index 897dea34d..d4faabbb1 100644
--- a/internal/text/sanitize.go
+++ b/internal/text/sanitize.go
@@ -46,12 +46,20 @@ var regular *bluemonday.Policy = bluemonday.UGCPolicy().
 // Source: https://github.com/microcosm-cc/bluemonday#usage
 var strict *bluemonday.Policy = bluemonday.StrictPolicy()
 
-// SanitizeHTML cleans up HTML in the given string, allowing through only safe HTML elements.
+// removeHTML strictly removes *all* recognized HTML elements from the given string.
+func removeHTML(in string) string {
+	return strict.Sanitize(in)
+}
+
+// SanitizeHTML sanitizes risky html elements from the given string, allowing only safe ones through.
 func SanitizeHTML(in string) string {
 	return regular.Sanitize(in)
 }
 
-// RemoveHTML removes all HTML from the given string.
-func RemoveHTML(in string) string {
-	return strict.Sanitize(in)
+// SanitizePlaintext runs text through basic sanitization. This removes
+// any html elements that were in the string, and returns clean plaintext.
+func SanitizePlaintext(in string) string {
+	content := preformat(in)
+	content = removeHTML(content)
+	return postformat(content)
 }
-- 
cgit v1.2.3