From c84384e6608368a13a774d6d33a8cc32da7cf209 Mon Sep 17 00:00:00 2001 From: tobi <31960611+tsmethurst@users.noreply.github.com> Date: Tue, 19 Jul 2022 15:21:17 +0200 Subject: [bugfix] html escape special characters in text instead of totally removing them (#719) * remove minify dependency * tidy up some tests * remove pre + postformat funcs * rework sanitization + formatting * update tests * add some more markdown tests --- vendor/github.com/tdewolff/minify/v2/html/html.go | 511 ---------------------- 1 file changed, 511 deletions(-) delete mode 100644 vendor/github.com/tdewolff/minify/v2/html/html.go (limited to 'vendor/github.com/tdewolff/minify/v2/html/html.go') diff --git a/vendor/github.com/tdewolff/minify/v2/html/html.go b/vendor/github.com/tdewolff/minify/v2/html/html.go deleted file mode 100644 index 6868c9b79..000000000 --- a/vendor/github.com/tdewolff/minify/v2/html/html.go +++ /dev/null @@ -1,511 +0,0 @@ -// Package html minifies HTML5 following the specifications at http://www.w3.org/TR/html5/syntax.html. -package html - -import ( - "bytes" - "io" - - "github.com/tdewolff/minify/v2" - "github.com/tdewolff/parse/v2" - "github.com/tdewolff/parse/v2/buffer" - "github.com/tdewolff/parse/v2/html" -) - -var ( - gtBytes = []byte(">") - isBytes = []byte("=") - spaceBytes = []byte(" ") - doctypeBytes = []byte("") - jsMimeBytes = []byte("application/javascript") - cssMimeBytes = []byte("text/css") - htmlMimeBytes = []byte("text/html") - svgMimeBytes = []byte("image/svg+xml") - formMimeBytes = []byte("application/x-www-form-urlencoded") - mathMimeBytes = []byte("application/mathml+xml") - dataSchemeBytes = []byte("data:") - jsSchemeBytes = []byte("javascript:") - httpBytes = []byte("http") - radioBytes = []byte("radio") - onBytes = []byte("on") - textBytes = []byte("text") - noneBytes = []byte("none") - submitBytes = []byte("submit") - allBytes = []byte("all") - rectBytes = []byte("rect") - dataBytes = []byte("data") - getBytes = []byte("get") - autoBytes = []byte("auto") - oneBytes = []byte("one") - inlineParams = map[string]string{"inline": "1"} -) - -//////////////////////////////////////////////////////////////// - -// DefaultMinifier is the default minifier. -var DefaultMinifier = &Minifier{} - -// Minifier is an HTML minifier. -type Minifier struct { - KeepComments bool - KeepConditionalComments bool - KeepDefaultAttrVals bool - KeepDocumentTags bool - KeepEndTags bool - KeepQuotes bool - KeepWhitespace bool -} - -// Minify minifies HTML data, it reads from r and writes to w. -func Minify(m *minify.M, w io.Writer, r io.Reader, params map[string]string) error { - return DefaultMinifier.Minify(m, w, r, params) -} - -// Minify minifies HTML data, it reads from r and writes to w. -func (o *Minifier) Minify(m *minify.M, w io.Writer, r io.Reader, _ map[string]string) error { - var rawTagHash Hash - var rawTagMediatype []byte - - omitSpace := true // if true the next leading space is omitted - inPre := false - - attrMinifyBuffer := buffer.NewWriter(make([]byte, 0, 64)) - attrByteBuffer := make([]byte, 0, 64) - - z := parse.NewInput(r) - defer z.Restore() - - l := html.NewLexer(z) - tb := NewTokenBuffer(z, l) - for { - t := *tb.Shift() - switch t.TokenType { - case html.ErrorToken: - if _, err := w.Write(nil); err != nil { - return err - } - if l.Err() == io.EOF { - return nil - } - return l.Err() - case html.DoctypeToken: - w.Write(doctypeBytes) - case html.CommentToken: - if o.KeepComments { - w.Write(t.Data) - } else if o.KeepConditionalComments && 6 < len(t.Text) && (bytes.HasPrefix(t.Text, []byte("[if ")) || bytes.HasSuffix(t.Text, []byte("[endif]")) || bytes.HasSuffix(t.Text, []byte("[endif]--"))) { - // [if ...] is always 7 or more characters, [endif] is only encountered for downlevel-revealed - // see https://msdn.microsoft.com/en-us/library/ms537512(v=vs.85).aspx#syntax - if bytes.HasPrefix(t.Data, []byte("")) { // downlevel-hidden - begin := bytes.IndexByte(t.Data, '>') + 1 - end := len(t.Data) - len("") - w.Write(t.Data[:begin]) - if err := o.Minify(m, w, buffer.NewReader(t.Data[begin:end]), nil); err != nil { - return minify.UpdateErrorPosition(err, z, t.Offset) - } - w.Write(t.Data[end:]) - } else { - w.Write(t.Data) // downlevel-revealed or short downlevel-hidden - } - } else if 1 < len(t.Text) && t.Text[0] == '#' { - // SSI tags - w.Write(t.Data) - } - case html.SvgToken: - if err := m.MinifyMimetype(svgMimeBytes, w, buffer.NewReader(t.Data), nil); err != nil { - if err != minify.ErrNotExist { - return minify.UpdateErrorPosition(err, z, t.Offset) - } - w.Write(t.Data) - } - case html.MathToken: - if err := m.MinifyMimetype(mathMimeBytes, w, buffer.NewReader(t.Data), nil); err != nil { - if err != minify.ErrNotExist { - return minify.UpdateErrorPosition(err, z, t.Offset) - } - w.Write(t.Data) - } - case html.TextToken: - // CSS and JS minifiers for inline code - if rawTagHash != 0 { - if rawTagHash == Style || rawTagHash == Script || rawTagHash == Iframe { - var mimetype []byte - var params map[string]string - if rawTagHash == Iframe { - mimetype = htmlMimeBytes - } else if len(rawTagMediatype) > 0 { - mimetype, params = parse.Mediatype(rawTagMediatype) - } else if rawTagHash == Script { - mimetype = jsMimeBytes - } else if rawTagHash == Style { - mimetype = cssMimeBytes - } - if err := m.MinifyMimetype(mimetype, w, buffer.NewReader(t.Data), params); err != nil { - if err != minify.ErrNotExist { - return minify.UpdateErrorPosition(err, z, t.Offset) - } - w.Write(t.Data) - } - } else { - w.Write(t.Data) - } - } else if inPre { - w.Write(t.Data) - } else { - t.Data = parse.ReplaceMultipleWhitespaceAndEntities(t.Data, EntitiesMap, TextRevEntitiesMap) - - // whitespace removal; trim left - if omitSpace && parse.IsWhitespace(t.Data[0]) { - t.Data = t.Data[1:] - } - - // whitespace removal; trim right - omitSpace = false - if len(t.Data) == 0 { - omitSpace = true - } else if parse.IsWhitespace(t.Data[len(t.Data)-1]) { - omitSpace = true - i := 0 - for { - next := tb.Peek(i) - // trim if EOF, text token with leading whitespace or block token - if next.TokenType == html.ErrorToken { - t.Data = t.Data[:len(t.Data)-1] - omitSpace = false - break - } else if next.TokenType == html.TextToken { - // this only happens when a comment, doctype or phrasing end tag (only for !o.KeepWhitespace) was in between - // remove if the text token starts with a whitespace - if len(next.Data) > 0 && parse.IsWhitespace(next.Data[0]) { - t.Data = t.Data[:len(t.Data)-1] - omitSpace = false - } - break - } else if next.TokenType == html.StartTagToken || next.TokenType == html.EndTagToken { - if o.KeepWhitespace { - break - } - // remove when followed up by a block tag - if next.Traits&nonPhrasingTag != 0 { - t.Data = t.Data[:len(t.Data)-1] - omitSpace = false - break - } else if next.TokenType == html.StartTagToken { - break - } - } - i++ - } - } - - w.Write(t.Data) - } - case html.StartTagToken, html.EndTagToken: - rawTagHash = 0 - hasAttributes := false - if t.TokenType == html.StartTagToken { - if next := tb.Peek(0); next.TokenType == html.AttributeToken { - hasAttributes = true - } - if t.Traits&rawTag != 0 { - // ignore empty script and style tags - if !hasAttributes && (t.Hash == Script || t.Hash == Style) { - if next := tb.Peek(1); next.TokenType == html.EndTagToken { - tb.Shift() - tb.Shift() - break - } - } - rawTagHash = t.Hash - rawTagMediatype = nil - - // do not minify content of