summaryrefslogtreecommitdiff
path: root/vendor/github.com/tdewolff/minify/v2/html/html.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/tdewolff/minify/v2/html/html.go')
-rw-r--r--vendor/github.com/tdewolff/minify/v2/html/html.go513
1 files changed, 513 insertions, 0 deletions
diff --git a/vendor/github.com/tdewolff/minify/v2/html/html.go b/vendor/github.com/tdewolff/minify/v2/html/html.go
new file mode 100644
index 000000000..3431ad3be
--- /dev/null
+++ b/vendor/github.com/tdewolff/minify/v2/html/html.go
@@ -0,0 +1,513 @@
+// Package html minifies HTML5 following the specifications at http://www.w3.org/TR/html5/syntax.html.
+package html
+
+import (
+ "bytes"
+ "io"
+
+ "github.com/tdewolff/minify/v2"
+ "github.com/tdewolff/parse/v2"
+ "github.com/tdewolff/parse/v2/buffer"
+ "github.com/tdewolff/parse/v2/html"
+)
+
+var (
+ gtBytes = []byte(">")
+ isBytes = []byte("=")
+ spaceBytes = []byte(" ")
+ doctypeBytes = []byte("<!doctype html>")
+ jsMimeBytes = []byte("application/javascript")
+ cssMimeBytes = []byte("text/css")
+ htmlMimeBytes = []byte("text/html")
+ svgMimeBytes = []byte("image/svg+xml")
+ formMimeBytes = []byte("application/x-www-form-urlencoded")
+ mathMimeBytes = []byte("application/mathml+xml")
+ dataSchemeBytes = []byte("data:")
+ jsSchemeBytes = []byte("javascript:")
+ httpBytes = []byte("http")
+ radioBytes = []byte("radio")
+ onBytes = []byte("on")
+ textBytes = []byte("text")
+ noneBytes = []byte("none")
+ submitBytes = []byte("submit")
+ allBytes = []byte("all")
+ rectBytes = []byte("rect")
+ dataBytes = []byte("data")
+ getBytes = []byte("get")
+ autoBytes = []byte("auto")
+ oneBytes = []byte("one")
+ inlineParams = map[string]string{"inline": "1"}
+)
+
+////////////////////////////////////////////////////////////////
+
+// Minifier is an HTML minifier.
+type Minifier struct {
+ KeepComments bool
+ KeepConditionalComments bool
+ KeepDefaultAttrVals bool
+ KeepDocumentTags bool
+ KeepEndTags bool
+ KeepQuotes bool
+ KeepWhitespace bool
+}
+
+// Minify minifies HTML data, it reads from r and writes to w.
+func Minify(m *minify.M, w io.Writer, r io.Reader, params map[string]string) error {
+ return (&Minifier{}).Minify(m, w, r, params)
+}
+
+// Minify minifies HTML data, it reads from r and writes to w.
+func (o *Minifier) Minify(m *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
+ var rawTagHash Hash
+ var rawTagMediatype []byte
+
+ omitSpace := true // if true the next leading space is omitted
+ inPre := false
+
+ attrMinifyBuffer := buffer.NewWriter(make([]byte, 0, 64))
+ attrByteBuffer := make([]byte, 0, 64)
+
+ z := parse.NewInput(r)
+ defer z.Restore()
+
+ l := html.NewLexer(z)
+ tb := NewTokenBuffer(z, l)
+ for {
+ t := *tb.Shift()
+ switch t.TokenType {
+ case html.ErrorToken:
+ if _, err := w.Write(nil); err != nil {
+ return err
+ }
+ if l.Err() == io.EOF {
+ return nil
+ }
+ return l.Err()
+ case html.DoctypeToken:
+ w.Write(doctypeBytes)
+ case html.CommentToken:
+ if o.KeepComments {
+ w.Write(t.Data)
+ } else if o.KeepConditionalComments && 6 < len(t.Text) && (bytes.HasPrefix(t.Text, []byte("[if ")) || bytes.HasSuffix(t.Text, []byte("[endif]")) || bytes.HasSuffix(t.Text, []byte("[endif]--"))) {
+ // [if ...] is always 7 or more characters, [endif] is only encountered for downlevel-revealed
+ // see https://msdn.microsoft.com/en-us/library/ms537512(v=vs.85).aspx#syntax
+ if bytes.HasPrefix(t.Data, []byte("<!--[if ")) && bytes.HasSuffix(t.Data, []byte("<![endif]-->")) { // downlevel-hidden
+ begin := bytes.IndexByte(t.Data, '>') + 1
+ end := len(t.Data) - len("<![endif]-->")
+ w.Write(t.Data[:begin])
+ if err := o.Minify(m, w, buffer.NewReader(t.Data[begin:end]), nil); err != nil {
+ return minify.UpdateErrorPosition(err, z, t.Offset)
+ }
+ w.Write(t.Data[end:])
+ } else {
+ w.Write(t.Data) // downlevel-revealed or short downlevel-hidden
+ }
+ } else if 1 < len(t.Text) && t.Text[0] == '#' {
+ // SSI tags
+ w.Write(t.Data)
+ }
+ case html.SvgToken:
+ if err := m.MinifyMimetype(svgMimeBytes, w, buffer.NewReader(t.Data), nil); err != nil {
+ if err != minify.ErrNotExist {
+ return minify.UpdateErrorPosition(err, z, t.Offset)
+ }
+ w.Write(t.Data)
+ }
+ case html.MathToken:
+ if err := m.MinifyMimetype(mathMimeBytes, w, buffer.NewReader(t.Data), nil); err != nil {
+ if err != minify.ErrNotExist {
+ return minify.UpdateErrorPosition(err, z, t.Offset)
+ }
+ w.Write(t.Data)
+ }
+ case html.TextToken:
+ // CSS and JS minifiers for inline code
+ if rawTagHash != 0 {
+ if rawTagHash == Style || rawTagHash == Script || rawTagHash == Iframe {
+ var mimetype []byte
+ var params map[string]string
+ if rawTagHash == Iframe {
+ mimetype = htmlMimeBytes
+ } else if len(rawTagMediatype) > 0 {
+ mimetype, params = parse.Mediatype(rawTagMediatype)
+ } else if rawTagHash == Script {
+ mimetype = jsMimeBytes
+ } else if rawTagHash == Style {
+ mimetype = cssMimeBytes
+ }
+ if err := m.MinifyMimetype(mimetype, w, buffer.NewReader(t.Data), params); err != nil {
+ if err != minify.ErrNotExist {
+ return minify.UpdateErrorPosition(err, z, t.Offset)
+ }
+ w.Write(t.Data)
+ }
+ } else {
+ w.Write(t.Data)
+ }
+ } else if inPre {
+ w.Write(t.Data)
+ } else {
+ t.Data = parse.ReplaceMultipleWhitespaceAndEntities(t.Data, EntitiesMap, TextRevEntitiesMap)
+
+ // whitespace removal; trim left
+ if omitSpace && parse.IsWhitespace(t.Data[0]) {
+ t.Data = t.Data[1:]
+ }
+
+ // whitespace removal; trim right
+ omitSpace = false
+ if len(t.Data) == 0 {
+ omitSpace = true
+ } else if parse.IsWhitespace(t.Data[len(t.Data)-1]) {
+ omitSpace = true
+ i := 0
+ for {
+ next := tb.Peek(i)
+ // trim if EOF, text token with leading whitespace or block token
+ if next.TokenType == html.ErrorToken {
+ t.Data = t.Data[:len(t.Data)-1]
+ omitSpace = false
+ break
+ } else if next.TokenType == html.TextToken {
+ // this only happens when a comment, doctype or phrasing end tag (only for !o.KeepWhitespace) was in between
+ // remove if the text token starts with a whitespace
+ if len(next.Data) > 0 && parse.IsWhitespace(next.Data[0]) {
+ t.Data = t.Data[:len(t.Data)-1]
+ omitSpace = false
+ }
+ break
+ } else if next.TokenType == html.StartTagToken || next.TokenType == html.EndTagToken {
+ if o.KeepWhitespace {
+ break
+ }
+ // remove when followed up by a block tag
+ if next.Traits&nonPhrasingTag != 0 {
+ t.Data = t.Data[:len(t.Data)-1]
+ omitSpace = false
+ break
+ } else if next.TokenType == html.StartTagToken {
+ break
+ }
+ }
+ i++
+ }
+ }
+
+ w.Write(t.Data)
+ }
+ case html.StartTagToken, html.EndTagToken:
+ rawTagHash = 0
+ hasAttributes := false
+ if t.TokenType == html.StartTagToken {
+ if next := tb.Peek(0); next.TokenType == html.AttributeToken {
+ hasAttributes = true
+ }
+ if t.Traits&rawTag != 0 {
+ // ignore empty script and style tags
+ if !hasAttributes && (t.Hash == Script || t.Hash == Style) {
+ if next := tb.Peek(1); next.TokenType == html.EndTagToken {
+ tb.Shift()
+ tb.Shift()
+ break
+ }
+ }
+ rawTagHash = t.Hash
+ rawTagMediatype = nil
+
+ // do not minify content of <style amp-boilerplate>
+ if hasAttributes && t.Hash == Style {
+ if attrs := tb.Attributes(Amp_Boilerplate); attrs[0] != nil {
+ rawTagHash = 0
+ }
+ }
+ }
+ } else if t.Hash == Template {
+ omitSpace = true // EndTagToken
+ }
+
+ if t.Hash == Pre {
+ inPre = t.TokenType == html.StartTagToken
+ }
+
+ // remove superfluous tags, except for html, head and body tags when KeepDocumentTags is set
+ if !hasAttributes && (!o.KeepDocumentTags && (t.Hash == Html || t.Hash == Head || t.Hash == Body) || t.Hash == Colgroup) {
+ break
+ } else if t.TokenType == html.EndTagToken {
+ omitEndTag := false
+ if !o.KeepEndTags {
+ if t.Hash == Thead || t.Hash == Tbody || t.Hash == Tfoot || t.Hash == Tr || t.Hash == Th ||
+ t.Hash == Td || t.Hash == Option || t.Hash == Dd || t.Hash == Dt || t.Hash == Li ||
+ t.Hash == Rb || t.Hash == Rt || t.Hash == Rtc || t.Hash == Rp {
+ omitEndTag = true // omit end tags
+ } else if t.Hash == P {
+ i := 0
+ for {
+ next := tb.Peek(i)
+ i++
+ // continue if text token is empty or whitespace
+ if next.TokenType == html.TextToken && parse.IsAllWhitespace(next.Data) {
+ continue
+ }
+ if next.TokenType == html.ErrorToken || next.TokenType == html.EndTagToken && next.Traits&keepPTag == 0 || next.TokenType == html.StartTagToken && next.Traits&omitPTag != 0 {
+ omitEndTag = true // omit p end tag
+ }
+ break
+ }
+ } else if t.Hash == Optgroup {
+ i := 0
+ for {
+ next := tb.Peek(i)
+ i++
+ // continue if text token
+ if next.TokenType == html.TextToken {
+ continue
+ }
+ if next.TokenType == html.ErrorToken || next.Hash != Option {
+ omitEndTag = true // omit optgroup end tag
+ }
+ break
+ }
+ }
+ }
+
+ if t.Traits&nonPhrasingTag != 0 {
+ omitSpace = true // omit spaces after block elements
+ } else if o.KeepWhitespace || t.Traits&objectTag != 0 {
+ omitSpace = false
+ }
+
+ if !omitEndTag {
+ if len(t.Data) > 3+len(t.Text) {
+ t.Data[2+len(t.Text)] = '>'
+ t.Data = t.Data[:3+len(t.Text)]
+ }
+ w.Write(t.Data)
+ }
+
+ // skip text in select and optgroup tags
+ if t.Hash == Option || t.Hash == Optgroup {
+ if next := tb.Peek(0); next.TokenType == html.TextToken {
+ tb.Shift()
+ }
+ }
+ break
+ }
+
+ if o.KeepWhitespace || t.Traits&objectTag != 0 {
+ omitSpace = false
+ } else if t.Traits&nonPhrasingTag != 0 {
+ omitSpace = true // omit spaces after block elements
+ }
+
+ w.Write(t.Data)
+
+ if hasAttributes {
+ if t.Hash == Meta {
+ attrs := tb.Attributes(Content, Http_Equiv, Charset, Name)
+ if content := attrs[0]; content != nil {
+ if httpEquiv := attrs[1]; httpEquiv != nil {
+ httpEquiv.AttrVal = parse.TrimWhitespace(httpEquiv.AttrVal)
+ if charset := attrs[2]; charset == nil && parse.EqualFold(httpEquiv.AttrVal, []byte("content-type")) {
+ content.AttrVal = minify.Mediatype(content.AttrVal)
+ if bytes.Equal(content.AttrVal, []byte("text/html;charset=utf-8")) {
+ httpEquiv.Text = nil
+ content.Text = []byte("charset")
+ content.Hash = Charset
+ content.AttrVal = []byte("utf-8")
+ }
+ }
+ }
+ if name := attrs[3]; name != nil {
+ name.AttrVal = parse.TrimWhitespace(name.AttrVal)
+ if parse.EqualFold(name.AttrVal, []byte("keywords")) {
+ content.AttrVal = bytes.ReplaceAll(content.AttrVal, []byte(", "), []byte(","))
+ } else if parse.EqualFold(name.AttrVal, []byte("viewport")) {
+ content.AttrVal = bytes.ReplaceAll(content.AttrVal, []byte(" "), []byte(""))
+ for i := 0; i < len(content.AttrVal); i++ {
+ if content.AttrVal[i] == '=' && i+2 < len(content.AttrVal) {
+ i++
+ if n := parse.Number(content.AttrVal[i:]); n > 0 {
+ minNum := minify.Number(content.AttrVal[i:i+n], -1)
+ if len(minNum) < n {
+ copy(content.AttrVal[i:i+len(minNum)], minNum)
+ copy(content.AttrVal[i+len(minNum):], content.AttrVal[i+n:])
+ content.AttrVal = content.AttrVal[:len(content.AttrVal)+len(minNum)-n]
+ }
+ i += len(minNum)
+ }
+ i-- // mitigate for-loop increase
+ }
+ }
+ }
+ }
+ }
+ } else if t.Hash == Script {
+ attrs := tb.Attributes(Src, Charset)
+ if attrs[0] != nil && attrs[1] != nil {
+ attrs[1].Text = nil
+ }
+ } else if t.Hash == Input {
+ attrs := tb.Attributes(Type, Value)
+ if t, value := attrs[0], attrs[1]; t != nil && value != nil {
+ isRadio := parse.EqualFold(t.AttrVal, radioBytes)
+ if !isRadio && len(value.AttrVal) == 0 {
+ value.Text = nil
+ } else if isRadio && parse.EqualFold(value.AttrVal, onBytes) {
+ value.Text = nil
+ }
+ }
+ } else if t.Hash == A {
+ attrs := tb.Attributes(Id, Name)
+ if id, name := attrs[0], attrs[1]; id != nil && name != nil {
+ if bytes.Equal(id.AttrVal, name.AttrVal) {
+ name.Text = nil
+ }
+ }
+ }
+
+ // write attributes
+ for {
+ attr := *tb.Shift()
+ if attr.TokenType != html.AttributeToken {
+ break
+ } else if attr.Text == nil {
+ continue // removed attribute
+ }
+
+ val := attr.AttrVal
+ if attr.Traits&trimAttr != 0 {
+ val = parse.ReplaceMultipleWhitespaceAndEntities(val, EntitiesMap, nil)
+ val = parse.TrimWhitespace(val)
+ } else {
+ val = parse.ReplaceEntities(val, EntitiesMap, nil)
+ }
+ if t.Traits != 0 {
+ if len(val) == 0 && (attr.Hash == Class ||
+ attr.Hash == Dir ||
+ attr.Hash == Id ||
+ attr.Hash == Name ||
+ attr.Hash == Action && t.Hash == Form) {
+ continue // omit empty attribute values
+ }
+ if attr.Traits&caselessAttr != 0 {
+ val = parse.ToLower(val)
+ if attr.Hash == Enctype || attr.Hash == Codetype || attr.Hash == Accept || attr.Hash == Type && (t.Hash == A || t.Hash == Link || t.Hash == Embed || t.Hash == Object || t.Hash == Source || t.Hash == Script || t.Hash == Style) {
+ val = minify.Mediatype(val)
+ }
+ }
+ if rawTagHash != 0 && attr.Hash == Type {
+ rawTagMediatype = parse.Copy(val)
+ }
+
+ // default attribute values can be omitted
+ if !o.KeepDefaultAttrVals && (attr.Hash == Type && (t.Hash == Script && jsMimetypes[string(val)] ||
+ t.Hash == Style && bytes.Equal(val, cssMimeBytes) ||
+ t.Hash == Link && bytes.Equal(val, cssMimeBytes) ||
+ t.Hash == Input && bytes.Equal(val, textBytes) ||
+ t.Hash == Button && bytes.Equal(val, submitBytes)) ||
+ attr.Hash == Language && t.Hash == Script ||
+ attr.Hash == Method && bytes.Equal(val, getBytes) ||
+ attr.Hash == Enctype && bytes.Equal(val, formMimeBytes) ||
+ attr.Hash == Colspan && bytes.Equal(val, oneBytes) ||
+ attr.Hash == Rowspan && bytes.Equal(val, oneBytes) ||
+ attr.Hash == Shape && bytes.Equal(val, rectBytes) ||
+ attr.Hash == Span && bytes.Equal(val, oneBytes) ||
+ attr.Hash == Clear && bytes.Equal(val, noneBytes) ||
+ attr.Hash == Frameborder && bytes.Equal(val, oneBytes) ||
+ attr.Hash == Scrolling && bytes.Equal(val, autoBytes) ||
+ attr.Hash == Valuetype && bytes.Equal(val, dataBytes) ||
+ attr.Hash == Media && t.Hash == Style && bytes.Equal(val, allBytes)) {
+ continue
+ }
+
+ if attr.Hash == Style {
+ // CSS minifier for attribute inline code
+ val = parse.TrimWhitespace(val)
+ attrMinifyBuffer.Reset()
+ if err := m.MinifyMimetype(cssMimeBytes, attrMinifyBuffer, buffer.NewReader(val), inlineParams); err == nil {
+ val = attrMinifyBuffer.Bytes()
+ } else if err != minify.ErrNotExist {
+ return minify.UpdateErrorPosition(err, z, attr.Offset)
+ }
+ if len(val) == 0 {
+ continue
+ }
+ } else if len(attr.Text) > 2 && attr.Text[0] == 'o' && attr.Text[1] == 'n' {
+ // JS minifier for attribute inline code
+ val = parse.TrimWhitespace(val)
+ if len(val) >= 11 && parse.EqualFold(val[:11], jsSchemeBytes) {
+ val = val[11:]
+ }
+ attrMinifyBuffer.Reset()
+ if err := m.MinifyMimetype(jsMimeBytes, attrMinifyBuffer, buffer.NewReader(val), nil); err == nil {
+ val = attrMinifyBuffer.Bytes()
+ } else if err != minify.ErrNotExist {
+ return minify.UpdateErrorPosition(err, z, attr.Offset)
+ }
+ if len(val) == 0 {
+ continue
+ }
+ } else if attr.Traits&urlAttr != 0 { // anchors are already handled
+ val = parse.TrimWhitespace(val)
+ if 5 < len(val) {
+ if parse.EqualFold(val[:4], httpBytes) {
+ if val[4] == ':' {
+ if m.URL != nil && m.URL.Scheme == "http" {
+ val = val[5:]
+ } else {
+ parse.ToLower(val[:4])
+ }
+ } else if (val[4] == 's' || val[4] == 'S') && val[5] == ':' {
+ if m.URL != nil && m.URL.Scheme == "https" {
+ val = val[6:]
+ } else {
+ parse.ToLower(val[:5])
+ }
+ }
+ } else if parse.EqualFold(val[:5], dataSchemeBytes) {
+ val = minify.DataURI(m, val)
+ }
+ }
+ }
+ }
+
+ w.Write(spaceBytes)
+ w.Write(attr.Text)
+ if len(val) > 0 && attr.Traits&booleanAttr == 0 {
+ w.Write(isBytes)
+
+ // use double quotes for RDFa attributes
+ isXML := attr.Hash == Vocab || attr.Hash == Typeof || attr.Hash == Property || attr.Hash == Resource || attr.Hash == Prefix || attr.Hash == Content || attr.Hash == About || attr.Hash == Rev || attr.Hash == Datatype || attr.Hash == Inlist
+
+ // no quotes if possible, else prefer single or double depending on which occurs more often in value
+ var quote byte
+
+ if 0 < len(attr.Data) && (attr.Data[len(attr.Data)-1] == '\'' || attr.Data[len(attr.Data)-1] == '"') {
+ quote = attr.Data[len(attr.Data)-1]
+ }
+ val = html.EscapeAttrVal(&attrByteBuffer, val, quote, o.KeepQuotes, isXML)
+ w.Write(val)
+ }
+ }
+ } else {
+ _ = tb.Shift() // StartTagClose
+ }
+ w.Write(gtBytes)
+
+ // skip text in select and optgroup tags
+ if t.Hash == Select || t.Hash == Optgroup {
+ if next := tb.Peek(0); next.TokenType == html.TextToken {
+ tb.Shift()
+ }
+ }
+
+ // keep space after phrasing tags (<i>, <span>, ...) FontAwesome etc.
+ if t.TokenType == html.StartTagToken && t.Traits&nonPhrasingTag == 0 {
+ if next := tb.Peek(0); next.Hash == t.Hash && next.TokenType == html.EndTagToken {
+ omitSpace = false
+ }
+ }
+ }
+ }
+}