diff options
Diffstat (limited to 'vendor/github.com/tdewolff/minify/v2/html/html.go')
-rw-r--r-- | vendor/github.com/tdewolff/minify/v2/html/html.go | 513 |
1 files changed, 513 insertions, 0 deletions
diff --git a/vendor/github.com/tdewolff/minify/v2/html/html.go b/vendor/github.com/tdewolff/minify/v2/html/html.go new file mode 100644 index 000000000..3431ad3be --- /dev/null +++ b/vendor/github.com/tdewolff/minify/v2/html/html.go @@ -0,0 +1,513 @@ +// Package html minifies HTML5 following the specifications at http://www.w3.org/TR/html5/syntax.html. +package html + +import ( + "bytes" + "io" + + "github.com/tdewolff/minify/v2" + "github.com/tdewolff/parse/v2" + "github.com/tdewolff/parse/v2/buffer" + "github.com/tdewolff/parse/v2/html" +) + +var ( + gtBytes = []byte(">") + isBytes = []byte("=") + spaceBytes = []byte(" ") + doctypeBytes = []byte("<!doctype html>") + jsMimeBytes = []byte("application/javascript") + cssMimeBytes = []byte("text/css") + htmlMimeBytes = []byte("text/html") + svgMimeBytes = []byte("image/svg+xml") + formMimeBytes = []byte("application/x-www-form-urlencoded") + mathMimeBytes = []byte("application/mathml+xml") + dataSchemeBytes = []byte("data:") + jsSchemeBytes = []byte("javascript:") + httpBytes = []byte("http") + radioBytes = []byte("radio") + onBytes = []byte("on") + textBytes = []byte("text") + noneBytes = []byte("none") + submitBytes = []byte("submit") + allBytes = []byte("all") + rectBytes = []byte("rect") + dataBytes = []byte("data") + getBytes = []byte("get") + autoBytes = []byte("auto") + oneBytes = []byte("one") + inlineParams = map[string]string{"inline": "1"} +) + +//////////////////////////////////////////////////////////////// + +// Minifier is an HTML minifier. +type Minifier struct { + KeepComments bool + KeepConditionalComments bool + KeepDefaultAttrVals bool + KeepDocumentTags bool + KeepEndTags bool + KeepQuotes bool + KeepWhitespace bool +} + +// Minify minifies HTML data, it reads from r and writes to w. +func Minify(m *minify.M, w io.Writer, r io.Reader, params map[string]string) error { + return (&Minifier{}).Minify(m, w, r, params) +} + +// Minify minifies HTML data, it reads from r and writes to w. +func (o *Minifier) Minify(m *minify.M, w io.Writer, r io.Reader, _ map[string]string) error { + var rawTagHash Hash + var rawTagMediatype []byte + + omitSpace := true // if true the next leading space is omitted + inPre := false + + attrMinifyBuffer := buffer.NewWriter(make([]byte, 0, 64)) + attrByteBuffer := make([]byte, 0, 64) + + z := parse.NewInput(r) + defer z.Restore() + + l := html.NewLexer(z) + tb := NewTokenBuffer(z, l) + for { + t := *tb.Shift() + switch t.TokenType { + case html.ErrorToken: + if _, err := w.Write(nil); err != nil { + return err + } + if l.Err() == io.EOF { + return nil + } + return l.Err() + case html.DoctypeToken: + w.Write(doctypeBytes) + case html.CommentToken: + if o.KeepComments { + w.Write(t.Data) + } else if o.KeepConditionalComments && 6 < len(t.Text) && (bytes.HasPrefix(t.Text, []byte("[if ")) || bytes.HasSuffix(t.Text, []byte("[endif]")) || bytes.HasSuffix(t.Text, []byte("[endif]--"))) { + // [if ...] is always 7 or more characters, [endif] is only encountered for downlevel-revealed + // see https://msdn.microsoft.com/en-us/library/ms537512(v=vs.85).aspx#syntax + if bytes.HasPrefix(t.Data, []byte("<!--[if ")) && bytes.HasSuffix(t.Data, []byte("<![endif]-->")) { // downlevel-hidden + begin := bytes.IndexByte(t.Data, '>') + 1 + end := len(t.Data) - len("<![endif]-->") + w.Write(t.Data[:begin]) + if err := o.Minify(m, w, buffer.NewReader(t.Data[begin:end]), nil); err != nil { + return minify.UpdateErrorPosition(err, z, t.Offset) + } + w.Write(t.Data[end:]) + } else { + w.Write(t.Data) // downlevel-revealed or short downlevel-hidden + } + } else if 1 < len(t.Text) && t.Text[0] == '#' { + // SSI tags + w.Write(t.Data) + } + case html.SvgToken: + if err := m.MinifyMimetype(svgMimeBytes, w, buffer.NewReader(t.Data), nil); err != nil { + if err != minify.ErrNotExist { + return minify.UpdateErrorPosition(err, z, t.Offset) + } + w.Write(t.Data) + } + case html.MathToken: + if err := m.MinifyMimetype(mathMimeBytes, w, buffer.NewReader(t.Data), nil); err != nil { + if err != minify.ErrNotExist { + return minify.UpdateErrorPosition(err, z, t.Offset) + } + w.Write(t.Data) + } + case html.TextToken: + // CSS and JS minifiers for inline code + if rawTagHash != 0 { + if rawTagHash == Style || rawTagHash == Script || rawTagHash == Iframe { + var mimetype []byte + var params map[string]string + if rawTagHash == Iframe { + mimetype = htmlMimeBytes + } else if len(rawTagMediatype) > 0 { + mimetype, params = parse.Mediatype(rawTagMediatype) + } else if rawTagHash == Script { + mimetype = jsMimeBytes + } else if rawTagHash == Style { + mimetype = cssMimeBytes + } + if err := m.MinifyMimetype(mimetype, w, buffer.NewReader(t.Data), params); err != nil { + if err != minify.ErrNotExist { + return minify.UpdateErrorPosition(err, z, t.Offset) + } + w.Write(t.Data) + } + } else { + w.Write(t.Data) + } + } else if inPre { + w.Write(t.Data) + } else { + t.Data = parse.ReplaceMultipleWhitespaceAndEntities(t.Data, EntitiesMap, TextRevEntitiesMap) + + // whitespace removal; trim left + if omitSpace && parse.IsWhitespace(t.Data[0]) { + t.Data = t.Data[1:] + } + + // whitespace removal; trim right + omitSpace = false + if len(t.Data) == 0 { + omitSpace = true + } else if parse.IsWhitespace(t.Data[len(t.Data)-1]) { + omitSpace = true + i := 0 + for { + next := tb.Peek(i) + // trim if EOF, text token with leading whitespace or block token + if next.TokenType == html.ErrorToken { + t.Data = t.Data[:len(t.Data)-1] + omitSpace = false + break + } else if next.TokenType == html.TextToken { + // this only happens when a comment, doctype or phrasing end tag (only for !o.KeepWhitespace) was in between + // remove if the text token starts with a whitespace + if len(next.Data) > 0 && parse.IsWhitespace(next.Data[0]) { + t.Data = t.Data[:len(t.Data)-1] + omitSpace = false + } + break + } else if next.TokenType == html.StartTagToken || next.TokenType == html.EndTagToken { + if o.KeepWhitespace { + break + } + // remove when followed up by a block tag + if next.Traits&nonPhrasingTag != 0 { + t.Data = t.Data[:len(t.Data)-1] + omitSpace = false + break + } else if next.TokenType == html.StartTagToken { + break + } + } + i++ + } + } + + w.Write(t.Data) + } + case html.StartTagToken, html.EndTagToken: + rawTagHash = 0 + hasAttributes := false + if t.TokenType == html.StartTagToken { + if next := tb.Peek(0); next.TokenType == html.AttributeToken { + hasAttributes = true + } + if t.Traits&rawTag != 0 { + // ignore empty script and style tags + if !hasAttributes && (t.Hash == Script || t.Hash == Style) { + if next := tb.Peek(1); next.TokenType == html.EndTagToken { + tb.Shift() + tb.Shift() + break + } + } + rawTagHash = t.Hash + rawTagMediatype = nil + + // do not minify content of <style amp-boilerplate> + if hasAttributes && t.Hash == Style { + if attrs := tb.Attributes(Amp_Boilerplate); attrs[0] != nil { + rawTagHash = 0 + } + } + } + } else if t.Hash == Template { + omitSpace = true // EndTagToken + } + + if t.Hash == Pre { + inPre = t.TokenType == html.StartTagToken + } + + // remove superfluous tags, except for html, head and body tags when KeepDocumentTags is set + if !hasAttributes && (!o.KeepDocumentTags && (t.Hash == Html || t.Hash == Head || t.Hash == Body) || t.Hash == Colgroup) { + break + } else if t.TokenType == html.EndTagToken { + omitEndTag := false + if !o.KeepEndTags { + if t.Hash == Thead || t.Hash == Tbody || t.Hash == Tfoot || t.Hash == Tr || t.Hash == Th || + t.Hash == Td || t.Hash == Option || t.Hash == Dd || t.Hash == Dt || t.Hash == Li || + t.Hash == Rb || t.Hash == Rt || t.Hash == Rtc || t.Hash == Rp { + omitEndTag = true // omit end tags + } else if t.Hash == P { + i := 0 + for { + next := tb.Peek(i) + i++ + // continue if text token is empty or whitespace + if next.TokenType == html.TextToken && parse.IsAllWhitespace(next.Data) { + continue + } + if next.TokenType == html.ErrorToken || next.TokenType == html.EndTagToken && next.Traits&keepPTag == 0 || next.TokenType == html.StartTagToken && next.Traits&omitPTag != 0 { + omitEndTag = true // omit p end tag + } + break + } + } else if t.Hash == Optgroup { + i := 0 + for { + next := tb.Peek(i) + i++ + // continue if text token + if next.TokenType == html.TextToken { + continue + } + if next.TokenType == html.ErrorToken || next.Hash != Option { + omitEndTag = true // omit optgroup end tag + } + break + } + } + } + + if t.Traits&nonPhrasingTag != 0 { + omitSpace = true // omit spaces after block elements + } else if o.KeepWhitespace || t.Traits&objectTag != 0 { + omitSpace = false + } + + if !omitEndTag { + if len(t.Data) > 3+len(t.Text) { + t.Data[2+len(t.Text)] = '>' + t.Data = t.Data[:3+len(t.Text)] + } + w.Write(t.Data) + } + + // skip text in select and optgroup tags + if t.Hash == Option || t.Hash == Optgroup { + if next := tb.Peek(0); next.TokenType == html.TextToken { + tb.Shift() + } + } + break + } + + if o.KeepWhitespace || t.Traits&objectTag != 0 { + omitSpace = false + } else if t.Traits&nonPhrasingTag != 0 { + omitSpace = true // omit spaces after block elements + } + + w.Write(t.Data) + + if hasAttributes { + if t.Hash == Meta { + attrs := tb.Attributes(Content, Http_Equiv, Charset, Name) + if content := attrs[0]; content != nil { + if httpEquiv := attrs[1]; httpEquiv != nil { + httpEquiv.AttrVal = parse.TrimWhitespace(httpEquiv.AttrVal) + if charset := attrs[2]; charset == nil && parse.EqualFold(httpEquiv.AttrVal, []byte("content-type")) { + content.AttrVal = minify.Mediatype(content.AttrVal) + if bytes.Equal(content.AttrVal, []byte("text/html;charset=utf-8")) { + httpEquiv.Text = nil + content.Text = []byte("charset") + content.Hash = Charset + content.AttrVal = []byte("utf-8") + } + } + } + if name := attrs[3]; name != nil { + name.AttrVal = parse.TrimWhitespace(name.AttrVal) + if parse.EqualFold(name.AttrVal, []byte("keywords")) { + content.AttrVal = bytes.ReplaceAll(content.AttrVal, []byte(", "), []byte(",")) + } else if parse.EqualFold(name.AttrVal, []byte("viewport")) { + content.AttrVal = bytes.ReplaceAll(content.AttrVal, []byte(" "), []byte("")) + for i := 0; i < len(content.AttrVal); i++ { + if content.AttrVal[i] == '=' && i+2 < len(content.AttrVal) { + i++ + if n := parse.Number(content.AttrVal[i:]); n > 0 { + minNum := minify.Number(content.AttrVal[i:i+n], -1) + if len(minNum) < n { + copy(content.AttrVal[i:i+len(minNum)], minNum) + copy(content.AttrVal[i+len(minNum):], content.AttrVal[i+n:]) + content.AttrVal = content.AttrVal[:len(content.AttrVal)+len(minNum)-n] + } + i += len(minNum) + } + i-- // mitigate for-loop increase + } + } + } + } + } + } else if t.Hash == Script { + attrs := tb.Attributes(Src, Charset) + if attrs[0] != nil && attrs[1] != nil { + attrs[1].Text = nil + } + } else if t.Hash == Input { + attrs := tb.Attributes(Type, Value) + if t, value := attrs[0], attrs[1]; t != nil && value != nil { + isRadio := parse.EqualFold(t.AttrVal, radioBytes) + if !isRadio && len(value.AttrVal) == 0 { + value.Text = nil + } else if isRadio && parse.EqualFold(value.AttrVal, onBytes) { + value.Text = nil + } + } + } else if t.Hash == A { + attrs := tb.Attributes(Id, Name) + if id, name := attrs[0], attrs[1]; id != nil && name != nil { + if bytes.Equal(id.AttrVal, name.AttrVal) { + name.Text = nil + } + } + } + + // write attributes + for { + attr := *tb.Shift() + if attr.TokenType != html.AttributeToken { + break + } else if attr.Text == nil { + continue // removed attribute + } + + val := attr.AttrVal + if attr.Traits&trimAttr != 0 { + val = parse.ReplaceMultipleWhitespaceAndEntities(val, EntitiesMap, nil) + val = parse.TrimWhitespace(val) + } else { + val = parse.ReplaceEntities(val, EntitiesMap, nil) + } + if t.Traits != 0 { + if len(val) == 0 && (attr.Hash == Class || + attr.Hash == Dir || + attr.Hash == Id || + attr.Hash == Name || + attr.Hash == Action && t.Hash == Form) { + continue // omit empty attribute values + } + if attr.Traits&caselessAttr != 0 { + val = parse.ToLower(val) + if attr.Hash == Enctype || attr.Hash == Codetype || attr.Hash == Accept || attr.Hash == Type && (t.Hash == A || t.Hash == Link || t.Hash == Embed || t.Hash == Object || t.Hash == Source || t.Hash == Script || t.Hash == Style) { + val = minify.Mediatype(val) + } + } + if rawTagHash != 0 && attr.Hash == Type { + rawTagMediatype = parse.Copy(val) + } + + // default attribute values can be omitted + if !o.KeepDefaultAttrVals && (attr.Hash == Type && (t.Hash == Script && jsMimetypes[string(val)] || + t.Hash == Style && bytes.Equal(val, cssMimeBytes) || + t.Hash == Link && bytes.Equal(val, cssMimeBytes) || + t.Hash == Input && bytes.Equal(val, textBytes) || + t.Hash == Button && bytes.Equal(val, submitBytes)) || + attr.Hash == Language && t.Hash == Script || + attr.Hash == Method && bytes.Equal(val, getBytes) || + attr.Hash == Enctype && bytes.Equal(val, formMimeBytes) || + attr.Hash == Colspan && bytes.Equal(val, oneBytes) || + attr.Hash == Rowspan && bytes.Equal(val, oneBytes) || + attr.Hash == Shape && bytes.Equal(val, rectBytes) || + attr.Hash == Span && bytes.Equal(val, oneBytes) || + attr.Hash == Clear && bytes.Equal(val, noneBytes) || + attr.Hash == Frameborder && bytes.Equal(val, oneBytes) || + attr.Hash == Scrolling && bytes.Equal(val, autoBytes) || + attr.Hash == Valuetype && bytes.Equal(val, dataBytes) || + attr.Hash == Media && t.Hash == Style && bytes.Equal(val, allBytes)) { + continue + } + + if attr.Hash == Style { + // CSS minifier for attribute inline code + val = parse.TrimWhitespace(val) + attrMinifyBuffer.Reset() + if err := m.MinifyMimetype(cssMimeBytes, attrMinifyBuffer, buffer.NewReader(val), inlineParams); err == nil { + val = attrMinifyBuffer.Bytes() + } else if err != minify.ErrNotExist { + return minify.UpdateErrorPosition(err, z, attr.Offset) + } + if len(val) == 0 { + continue + } + } else if len(attr.Text) > 2 && attr.Text[0] == 'o' && attr.Text[1] == 'n' { + // JS minifier for attribute inline code + val = parse.TrimWhitespace(val) + if len(val) >= 11 && parse.EqualFold(val[:11], jsSchemeBytes) { + val = val[11:] + } + attrMinifyBuffer.Reset() + if err := m.MinifyMimetype(jsMimeBytes, attrMinifyBuffer, buffer.NewReader(val), nil); err == nil { + val = attrMinifyBuffer.Bytes() + } else if err != minify.ErrNotExist { + return minify.UpdateErrorPosition(err, z, attr.Offset) + } + if len(val) == 0 { + continue + } + } else if attr.Traits&urlAttr != 0 { // anchors are already handled + val = parse.TrimWhitespace(val) + if 5 < len(val) { + if parse.EqualFold(val[:4], httpBytes) { + if val[4] == ':' { + if m.URL != nil && m.URL.Scheme == "http" { + val = val[5:] + } else { + parse.ToLower(val[:4]) + } + } else if (val[4] == 's' || val[4] == 'S') && val[5] == ':' { + if m.URL != nil && m.URL.Scheme == "https" { + val = val[6:] + } else { + parse.ToLower(val[:5]) + } + } + } else if parse.EqualFold(val[:5], dataSchemeBytes) { + val = minify.DataURI(m, val) + } + } + } + } + + w.Write(spaceBytes) + w.Write(attr.Text) + if len(val) > 0 && attr.Traits&booleanAttr == 0 { + w.Write(isBytes) + + // use double quotes for RDFa attributes + isXML := attr.Hash == Vocab || attr.Hash == Typeof || attr.Hash == Property || attr.Hash == Resource || attr.Hash == Prefix || attr.Hash == Content || attr.Hash == About || attr.Hash == Rev || attr.Hash == Datatype || attr.Hash == Inlist + + // no quotes if possible, else prefer single or double depending on which occurs more often in value + var quote byte + + if 0 < len(attr.Data) && (attr.Data[len(attr.Data)-1] == '\'' || attr.Data[len(attr.Data)-1] == '"') { + quote = attr.Data[len(attr.Data)-1] + } + val = html.EscapeAttrVal(&attrByteBuffer, val, quote, o.KeepQuotes, isXML) + w.Write(val) + } + } + } else { + _ = tb.Shift() // StartTagClose + } + w.Write(gtBytes) + + // skip text in select and optgroup tags + if t.Hash == Select || t.Hash == Optgroup { + if next := tb.Peek(0); next.TokenType == html.TextToken { + tb.Shift() + } + } + + // keep space after phrasing tags (<i>, <span>, ...) FontAwesome etc. + if t.TokenType == html.StartTagToken && t.Traits&nonPhrasingTag == 0 { + if next := tb.Peek(0); next.Hash == t.Hash && next.TokenType == html.EndTagToken { + omitSpace = false + } + } + } + } +} |