Diffstat (limited to 'vendor/github.com/tdewolff')
-rw-r--r--  vendor/github.com/tdewolff/minify/v2/html/html.go |   2
-rw-r--r--  vendor/github.com/tdewolff/parse/v2/css/README.md | 170
-rw-r--r--  vendor/github.com/tdewolff/parse/v2/css/hash.go   |  75
-rw-r--r--  vendor/github.com/tdewolff/parse/v2/css/lex.go    | 698
-rw-r--r--  vendor/github.com/tdewolff/parse/v2/css/parse.go  | 493
-rw-r--r--  vendor/github.com/tdewolff/parse/v2/css/util.go   |  47
-rw-r--r--  vendor/github.com/tdewolff/parse/v2/html/lex.go   |  22
-rw-r--r--  vendor/github.com/tdewolff/parse/v2/html/parse.go | 403
8 files changed, 1900 insertions, 10 deletions
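The minify/v2/html change in the first hunks below resets `omitSpace` after writing embedded SVG and MathML content, which presumably keeps the minifier from dropping a significant space that follows an inline `<svg>`/`<math>` element. A minimal sketch of exercising that path through the public minify API (the input string is illustrative, not part of this change):

``` go
package main

import (
	"os"
	"strings"

	"github.com/tdewolff/minify/v2"
	"github.com/tdewolff/minify/v2/html"
)

func main() {
	m := minify.New()
	m.AddFunc("text/html", html.Minify)
	// The space between </svg> and the following text is significant;
	// resetting omitSpace after the SVG token should keep it intact.
	in := `<p><svg viewBox="0 0 1 1"></svg> label</p>`
	if err := m.Minify("text/html", os.Stdout, strings.NewReader(in)); err != nil {
		panic(err)
	}
}
```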
diff --git a/vendor/github.com/tdewolff/minify/v2/html/html.go b/vendor/github.com/tdewolff/minify/v2/html/html.go
index 1a5aa9450..ea817037b 100644
--- a/vendor/github.com/tdewolff/minify/v2/html/html.go
+++ b/vendor/github.com/tdewolff/minify/v2/html/html.go
@@ -126,6 +126,7 @@ func (o *Minifier) Minify(m *minify.M, w io.Writer, r io.Reader, _ map[string]st
 				}
 				w.Write(t.Data)
 			}
+			omitSpace = false
 		case html.MathToken:
 			if err := m.MinifyMimetype(mathMimeBytes, w, buffer.NewReader(t.Data), nil); err != nil {
 				if err != minify.ErrNotExist {
@@ -133,6 +134,7 @@ func (o *Minifier) Minify(m *minify.M, w io.Writer, r io.Reader, _ map[string]st
 				}
 				w.Write(t.Data)
 			}
+			omitSpace = false
 		case html.TextToken:
 			if t.HasTemplate {
 				w.Write(t.Data)
diff --git a/vendor/github.com/tdewolff/parse/v2/css/README.md b/vendor/github.com/tdewolff/parse/v2/css/README.md
new file mode 100644
index 000000000..02797a711
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/v2/css/README.md
@@ -0,0 +1,170 @@
+# CSS [![API reference](https://img.shields.io/badge/godoc-reference-5272B4)](https://pkg.go.dev/github.com/tdewolff/parse/v2/css?tab=doc)
+
+This package is a CSS3 lexer and parser written in [Go][1]. Both follow the specification at [CSS Syntax Module Level 3](http://www.w3.org/TR/css-syntax-3/). The lexer takes an io.Reader and converts it into tokens until the EOF. The parser returns a parse tree of the full io.Reader input stream, but the low-level `Next` function can be used for stream parsing to return grammar units until the EOF.
+
+## Installation
+Run the following command
+
+	go get -u github.com/tdewolff/parse/v2/css
+
+or add the following import and run the project with `go get`:
+
+	import "github.com/tdewolff/parse/v2/css"
+
+## Lexer
+### Usage
+The following initializes a new Lexer with io.Reader `r`:
+``` go
+l := css.NewLexer(parse.NewInput(r))
+```
+
+To tokenize until EOF or an error occurs, use:
+``` go
+for {
+	tt, text := l.Next()
+	switch tt {
+	case css.ErrorToken:
+		// error or EOF set in l.Err()
+		return
+	// ...
+	}
+}
+```
+
+All tokens (see [CSS Syntax Module Level 3](http://www.w3.org/TR/css3-syntax/)):
+``` go
+ErrorToken			// non-official token, returned when errors occur
+IdentToken
+FunctionToken		// rgb( rgba( ...
+AtKeywordToken		// @abc
+HashToken			// #abc
+StringToken
+BadStringToken
+URLToken			// url(
+BadURLToken
+DelimToken			// any unmatched character
+NumberToken			// 5
+PercentageToken		// 5%
+DimensionToken		// 5em
+UnicodeRangeToken
+IncludeMatchToken	// ~=
+DashMatchToken		// |=
+PrefixMatchToken	// ^=
+SuffixMatchToken	// $=
+SubstringMatchToken	// *=
+ColumnToken			// ||
+WhitespaceToken
+CDOToken			// <!--
+CDCToken			// -->
+ColonToken
+SemicolonToken
+CommaToken
+BracketToken		// ( ) [ ] { }, all bracket tokens use this, Data() can distinguish between the brackets
+CommentToken		// non-official token
+```
+
+### Examples
+``` go
+package main
+
+import (
+	"fmt"
+	"io"
+	"os"
+
+	"github.com/tdewolff/parse/v2"
+	"github.com/tdewolff/parse/v2/css"
+)
+
+// Tokenize CSS3 from stdin.
+func main() {
+	l := css.NewLexer(parse.NewInput(os.Stdin))
+	for {
+		tt, text := l.Next()
+		switch tt {
+		case css.ErrorToken:
+			if l.Err() != io.EOF {
+				fmt.Println("Error:", l.Err())
+			}
+			return
+		case css.IdentToken:
+			fmt.Println("Identifier", string(text))
+		case css.NumberToken:
+			fmt.Println("Number", string(text))
+		// ...
+		}
+	}
+}
+```
+
+## Parser
+### Usage
+The following creates a new Parser.
+``` go
+// true because this is the content of an inline style attribute
+p := css.NewParser(parse.NewInput(bytes.NewBufferString("color: red;")), true)
+```
+
+To iterate over the stylesheet, use:
+``` go
+for {
+	gt, _, data := p.Next()
+	if gt == css.ErrorGrammar {
+		break
+	}
+	// ...
+}
+```
+
+All grammar units returned by `Next`:
+``` go
+ErrorGrammar
+CommentGrammar
+AtRuleGrammar
+BeginAtRuleGrammar
+EndAtRuleGrammar
+QualifiedRuleGrammar
+BeginRulesetGrammar
+EndRulesetGrammar
+DeclarationGrammar
+TokenGrammar
+CustomPropertyGrammar
+```
+
+### Examples
+``` go
+package main
+
+import (
+	"bytes"
+	"fmt"
+
+	"github.com/tdewolff/parse/v2"
+	"github.com/tdewolff/parse/v2/css"
+)
+
+func main() {
+	// true because this is the content of an inline style attribute
+	p := css.NewParser(parse.NewInput(bytes.NewBufferString("color: red;")), true)
+	out := ""
+	for {
+		gt, _, data := p.Next()
+		if gt == css.ErrorGrammar {
+			break
+		} else if gt == css.AtRuleGrammar || gt == css.BeginAtRuleGrammar || gt == css.BeginRulesetGrammar || gt == css.DeclarationGrammar {
+			out += string(data)
+			if gt == css.DeclarationGrammar {
+				out += ":"
+			}
+			for _, val := range p.Values() {
+				out += string(val.Data)
+			}
+			if gt == css.BeginAtRuleGrammar || gt == css.BeginRulesetGrammar {
+				out += "{"
+			} else if gt == css.AtRuleGrammar || gt == css.DeclarationGrammar {
+				out += ";"
+			}
+		} else {
+			out += string(data)
+		}
+	}
+	fmt.Println(out)
+}
+```
+
+## License
+Released under the [MIT license](https://github.com/tdewolff/parse/blob/master/LICENSE.md).
+
+[1]: http://golang.org/ "Go Language"
diff --git a/vendor/github.com/tdewolff/parse/v2/css/hash.go b/vendor/github.com/tdewolff/parse/v2/css/hash.go
new file mode 100644
index 000000000..25d2f7cf0
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/v2/css/hash.go
@@ -0,0 +1,75 @@
+package css
+
+// generated by hasher -type=Hash -file=hash.go; DO NOT EDIT, except for adding more constants to the list and rerun go generate
+
+// uses github.com/tdewolff/hasher
+//go:generate hasher -type=Hash -file=hash.go
+
+// Hash defines perfect hashes for a predefined list of strings
+type Hash uint32
+
+// Unique hash definitions to be used instead of strings
+const (
+	Document  Hash = 0x8    // document
+	Font_Face Hash = 0x809  // font-face
+	Keyframes Hash = 0x1109 // keyframes
+	Media     Hash = 0x2105 // media
+	Page      Hash = 0x2604 // page
+	Supports  Hash = 0x1908 // supports
+)
+
+// String returns the hash's name.
+func (i Hash) String() string {
+	start := uint32(i >> 8)
+	n := uint32(i & 0xff)
+	if start+n > uint32(len(_Hash_text)) {
+		return ""
+	}
+	return _Hash_text[start : start+n]
+}
+
+// ToHash returns the hash whose name is s. It returns zero if there is no
+// such hash. It is case sensitive.
+func ToHash(s []byte) Hash { +	if len(s) == 0 || len(s) > _Hash_maxLen { +		return 0 +	} +	h := uint32(_Hash_hash0) +	for i := 0; i < len(s); i++ { +		h ^= uint32(s[i]) +		h *= 16777619 +	} +	if i := _Hash_table[h&uint32(len(_Hash_table)-1)]; int(i&0xff) == len(s) { +		t := _Hash_text[i>>8 : i>>8+i&0xff] +		for i := 0; i < len(s); i++ { +			if t[i] != s[i] { +				goto NEXT +			} +		} +		return i +	} +NEXT: +	if i := _Hash_table[(h>>16)&uint32(len(_Hash_table)-1)]; int(i&0xff) == len(s) { +		t := _Hash_text[i>>8 : i>>8+i&0xff] +		for i := 0; i < len(s); i++ { +			if t[i] != s[i] { +				return 0 +			} +		} +		return i +	} +	return 0 +} + +const _Hash_hash0 = 0x9acb0442 +const _Hash_maxLen = 9 +const _Hash_text = "documentfont-facekeyframesupportsmediapage" + +var _Hash_table = [1 << 3]Hash{ +	0x1: 0x2604, // page +	0x2: 0x2105, // media +	0x3: 0x809,  // font-face +	0x5: 0x1109, // keyframes +	0x6: 0x1908, // supports +	0x7: 0x8,    // document +} diff --git a/vendor/github.com/tdewolff/parse/v2/css/lex.go b/vendor/github.com/tdewolff/parse/v2/css/lex.go new file mode 100644 index 000000000..3d1ff7ea3 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/v2/css/lex.go @@ -0,0 +1,698 @@ +// Package css is a CSS3 lexer and parser following the specifications at http://www.w3.org/TR/css-syntax-3/. +package css + +// TODO: \uFFFD replacement character for NULL bytes in strings for example, or atleast don't end the string early + +import ( +	"bytes" +	"io" +	"strconv" + +	"github.com/tdewolff/parse/v2" +) + +// TokenType determines the type of token, eg. a number or a semicolon. +type TokenType uint32 + +// TokenType values. +const ( +	ErrorToken TokenType = iota // extra token when errors occur +	IdentToken +	FunctionToken  // rgb( rgba( ... +	AtKeywordToken // @abc +	HashToken      // #abc +	StringToken +	BadStringToken +	URLToken +	BadURLToken +	DelimToken            // any unmatched character +	NumberToken           // 5 +	PercentageToken       // 5% +	DimensionToken        // 5em +	UnicodeRangeToken     // U+554A +	IncludeMatchToken     // ~= +	DashMatchToken        // |= +	PrefixMatchToken      // ^= +	SuffixMatchToken      // $= +	SubstringMatchToken   // *= +	ColumnToken           // || +	WhitespaceToken       // space \t \r \n \f +	CDOToken              // <!-- +	CDCToken              // --> +	ColonToken            // : +	SemicolonToken        // ; +	CommaToken            // , +	LeftBracketToken      // [ +	RightBracketToken     // ] +	LeftParenthesisToken  // ( +	RightParenthesisToken // ) +	LeftBraceToken        // { +	RightBraceToken       // } +	CommentToken          // extra token for comments +	EmptyToken +	CustomPropertyNameToken +	CustomPropertyValueToken +) + +// String returns the string representation of a TokenType. 
+func (tt TokenType) String() string { +	switch tt { +	case ErrorToken: +		return "Error" +	case IdentToken: +		return "Ident" +	case FunctionToken: +		return "Function" +	case AtKeywordToken: +		return "AtKeyword" +	case HashToken: +		return "Hash" +	case StringToken: +		return "String" +	case BadStringToken: +		return "BadString" +	case URLToken: +		return "URL" +	case BadURLToken: +		return "BadURL" +	case DelimToken: +		return "Delim" +	case NumberToken: +		return "Number" +	case PercentageToken: +		return "Percentage" +	case DimensionToken: +		return "Dimension" +	case UnicodeRangeToken: +		return "UnicodeRange" +	case IncludeMatchToken: +		return "IncludeMatch" +	case DashMatchToken: +		return "DashMatch" +	case PrefixMatchToken: +		return "PrefixMatch" +	case SuffixMatchToken: +		return "SuffixMatch" +	case SubstringMatchToken: +		return "SubstringMatch" +	case ColumnToken: +		return "Column" +	case WhitespaceToken: +		return "Whitespace" +	case CDOToken: +		return "CDO" +	case CDCToken: +		return "CDC" +	case ColonToken: +		return "Colon" +	case SemicolonToken: +		return "Semicolon" +	case CommaToken: +		return "Comma" +	case LeftBracketToken: +		return "LeftBracket" +	case RightBracketToken: +		return "RightBracket" +	case LeftParenthesisToken: +		return "LeftParenthesis" +	case RightParenthesisToken: +		return "RightParenthesis" +	case LeftBraceToken: +		return "LeftBrace" +	case RightBraceToken: +		return "RightBrace" +	case CommentToken: +		return "Comment" +	case EmptyToken: +		return "Empty" +	case CustomPropertyNameToken: +		return "CustomPropertyName" +	case CustomPropertyValueToken: +		return "CustomPropertyValue" +	} +	return "Invalid(" + strconv.Itoa(int(tt)) + ")" +} + +//////////////////////////////////////////////////////////////// + +// Lexer is the state for the lexer. +type Lexer struct { +	r *parse.Input +} + +// NewLexer returns a new Lexer for a given io.Reader. +func NewLexer(r *parse.Input) *Lexer { +	return &Lexer{ +		r: r, +	} +} + +// Err returns the error encountered during lexing, this is often io.EOF but also other errors can be returned. +func (l *Lexer) Err() error { +	return l.r.Err() +} + +// Next returns the next Token. It returns ErrorToken when an error was encountered. Using Err() one can retrieve the error message. 
+func (l *Lexer) Next() (TokenType, []byte) { +	switch l.r.Peek(0) { +	case ' ', '\t', '\n', '\r', '\f': +		l.r.Move(1) +		for l.consumeWhitespace() { +		} +		return WhitespaceToken, l.r.Shift() +	case ':': +		l.r.Move(1) +		return ColonToken, l.r.Shift() +	case ';': +		l.r.Move(1) +		return SemicolonToken, l.r.Shift() +	case ',': +		l.r.Move(1) +		return CommaToken, l.r.Shift() +	case '(', ')', '[', ']', '{', '}': +		if t := l.consumeBracket(); t != ErrorToken { +			return t, l.r.Shift() +		} +	case '#': +		if l.consumeHashToken() { +			return HashToken, l.r.Shift() +		} +	case '"', '\'': +		if t := l.consumeString(); t != ErrorToken { +			return t, l.r.Shift() +		} +	case '.', '+': +		if t := l.consumeNumeric(); t != ErrorToken { +			return t, l.r.Shift() +		} +	case '-': +		if t := l.consumeNumeric(); t != ErrorToken { +			return t, l.r.Shift() +		} else if t := l.consumeIdentlike(); t != ErrorToken { +			return t, l.r.Shift() +		} else if l.consumeCDCToken() { +			return CDCToken, l.r.Shift() +		} else if l.consumeCustomVariableToken() { +			return CustomPropertyNameToken, l.r.Shift() +		} +	case '@': +		if l.consumeAtKeywordToken() { +			return AtKeywordToken, l.r.Shift() +		} +	case '$', '*', '^', '~': +		if t := l.consumeMatch(); t != ErrorToken { +			return t, l.r.Shift() +		} +	case '/': +		if l.consumeComment() { +			return CommentToken, l.r.Shift() +		} +	case '<': +		if l.consumeCDOToken() { +			return CDOToken, l.r.Shift() +		} +	case '\\': +		if t := l.consumeIdentlike(); t != ErrorToken { +			return t, l.r.Shift() +		} +	case 'u', 'U': +		if l.consumeUnicodeRangeToken() { +			return UnicodeRangeToken, l.r.Shift() +		} else if t := l.consumeIdentlike(); t != ErrorToken { +			return t, l.r.Shift() +		} +	case '|': +		if t := l.consumeMatch(); t != ErrorToken { +			return t, l.r.Shift() +		} else if l.consumeColumnToken() { +			return ColumnToken, l.r.Shift() +		} +	case 0: +		if l.r.Err() != nil { +			return ErrorToken, nil +		} +	default: +		if t := l.consumeNumeric(); t != ErrorToken { +			return t, l.r.Shift() +		} else if t := l.consumeIdentlike(); t != ErrorToken { +			return t, l.r.Shift() +		} +	} +	// can't be rune because consumeIdentlike consumes that as an identifier +	l.r.Move(1) +	return DelimToken, l.r.Shift() +} + +//////////////////////////////////////////////////////////////// + +/* +The following functions follow the railroad diagrams in http://www.w3.org/TR/css3-syntax/ +*/ + +func (l *Lexer) consumeByte(c byte) bool { +	if l.r.Peek(0) == c { +		l.r.Move(1) +		return true +	} +	return false +} + +func (l *Lexer) consumeComment() bool { +	if l.r.Peek(0) != '/' || l.r.Peek(1) != '*' { +		return false +	} +	l.r.Move(2) +	for { +		c := l.r.Peek(0) +		if c == 0 && l.r.Err() != nil { +			break +		} else if c == '*' && l.r.Peek(1) == '/' { +			l.r.Move(2) +			return true +		} +		l.r.Move(1) +	} +	return true +} + +func (l *Lexer) consumeNewline() bool { +	c := l.r.Peek(0) +	if c == '\n' || c == '\f' { +		l.r.Move(1) +		return true +	} else if c == '\r' { +		if l.r.Peek(1) == '\n' { +			l.r.Move(2) +		} else { +			l.r.Move(1) +		} +		return true +	} +	return false +} + +func (l *Lexer) consumeWhitespace() bool { +	c := l.r.Peek(0) +	if c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' { +		l.r.Move(1) +		return true +	} +	return false +} + +func (l *Lexer) consumeDigit() bool { +	c := l.r.Peek(0) +	if c >= '0' && c <= '9' { +		l.r.Move(1) +		return true +	} +	return false +} + +func (l *Lexer) consumeHexDigit() bool { +	c := l.r.Peek(0) +	if (c >= '0' && 
c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') { +		l.r.Move(1) +		return true +	} +	return false +} + +func (l *Lexer) consumeEscape() bool { +	if l.r.Peek(0) != '\\' { +		return false +	} +	mark := l.r.Pos() +	l.r.Move(1) +	if l.consumeNewline() { +		l.r.Rewind(mark) +		return false +	} else if l.consumeHexDigit() { +		for k := 1; k < 6; k++ { +			if !l.consumeHexDigit() { +				break +			} +		} +		l.consumeWhitespace() +		return true +	} else { +		c := l.r.Peek(0) +		if c >= 0xC0 { +			_, n := l.r.PeekRune(0) +			l.r.Move(n) +			return true +		} else if c == 0 && l.r.Err() != nil { +			l.r.Rewind(mark) +			return false +		} +	} +	l.r.Move(1) +	return true +} + +func (l *Lexer) consumeIdentToken() bool { +	mark := l.r.Pos() +	if l.r.Peek(0) == '-' { +		l.r.Move(1) +	} +	c := l.r.Peek(0) +	if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c >= 0x80) { +		if c != '\\' || !l.consumeEscape() { +			l.r.Rewind(mark) +			return false +		} +	} else { +		l.r.Move(1) +	} +	for { +		c := l.r.Peek(0) +		if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' || c == '-' || c >= 0x80) { +			if c != '\\' || !l.consumeEscape() { +				break +			} +		} else { +			l.r.Move(1) +		} +	} +	return true +} + +// support custom variables, https://www.w3.org/TR/css-variables-1/ +func (l *Lexer) consumeCustomVariableToken() bool { +	// expect to be on a '-' +	l.r.Move(1) +	if l.r.Peek(0) != '-' { +		l.r.Move(-1) +		return false +	} +	if !l.consumeIdentToken() { +		l.r.Move(-1) +		return false +	} +	return true +} + +func (l *Lexer) consumeAtKeywordToken() bool { +	// expect to be on an '@' +	l.r.Move(1) +	if !l.consumeIdentToken() { +		l.r.Move(-1) +		return false +	} +	return true +} + +func (l *Lexer) consumeHashToken() bool { +	// expect to be on a '#' +	mark := l.r.Pos() +	l.r.Move(1) +	c := l.r.Peek(0) +	if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' || c == '-' || c >= 0x80) { +		if c != '\\' || !l.consumeEscape() { +			l.r.Rewind(mark) +			return false +		} +	} else { +		l.r.Move(1) +	} +	for { +		c := l.r.Peek(0) +		if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' || c == '-' || c >= 0x80) { +			if c != '\\' || !l.consumeEscape() { +				break +			} +		} else { +			l.r.Move(1) +		} +	} +	return true +} + +func (l *Lexer) consumeNumberToken() bool { +	mark := l.r.Pos() +	c := l.r.Peek(0) +	if c == '+' || c == '-' { +		l.r.Move(1) +	} +	firstDigit := l.consumeDigit() +	if firstDigit { +		for l.consumeDigit() { +		} +	} +	if l.r.Peek(0) == '.' { +		l.r.Move(1) +		if l.consumeDigit() { +			for l.consumeDigit() { +			} +		} else if firstDigit { +			// . 
could belong to the next token +			l.r.Move(-1) +			return true +		} else { +			l.r.Rewind(mark) +			return false +		} +	} else if !firstDigit { +		l.r.Rewind(mark) +		return false +	} +	mark = l.r.Pos() +	c = l.r.Peek(0) +	if c == 'e' || c == 'E' { +		l.r.Move(1) +		c = l.r.Peek(0) +		if c == '+' || c == '-' { +			l.r.Move(1) +		} +		if !l.consumeDigit() { +			// e could belong to next token +			l.r.Rewind(mark) +			return true +		} +		for l.consumeDigit() { +		} +	} +	return true +} + +func (l *Lexer) consumeUnicodeRangeToken() bool { +	c := l.r.Peek(0) +	if (c != 'u' && c != 'U') || l.r.Peek(1) != '+' { +		return false +	} +	mark := l.r.Pos() +	l.r.Move(2) + +	// consume up to 6 hexDigits +	k := 0 +	for l.consumeHexDigit() { +		k++ +	} + +	// either a minus or a question mark or the end is expected +	if l.consumeByte('-') { +		if k == 0 || 6 < k { +			l.r.Rewind(mark) +			return false +		} + +		// consume another up to 6 hexDigits +		if l.consumeHexDigit() { +			k = 1 +			for l.consumeHexDigit() { +				k++ +			} +		} else { +			l.r.Rewind(mark) +			return false +		} +	} else if l.consumeByte('?') { +		// could be filled up to 6 characters with question marks or else regular hexDigits +		k++ +		for l.consumeByte('?') { +			k++ +		} +	} +	if k == 0 || 6 < k { +		l.r.Rewind(mark) +		return false +	} +	return true +} + +func (l *Lexer) consumeColumnToken() bool { +	if l.r.Peek(0) == '|' && l.r.Peek(1) == '|' { +		l.r.Move(2) +		return true +	} +	return false +} + +func (l *Lexer) consumeCDOToken() bool { +	if l.r.Peek(0) == '<' && l.r.Peek(1) == '!' && l.r.Peek(2) == '-' && l.r.Peek(3) == '-' { +		l.r.Move(4) +		return true +	} +	return false +} + +func (l *Lexer) consumeCDCToken() bool { +	if l.r.Peek(0) == '-' && l.r.Peek(1) == '-' && l.r.Peek(2) == '>' { +		l.r.Move(3) +		return true +	} +	return false +} + +//////////////////////////////////////////////////////////////// + +// consumeMatch consumes any MatchToken. +func (l *Lexer) consumeMatch() TokenType { +	if l.r.Peek(1) == '=' { +		switch l.r.Peek(0) { +		case '~': +			l.r.Move(2) +			return IncludeMatchToken +		case '|': +			l.r.Move(2) +			return DashMatchToken +		case '^': +			l.r.Move(2) +			return PrefixMatchToken +		case '$': +			l.r.Move(2) +			return SuffixMatchToken +		case '*': +			l.r.Move(2) +			return SubstringMatchToken +		} +	} +	return ErrorToken +} + +// consumeBracket consumes any bracket token. +func (l *Lexer) consumeBracket() TokenType { +	switch l.r.Peek(0) { +	case '(': +		l.r.Move(1) +		return LeftParenthesisToken +	case ')': +		l.r.Move(1) +		return RightParenthesisToken +	case '[': +		l.r.Move(1) +		return LeftBracketToken +	case ']': +		l.r.Move(1) +		return RightBracketToken +	case '{': +		l.r.Move(1) +		return LeftBraceToken +	case '}': +		l.r.Move(1) +		return RightBraceToken +	} +	return ErrorToken +} + +// consumeNumeric consumes NumberToken, PercentageToken or DimensionToken. +func (l *Lexer) consumeNumeric() TokenType { +	if l.consumeNumberToken() { +		if l.consumeByte('%') { +			return PercentageToken +		} else if l.consumeIdentToken() { +			return DimensionToken +		} +		return NumberToken +	} +	return ErrorToken +} + +// consumeString consumes a string and may return BadStringToken when a newline is encountered. 
+func (l *Lexer) consumeString() TokenType { +	// assume to be on " or ' +	delim := l.r.Peek(0) +	l.r.Move(1) +	for { +		c := l.r.Peek(0) +		if c == 0 && l.r.Err() != nil { +			break +		} else if c == '\n' || c == '\r' || c == '\f' { +			l.r.Move(1) +			return BadStringToken +		} else if c == delim { +			l.r.Move(1) +			break +		} else if c == '\\' { +			if !l.consumeEscape() { +				// either newline or EOF after backslash +				l.r.Move(1) +				l.consumeNewline() +			} +		} else { +			l.r.Move(1) +		} +	} +	return StringToken +} + +func (l *Lexer) consumeUnquotedURL() bool { +	for { +		c := l.r.Peek(0) +		if c == 0 && l.r.Err() != nil || c == ')' { +			break +		} else if c == '"' || c == '\'' || c == '(' || c == '\\' || c == ' ' || c <= 0x1F || c == 0x7F { +			if c != '\\' || !l.consumeEscape() { +				return false +			} +		} else { +			l.r.Move(1) +		} +	} +	return true +} + +// consumeRemnantsBadUrl consumes bytes of a BadUrlToken so that normal tokenization may continue. +func (l *Lexer) consumeRemnantsBadURL() { +	for { +		if l.consumeByte(')') || l.r.Err() != nil { +			break +		} else if !l.consumeEscape() { +			l.r.Move(1) +		} +	} +} + +// consumeIdentlike consumes IdentToken, FunctionToken or UrlToken. +func (l *Lexer) consumeIdentlike() TokenType { +	if l.consumeIdentToken() { +		if l.r.Peek(0) != '(' { +			return IdentToken +		} else if !parse.EqualFold(bytes.Replace(l.r.Lexeme(), []byte{'\\'}, nil, -1), []byte{'u', 'r', 'l'}) { +			l.r.Move(1) +			return FunctionToken +		} +		l.r.Move(1) + +		// consume url +		for l.consumeWhitespace() { +		} +		if c := l.r.Peek(0); c == '"' || c == '\'' { +			if l.consumeString() == BadStringToken { +				l.consumeRemnantsBadURL() +				return BadURLToken +			} +		} else if !l.consumeUnquotedURL() && !l.consumeWhitespace() { // if unquoted URL fails due to encountering whitespace, continue +			l.consumeRemnantsBadURL() +			return BadURLToken +		} +		for l.consumeWhitespace() { +		} +		if !l.consumeByte(')') && l.r.Err() != io.EOF { +			l.consumeRemnantsBadURL() +			return BadURLToken +		} +		return URLToken +	} +	return ErrorToken +} diff --git a/vendor/github.com/tdewolff/parse/v2/css/parse.go b/vendor/github.com/tdewolff/parse/v2/css/parse.go new file mode 100644 index 000000000..381db4146 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/v2/css/parse.go @@ -0,0 +1,493 @@ +package css + +import ( +	"bytes" +	"fmt" +	"strconv" + +	"github.com/tdewolff/parse/v2" +	"github.com/tdewolff/parse/v2/buffer" +) + +var wsBytes = []byte(" ") +var endBytes = []byte("}") +var emptyBytes = []byte("") + +// GrammarType determines the type of grammar. +type GrammarType uint32 + +// GrammarType values. +const ( +	ErrorGrammar GrammarType = iota // extra token when errors occur +	CommentGrammar +	AtRuleGrammar +	BeginAtRuleGrammar +	EndAtRuleGrammar +	QualifiedRuleGrammar +	BeginRulesetGrammar +	EndRulesetGrammar +	DeclarationGrammar +	TokenGrammar +	CustomPropertyGrammar +) + +// String returns the string representation of a GrammarType. 
+func (tt GrammarType) String() string { +	switch tt { +	case ErrorGrammar: +		return "Error" +	case CommentGrammar: +		return "Comment" +	case AtRuleGrammar: +		return "AtRule" +	case BeginAtRuleGrammar: +		return "BeginAtRule" +	case EndAtRuleGrammar: +		return "EndAtRule" +	case QualifiedRuleGrammar: +		return "QualifiedRule" +	case BeginRulesetGrammar: +		return "BeginRuleset" +	case EndRulesetGrammar: +		return "EndRuleset" +	case DeclarationGrammar: +		return "Declaration" +	case TokenGrammar: +		return "Token" +	case CustomPropertyGrammar: +		return "CustomProperty" +	} +	return "Invalid(" + strconv.Itoa(int(tt)) + ")" +} + +//////////////////////////////////////////////////////////////// + +// State is the state function the parser currently is in. +type State func(*Parser) GrammarType + +// Token is a single TokenType and its associated data. +type Token struct { +	TokenType +	Data []byte +} + +func (t Token) String() string { +	return t.TokenType.String() + "('" + string(t.Data) + "')" +} + +// Parser is the state for the parser. +type Parser struct { +	l      *Lexer +	state  []State +	err    string +	errPos int + +	buf   []Token +	level int + +	data        []byte +	tt          TokenType +	keepWS      bool +	prevWS      bool +	prevEnd     bool +	prevComment bool +} + +// NewParser returns a new CSS parser from an io.Reader. isInline specifies whether this is an inline style attribute. +func NewParser(r *parse.Input, isInline bool) *Parser { +	l := NewLexer(r) +	p := &Parser{ +		l:     l, +		state: make([]State, 0, 4), +	} + +	if isInline { +		p.state = append(p.state, (*Parser).parseDeclarationList) +	} else { +		p.state = append(p.state, (*Parser).parseStylesheet) +	} +	return p +} + +// HasParseError returns true if there is a parse error (and not a read error). +func (p *Parser) HasParseError() bool { +	return p.err != "" +} + +// Err returns the error encountered during parsing, this is often io.EOF but also other errors can be returned. +func (p *Parser) Err() error { +	if p.err != "" { +		r := buffer.NewReader(p.l.r.Bytes()) +		return parse.NewError(r, p.errPos, p.err) +	} +	return p.l.Err() +} + +// Next returns the next Grammar. It returns ErrorGrammar when an error was encountered. Using Err() one can retrieve the error message. +func (p *Parser) Next() (GrammarType, TokenType, []byte) { +	p.err = "" + +	if p.prevEnd { +		p.tt, p.data = RightBraceToken, endBytes +		p.prevEnd = false +	} else { +		p.tt, p.data = p.popToken(true) +	} +	gt := p.state[len(p.state)-1](p) +	return gt, p.tt, p.data +} + +// Offset return offset for current Grammar +func (p *Parser) Offset() int { +	return p.l.r.Offset() +} + +// Values returns a slice of Tokens for the last Grammar. Only AtRuleGrammar, BeginAtRuleGrammar, BeginRulesetGrammar and Declaration will return the at-rule components, ruleset selector and declaration values respectively. 
+func (p *Parser) Values() []Token { +	return p.buf +} + +func (p *Parser) popToken(allowComment bool) (TokenType, []byte) { +	p.prevWS = false +	p.prevComment = false +	tt, data := p.l.Next() +	for !p.keepWS && tt == WhitespaceToken || tt == CommentToken { +		if tt == WhitespaceToken { +			p.prevWS = true +		} else { +			p.prevComment = true +			if allowComment && len(p.state) == 1 { +				break +			} +		} +		tt, data = p.l.Next() +	} +	return tt, data +} + +func (p *Parser) initBuf() { +	p.buf = p.buf[:0] +} + +func (p *Parser) pushBuf(tt TokenType, data []byte) { +	p.buf = append(p.buf, Token{tt, data}) +} + +//////////////////////////////////////////////////////////////// + +func (p *Parser) parseStylesheet() GrammarType { +	if p.tt == CDOToken || p.tt == CDCToken { +		return TokenGrammar +	} else if p.tt == AtKeywordToken { +		return p.parseAtRule() +	} else if p.tt == CommentToken { +		return CommentGrammar +	} else if p.tt == ErrorToken { +		return ErrorGrammar +	} +	return p.parseQualifiedRule() +} + +func (p *Parser) parseDeclarationList() GrammarType { +	if p.tt == CommentToken { +		p.tt, p.data = p.popToken(false) +	} +	for p.tt == SemicolonToken { +		p.tt, p.data = p.popToken(false) +	} + +	// IE hack: *color:red; +	if p.tt == DelimToken && p.data[0] == '*' { +		tt, data := p.popToken(false) +		p.tt = tt +		p.data = append(p.data, data...) +	} + +	if p.tt == ErrorToken { +		return ErrorGrammar +	} else if p.tt == AtKeywordToken { +		return p.parseAtRule() +	} else if p.tt == IdentToken || p.tt == DelimToken { +		return p.parseDeclaration() +	} else if p.tt == CustomPropertyNameToken { +		return p.parseCustomProperty() +	} + +	// parse error +	p.initBuf() +	p.l.r.Move(-len(p.data)) +	p.err, p.errPos = fmt.Sprintf("unexpected token '%s' in declaration", string(p.data)), p.l.r.Offset() +	p.l.r.Move(len(p.data)) + +	if p.tt == RightBraceToken { +		// right brace token will occur when we've had a decl error that ended in a right brace token +		// as these are not handled by decl error, we handle it here explicitly. Normally its used to end eg. the qual rule. 
+		p.pushBuf(p.tt, p.data) +		return ErrorGrammar +	} +	return p.parseDeclarationError(p.tt, p.data) +} + +//////////////////////////////////////////////////////////////// + +func (p *Parser) parseAtRule() GrammarType { +	p.initBuf() +	p.data = parse.ToLower(parse.Copy(p.data)) +	atRuleName := p.data +	if len(atRuleName) > 0 && atRuleName[1] == '-' { +		if i := bytes.IndexByte(atRuleName[2:], '-'); i != -1 { +			atRuleName = atRuleName[i+2:] // skip vendor specific prefix +		} +	} +	atRule := ToHash(atRuleName[1:]) + +	first := true +	skipWS := false +	for { +		tt, data := p.popToken(false) +		if tt == LeftBraceToken && p.level == 0 { +			if atRule == Font_Face || atRule == Page { +				p.state = append(p.state, (*Parser).parseAtRuleDeclarationList) +			} else if atRule == Document || atRule == Keyframes || atRule == Media || atRule == Supports { +				p.state = append(p.state, (*Parser).parseAtRuleRuleList) +			} else { +				p.state = append(p.state, (*Parser).parseAtRuleUnknown) +			} +			return BeginAtRuleGrammar +		} else if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken { +			p.prevEnd = (tt == RightBraceToken) +			return AtRuleGrammar +		} else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken { +			p.level++ +		} else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken { +			if p.level == 0 { +				// TODO: buggy +				p.pushBuf(tt, data) +				if 1 < len(p.state) { +					p.state = p.state[:len(p.state)-1] +				} +				p.err, p.errPos = "unexpected ending in at rule", p.l.r.Offset() +				return ErrorGrammar +			} +			p.level-- +		} +		if first { +			if tt == LeftParenthesisToken || tt == LeftBracketToken { +				p.prevWS = false +			} +			first = false +		} +		if len(data) == 1 && (data[0] == ',' || data[0] == ':') { +			skipWS = true +		} else if p.prevWS && !skipWS && tt != RightParenthesisToken { +			p.pushBuf(WhitespaceToken, wsBytes) +		} else { +			skipWS = false +		} +		if tt == LeftParenthesisToken { +			skipWS = true +		} +		p.pushBuf(tt, data) +	} +} + +func (p *Parser) parseAtRuleRuleList() GrammarType { +	if p.tt == RightBraceToken || p.tt == ErrorToken { +		p.state = p.state[:len(p.state)-1] +		return EndAtRuleGrammar +	} else if p.tt == AtKeywordToken { +		return p.parseAtRule() +	} else { +		return p.parseQualifiedRule() +	} +} + +func (p *Parser) parseAtRuleDeclarationList() GrammarType { +	for p.tt == SemicolonToken { +		p.tt, p.data = p.popToken(false) +	} +	if p.tt == RightBraceToken || p.tt == ErrorToken { +		p.state = p.state[:len(p.state)-1] +		return EndAtRuleGrammar +	} +	return p.parseDeclarationList() +} + +func (p *Parser) parseAtRuleUnknown() GrammarType { +	p.keepWS = true +	if p.tt == RightBraceToken && p.level == 0 || p.tt == ErrorToken { +		p.state = p.state[:len(p.state)-1] +		p.keepWS = false +		return EndAtRuleGrammar +	} +	if p.tt == LeftParenthesisToken || p.tt == LeftBraceToken || p.tt == LeftBracketToken || p.tt == FunctionToken { +		p.level++ +	} else if p.tt == RightParenthesisToken || p.tt == RightBraceToken || p.tt == RightBracketToken { +		p.level-- +	} +	return TokenGrammar +} + +func (p *Parser) parseQualifiedRule() GrammarType { +	p.initBuf() +	first := true +	inAttrSel := false +	skipWS := true +	var tt TokenType +	var data []byte +	for { +		if first { +			tt, data = p.tt, p.data +			p.tt = WhitespaceToken +			p.data = emptyBytes +			first = false +		} else { +			tt, data = p.popToken(false) +		} +		if tt == LeftBraceToken 
&& p.level == 0 { +			p.state = append(p.state, (*Parser).parseQualifiedRuleDeclarationList) +			return BeginRulesetGrammar +		} else if tt == ErrorToken { +			p.err, p.errPos = "unexpected ending in qualified rule", p.l.r.Offset() +			return ErrorGrammar +		} else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken { +			p.level++ +		} else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken { +			if p.level == 0 { +				// TODO: buggy +				p.pushBuf(tt, data) +				if 1 < len(p.state) { +					p.state = p.state[:len(p.state)-1] +				} +				p.err, p.errPos = "unexpected ending in qualified rule", p.l.r.Offset() +				return ErrorGrammar +			} +			p.level-- +		} +		if len(data) == 1 && (data[0] == ',' || data[0] == '>' || data[0] == '+' || data[0] == '~') { +			if data[0] == ',' { +				return QualifiedRuleGrammar +			} +			skipWS = true +		} else if p.prevWS && !skipWS && !inAttrSel { +			p.pushBuf(WhitespaceToken, wsBytes) +		} else { +			skipWS = false +		} +		if tt == LeftBracketToken { +			inAttrSel = true +		} else if tt == RightBracketToken { +			inAttrSel = false +		} +		p.pushBuf(tt, data) +	} +} + +func (p *Parser) parseQualifiedRuleDeclarationList() GrammarType { +	for p.tt == SemicolonToken { +		p.tt, p.data = p.popToken(false) +	} +	if p.tt == RightBraceToken || p.tt == ErrorToken { +		p.state = p.state[:len(p.state)-1] +		return EndRulesetGrammar +	} +	return p.parseDeclarationList() +} + +func (p *Parser) parseDeclaration() GrammarType { +	p.initBuf() +	p.data = parse.ToLower(parse.Copy(p.data)) + +	ttName, dataName := p.tt, p.data +	tt, data := p.popToken(false) +	if tt != ColonToken { +		p.l.r.Move(-len(data)) +		p.err, p.errPos = "expected colon in declaration", p.l.r.Offset() +		p.l.r.Move(len(data)) +		p.pushBuf(ttName, dataName) +		return p.parseDeclarationError(tt, data) +	} + +	skipWS := true +	for { +		tt, data := p.popToken(false) +		if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken { +			p.prevEnd = (tt == RightBraceToken) +			return DeclarationGrammar +		} else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken { +			p.level++ +		} else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken { +			if p.level == 0 { +				// TODO: buggy +				p.err, p.errPos = "unexpected ending in declaration", p.l.r.Offset() +				p.pushBuf(ttName, dataName) +				p.pushBuf(ColonToken, []byte{':'}) +				return p.parseDeclarationError(tt, data) +			} +			p.level-- +		} +		if len(data) == 1 && (data[0] == ',' || data[0] == '/' || data[0] == ':' || data[0] == '!' 
|| data[0] == '=') { +			skipWS = true +		} else if (p.prevWS || p.prevComment) && !skipWS { +			p.pushBuf(WhitespaceToken, wsBytes) +		} else { +			skipWS = false +		} +		p.pushBuf(tt, data) +	} +} + +func (p *Parser) parseDeclarationError(tt TokenType, data []byte) GrammarType { +	// we're on the offending (tt,data), keep popping tokens till we reach ;, }, or EOF +	p.tt, p.data = tt, data +	for { +		if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken { +			p.prevEnd = (tt == RightBraceToken) +			if tt == SemicolonToken { +				p.pushBuf(tt, data) +			} +			return ErrorGrammar +		} else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken { +			p.level++ +		} else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken { +			p.level-- +		} + +		if p.prevWS { +			p.pushBuf(WhitespaceToken, wsBytes) +		} +		p.pushBuf(tt, data) + +		tt, data = p.popToken(false) +	} +} + +func (p *Parser) parseCustomProperty() GrammarType { +	p.initBuf() +	if tt, data := p.popToken(false); tt != ColonToken { +		p.l.r.Move(-len(data)) +		p.err, p.errPos = "expected colon in custom property", p.l.r.Offset() +		p.l.r.Move(len(data)) +		return ErrorGrammar +	} +	val := []byte{} +	for { +		tt, data := p.l.Next() +		if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken { +			p.prevEnd = (tt == RightBraceToken) +			p.pushBuf(CustomPropertyValueToken, val) +			return CustomPropertyGrammar +		} else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken { +			p.level++ +		} else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken { +			if p.level == 0 { +				// TODO: buggy +				p.pushBuf(tt, data) +				p.err, p.errPos = "unexpected ending in custom property", p.l.r.Offset() +				return ErrorGrammar +			} +			p.level-- +		} +		val = append(val, data...) +	} +} diff --git a/vendor/github.com/tdewolff/parse/v2/css/util.go b/vendor/github.com/tdewolff/parse/v2/css/util.go new file mode 100644 index 000000000..20b99a711 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/v2/css/util.go @@ -0,0 +1,47 @@ +package css + +import "github.com/tdewolff/parse/v2" + +// IsIdent returns true if the bytes are a valid identifier. +func IsIdent(b []byte) bool { +	l := NewLexer(parse.NewInputBytes(b)) +	l.consumeIdentToken() +	l.r.Restore() +	return l.r.Pos() == len(b) +} + +// IsURLUnquoted returns true if the bytes are a valid unquoted URL. 
+func IsURLUnquoted(b []byte) bool { +	l := NewLexer(parse.NewInputBytes(b)) +	l.consumeUnquotedURL() +	l.r.Restore() +	return l.r.Pos() == len(b) +} + +// HSL2RGB converts HSL to RGB with all of range [0,1] +// from http://www.w3.org/TR/css3-color/#hsl-color +func HSL2RGB(h, s, l float64) (float64, float64, float64) { +	m2 := l * (s + 1) +	if l > 0.5 { +		m2 = l + s - l*s +	} +	m1 := l*2 - m2 +	return hue2rgb(m1, m2, h+1.0/3.0), hue2rgb(m1, m2, h), hue2rgb(m1, m2, h-1.0/3.0) +} + +func hue2rgb(m1, m2, h float64) float64 { +	if h < 0.0 { +		h += 1.0 +	} +	if h > 1.0 { +		h -= 1.0 +	} +	if h*6.0 < 1.0 { +		return m1 + (m2-m1)*h*6.0 +	} else if h*2.0 < 1.0 { +		return m2 +	} else if h*3.0 < 2.0 { +		return m1 + (m2-m1)*(2.0/3.0-h)*6.0 +	} +	return m1 +} diff --git a/vendor/github.com/tdewolff/parse/v2/html/lex.go b/vendor/github.com/tdewolff/parse/v2/html/lex.go index e3cb9bd04..c000edccc 100644 --- a/vendor/github.com/tdewolff/parse/v2/html/lex.go +++ b/vendor/github.com/tdewolff/parse/v2/html/lex.go @@ -166,6 +166,7 @@ func (l *Lexer) Next() (TokenType, []byte) {  			isEndTag := c == '/' && l.r.Peek(2) != '>' && (l.r.Peek(2) != 0 || l.r.PeekErr(2) == nil)  			if !isEndTag && (c < 'a' || 'z' < c) && (c < 'A' || 'Z' < c) && c != '!' && c != '?' {  				// not a tag +				l.r.Move(1)  			} else if 0 < l.r.Pos() {  				// return currently buffered texttoken so that we can return tag next iteration  				l.text = l.r.Shift() @@ -202,8 +203,9 @@ func (l *Lexer) Next() (TokenType, []byte) {  				return TextToken, l.text  			}  			return ErrorToken, nil +		} else { +			l.r.Move(1)  		} -		l.r.Move(1)  	}  } @@ -539,19 +541,19 @@ func (l *Lexer) shiftXML(rawTag Hash) []byte {  func (l *Lexer) moveTemplate() {  	for { -		if c := l.r.Peek(0); l.at(l.tmplEnd...) || c == 0 && l.r.Err() != nil { -			if c != 0 { -				l.r.Move(len(l.tmplEnd)) -			} -			break +		if c := l.r.Peek(0); c == 0 && l.r.Err() != nil { +			return +		} else if l.at(l.tmplEnd...) 
{ +			l.r.Move(len(l.tmplEnd)) +			return  		} else if c == '"' || c == '\'' {  			l.r.Move(1)  			escape := false  			for { -				if c2 := l.r.Peek(0); !escape && c2 == c || c2 == 0 && l.r.Err() != nil { -					if c2 != 0 { -						l.r.Move(1) -					} +				if c2 := l.r.Peek(0); c2 == 0 && l.r.Err() != nil { +					return +				} else if !escape && c2 == c { +					l.r.Move(1)  					break  				} else if c2 == '\\' {  					escape = !escape diff --git a/vendor/github.com/tdewolff/parse/v2/html/parse.go b/vendor/github.com/tdewolff/parse/v2/html/parse.go new file mode 100644 index 000000000..b7e1ba3dd --- /dev/null +++ b/vendor/github.com/tdewolff/parse/v2/html/parse.go @@ -0,0 +1,403 @@ +package html + +import ( +	"bytes" +	"fmt" +	"io" +	"strings" + +	"github.com/tdewolff/parse/v2" +	"github.com/tdewolff/parse/v2/css" +) + +type AST struct { +	Children []*Tag +	Text     []byte +} + +func (ast *AST) String() string { +	sb := strings.Builder{} +	for i, child := range ast.Children { +		if i != 0 { +			sb.WriteString("\n") +		} +		sb.WriteString(child.ASTString()) +	} +	return sb.String() +} + +type Attr struct { +	Key, Val []byte +} + +func (attr *Attr) String() string { +	return fmt.Sprintf(`%s="%s"`, string(attr.Key), string(attr.Val)) +} + +type Tag struct { +	Root       *AST +	Parent     *Tag +	Prev, Next *Tag +	Children   []*Tag +	Index      int + +	Name               []byte +	Attrs              []Attr +	textStart, textEnd int +} + +func (tag *Tag) getAttr(key []byte) ([]byte, bool) { +	for _, attr := range tag.Attrs { +		if bytes.Equal(key, attr.Key) { +			return attr.Val, true +		} +	} +	return nil, false +} + +func (tag *Tag) GetAttr(key string) (string, bool) { +	val, ok := tag.getAttr([]byte(key)) +	return string(val), ok +} + +func (tag *Tag) Text() string { +	return string(tag.Root.Text[tag.textStart:tag.textEnd]) +} + +func (tag *Tag) String() string { +	sb := strings.Builder{} +	sb.WriteString("<") +	sb.Write(tag.Name) +	for _, attr := range tag.Attrs { +		sb.WriteString(" ") +		sb.WriteString(attr.String()) +	} +	sb.WriteString(">") +	return sb.String() +} + +func (tag *Tag) ASTString() string { +	sb := strings.Builder{} +	sb.WriteString(tag.String()) +	for _, child := range tag.Children { +		sb.WriteString("\n  ") +		s := child.ASTString() +		s = strings.ReplaceAll(s, "\n", "\n  ") +		sb.WriteString(s) +	} +	return sb.String() +} + +func Parse(r *parse.Input) (*AST, error) { +	ast := &AST{} +	root := &Tag{} +	cur := root + +	l := NewLexer(r) +	for { +		tt, data := l.Next() +		switch tt { +		case ErrorToken: +			if err := l.Err(); err != io.EOF { +				return nil, err +			} +			ast.Children = root.Children +			return ast, nil +		case TextToken: +			ast.Text = append(ast.Text, data...) 
+		case StartTagToken: +			child := &Tag{ +				Root:      ast, +				Parent:    cur, +				Index:     len(cur.Children), +				Name:      l.Text(), +				textStart: len(ast.Text), +			} +			if 0 < len(cur.Children) { +				child.Prev = cur.Children[len(cur.Children)-1] +				child.Prev.Next = child +			} +			cur.Children = append(cur.Children, child) +			cur = child +		case AttributeToken: +			val := l.AttrVal() +			if 0 < len(val) && (val[0] == '"' || val[0] == '\'') { +				val = val[1 : len(val)-1] +			} +			cur.Attrs = append(cur.Attrs, Attr{l.AttrKey(), val}) +		case StartTagCloseToken: +			if voidTags[string(cur.Name)] { +				cur.textEnd = len(ast.Text) +				cur = cur.Parent +			} +		case EndTagToken, StartTagVoidToken: +			start := cur +			for start != root && !bytes.Equal(l.Text(), start.Name) { +				start = start.Parent +			} +			if start == root { +				// ignore +			} else { +				parent := start.Parent +				for cur != parent { +					cur.textEnd = len(ast.Text) +					cur = cur.Parent +				} +			} +		} +	} +} + +func (ast *AST) Query(s string) (*Tag, error) { +	sel, err := ParseSelector(s) +	if err != nil { +		return nil, err +	} + +	for _, child := range ast.Children { +		if match := child.query(sel); match != nil { +			return match, nil +		} +	} +	return nil, nil +} + +func (tag *Tag) query(sel selector) *Tag { +	if sel.AppliesTo(tag) { +		return tag +	} +	for _, child := range tag.Children { +		if match := child.query(sel); match != nil { +			return match +		} +	} +	return nil +} + +func (ast *AST) QueryAll(s string) ([]*Tag, error) { +	sel, err := ParseSelector(s) +	if err != nil { +		return nil, err +	} + +	matches := []*Tag{} +	for _, child := range ast.Children { +		child.queryAll(&matches, sel) +	} +	return matches, nil +} + +func (tag *Tag) queryAll(matches *[]*Tag, sel selector) { +	if sel.AppliesTo(tag) { +		*matches = append(*matches, tag) +	} +	for _, child := range tag.Children { +		child.queryAll(matches, sel) +	} +} + +type attrSelector struct { +	op   byte // empty, =, ~, | +	attr []byte +	val  []byte +} + +func (sel attrSelector) AppliesTo(tag *Tag) bool { +	val, ok := tag.getAttr(sel.attr) +	if !ok { +		return false +	} + +	switch sel.op { +	case 0: +		return true +	case '=': +		return bytes.Equal(val, sel.val) +	case '~': +		if 0 < len(sel.val) { +			vals := bytes.Split(val, []byte(" ")) +			for _, val := range vals { +				if bytes.Equal(val, sel.val) { +					return true +				} +			} +		} +	case '|': +		return bytes.Equal(val, sel.val) || bytes.HasPrefix(val, append(sel.val, '-')) +	} +	return false +} + +func (attr attrSelector) String() string { +	sb := strings.Builder{} +	sb.Write(attr.attr) +	if attr.op != 0 { +		sb.WriteByte(attr.op) +		if attr.op != '=' { +			sb.WriteByte('=') +		} +		sb.WriteByte('"') +		sb.Write(attr.val) +		sb.WriteByte('"') +	} +	return sb.String() +} + +type selectorNode struct { +	typ   []byte // is * for universal +	attrs []attrSelector +	op    byte // space or >, last is NULL +} + +func (sel selectorNode) AppliesTo(tag *Tag) bool { +	if 0 < len(sel.typ) && !bytes.Equal(sel.typ, []byte("*")) && !bytes.Equal(sel.typ, tag.Name) { +		return false +	} +	for _, attr := range sel.attrs { +		if !attr.AppliesTo(tag) { +			return false +		} +	} +	return true +} + +func (sel selectorNode) String() string { +	sb := strings.Builder{} +	sb.Write(sel.typ) +	for _, attr := range sel.attrs { +		if bytes.Equal(attr.attr, []byte("id")) && attr.op == '=' { +			sb.WriteByte('#') +			sb.Write(attr.val) +		} else if bytes.Equal(attr.attr, []byte("class")) && 
attr.op == '~' { +			sb.WriteByte('.') +			sb.Write(attr.val) +		} else { +			sb.WriteByte('[') +			sb.WriteString(attr.String()) +			sb.WriteByte(']') +		} +	} +	if sel.op != 0 { +		sb.WriteByte(' ') +		sb.WriteByte(sel.op) +		sb.WriteByte(' ') +	} +	return sb.String() +} + +type token struct { +	tt   css.TokenType +	data []byte +} + +type selector []selectorNode + +func ParseSelector(s string) (selector, error) { +	ts := []token{} +	l := css.NewLexer(parse.NewInputString(s)) +	for { +		tt, data := l.Next() +		if tt == css.ErrorToken { +			if err := l.Err(); err != io.EOF { +				return selector{}, err +			} +			break +		} +		ts = append(ts, token{ +			tt:   tt, +			data: data, +		}) +	} + +	sel := selector{} +	node := selectorNode{} +	for i := 0; i < len(ts); i++ { +		t := ts[i] +		if 0 < i && (t.tt == css.WhitespaceToken || t.tt == css.DelimToken && t.data[0] == '>') { +			if t.tt == css.DelimToken { +				node.op = '>' +			} else { +				node.op = ' ' +			} +			sel = append(sel, node) +			node = selectorNode{} +		} else if t.tt == css.IdentToken || t.tt == css.DelimToken && t.data[0] == '*' { +			node.typ = t.data +		} else if t.tt == css.DelimToken && (t.data[0] == '.' || t.data[0] == '#') && i+1 < len(ts) && ts[i+1].tt == css.IdentToken { +			if t.data[0] == '#' { +				node.attrs = append(node.attrs, attrSelector{op: '=', attr: []byte("id"), val: ts[i+1].data}) +			} else { +				node.attrs = append(node.attrs, attrSelector{op: '~', attr: []byte("class"), val: ts[i+1].data}) +			} +			i++ +		} else if t.tt == css.DelimToken && t.data[0] == '[' && i+2 < len(ts) && ts[i+1].tt == css.IdentToken && ts[i+2].tt == css.DelimToken { +			if ts[i+2].data[0] == ']' { +				node.attrs = append(node.attrs, attrSelector{op: 0, attr: ts[i+1].data}) +				i += 2 +			} else if i+4 < len(ts) && ts[i+3].tt == css.IdentToken && ts[i+4].tt == css.DelimToken && ts[i+4].data[0] == ']' { +				node.attrs = append(node.attrs, attrSelector{op: ts[i+2].data[0], attr: ts[i+1].data, val: ts[i+3].data}) +				i += 4 +			} +		} +	} +	sel = append(sel, node) +	return sel, nil +} + +func (sels selector) AppliesTo(tag *Tag) bool { +	if len(sels) == 0 { +		return true +	} else if !sels[len(sels)-1].AppliesTo(tag) { +		return false +	} + +	tag = tag.Parent +	isel := len(sels) - 2 +	for 0 <= isel && tag != nil { +		switch sels[isel].op { +		case ' ': +			for tag != nil { +				if sels[isel].AppliesTo(tag) { +					break +				} +				tag = tag.Parent +			} +		case '>': +			if !sels[isel].AppliesTo(tag) { +				return false +			} +			tag = tag.Parent +		default: +			return false +		} +		isel-- +	} +	return len(sels) != 0 && isel == -1 +} + +func (sels selector) String() string { +	if len(sels) == 0 { +		return "" +	} +	sb := strings.Builder{} +	for _, sel := range sels { +		sb.WriteString(sel.String()) +	} +	return sb.String()[1:] +} + +var voidTags = map[string]bool{ +	"area":   true, +	"base":   true, +	"br":     true, +	"col":    true, +	"embed":  true, +	"hr":     true, +	"img":    true, +	"input":  true, +	"link":   true, +	"meta":   true, +	"source": true, +	"track":  true, +	"wbr":    true, +}  | 
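The new html/parse.go above gives vendored consumers a small DOM-like AST with CSS-selector queries. A minimal sketch of how `Parse`, `Query`, and the tag accessors defined above might be combined (the document and selector strings are illustrative, not from the diff):

``` go
package main

import (
	"fmt"

	"github.com/tdewolff/parse/v2"
	"github.com/tdewolff/parse/v2/html"
)

func main() {
	doc := `<div id="menu"><a class="item" href="/about">About</a></div>`
	ast, err := html.Parse(parse.NewInputString(doc))
	if err != nil {
		panic(err)
	}
	// ParseSelector supports type, .class, #id, and [attr] selectors plus
	// the ' ' and '>' combinators; a compound selector keeps this simple.
	tag, err := ast.Query("a.item")
	if err != nil {
		panic(err)
	}
	if tag != nil {
		href, _ := tag.GetAttr("href")
		fmt.Println(tag.Text(), href) // prints: About /about
	}
}
```

`QueryAll` works the same way but returns every matching tag rather than the first.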
