1 files changed, 0 insertions, 494 deletions
diff --git a/vendor/github.com/tdewolff/parse/v2/html/lex.go b/vendor/github.com/tdewolff/parse/v2/html/lex.go
deleted file mode 100644
index 4325024bd..000000000
--- a/vendor/github.com/tdewolff/parse/v2/html/lex.go
+++ /dev/null
@@ -1,494 +0,0 @@
-// Package html is an HTML5 lexer following the specifications at http://www.w3.org/TR/html5/syntax.html.
-package html
-
-import (
-	"strconv"
-
-	"github.com/tdewolff/parse/v2"
-)
-
-// TokenType determines the type of token, eg. a number or a semicolon.
-type TokenType uint32
-
-// TokenType values.
-const (
-	ErrorToken TokenType = iota // extra token when errors occur
-	CommentToken
-	DoctypeToken
-	StartTagToken
-	StartTagCloseToken
-	StartTagVoidToken
-	EndTagToken
-	AttributeToken
-	TextToken
-	SvgToken
-	MathToken
-)
-
-// String returns the string representation of a TokenType.
-func (tt TokenType) String() string {
-	switch tt {
-	case ErrorToken:
-		return "Error"
-	case CommentToken:
-		return "Comment"
-	case DoctypeToken:
-		return "Doctype"
-	case StartTagToken:
-		return "StartTag"
-	case StartTagCloseToken:
-		return "StartTagClose"
-	case StartTagVoidToken:
-		return "StartTagVoid"
-	case EndTagToken:
-		return "EndTag"
-	case AttributeToken:
-		return "Attribute"
-	case TextToken:
-		return "Text"
-	case SvgToken:
-		return "Svg"
-	case MathToken:
-		return "Math"
-	}
-	return "Invalid(" + strconv.Itoa(int(tt)) + ")"
-}
-
-////////////////////////////////////////////////////////////////
-
-// Lexer is the state for the lexer.
-type Lexer struct {
-	r   *parse.Input
-	err error
-
-	rawTag Hash
-	inTag  bool
-
-	text    []byte
-	attrVal []byte
-}
-
-// NewLexer returns a new Lexer for a given io.Reader.
-func NewLexer(r *parse.Input) *Lexer {
-	return &Lexer{
-		r: r,
-	}
-}
-
-// Err returns the error encountered during lexing, this is often io.EOF but also other errors can be returned.
-func (l *Lexer) Err() error {
-	if l.err != nil {
-		return l.err
-	}
-	return l.r.Err()
-}
-
-// Text returns the textual representation of a token. This excludes delimiters and additional leading/trailing characters.
-func (l *Lexer) Text() []byte {
-	return l.text
-}
-
-// AttrVal returns the attribute value when an AttributeToken was returned from Next.
-func (l *Lexer) AttrVal() []byte {
-	return l.attrVal
-}
-
-// Next returns the next Token. It returns ErrorToken when an error was encountered. Using Err() one can retrieve the error message.
-func (l *Lexer) Next() (TokenType, []byte) {
-	l.text = nil
-	var c byte
-	if l.inTag {
-		l.attrVal = nil
-		for { // before attribute name state
-			if c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' {
-				l.r.Move(1)
-				continue
-			}
-			break
-		}
-		if c == 0 && l.r.Err() != nil {
-			return ErrorToken, nil
-		} else if c != '>' && (c != '/' || l.r.Peek(1) != '>') {
-			return AttributeToken, l.shiftAttribute()
-		}
-		l.r.Skip()
-		l.inTag = false
-		if c == '/' {
-			l.r.Move(2)
-			return StartTagVoidToken, l.r.Shift()
-		}
-		l.r.Move(1)
-		return StartTagCloseToken, l.r.Shift()
-	}
-
-	if l.rawTag != 0 {
-		if rawText := l.shiftRawText(); len(rawText) > 0 {
-			l.text = rawText
-			l.rawTag = 0
-			return TextToken, rawText
-		}
-		l.rawTag = 0
-	}
-
-	for {
-		c = l.r.Peek(0)
-		if c == '<' {
-			c = l.r.Peek(1)
-			isEndTag := c == '/' && l.r.Peek(2) != '>' && (l.r.Peek(2) != 0 || l.r.PeekErr(2) == nil)
-			if l.r.Pos() > 0 {
-				if isEndTag || 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '!' || c == '?' {
-					// return currently buffered texttoken so that we can return tag next iteration
-					l.text = l.r.Shift()
-					return TextToken, l.text
-				}
-			} else if isEndTag {
-				l.r.Move(2)
-				// only endtags that are not followed by > or EOF arrive here
-				if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
-					return CommentToken, l.shiftBogusComment()
-				}
-				return EndTagToken, l.shiftEndTag()
-			} else if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
-				l.r.Move(1)
-				l.inTag = true
-				return l.shiftStartTag()
-			} else if c == '!' {
-				l.r.Move(2)
-				return l.readMarkup()
-			} else if c == '?' {
-				l.r.Move(1)
-				return CommentToken, l.shiftBogusComment()
-			}
-		} else if c == 0 && l.r.Err() != nil {
-			if l.r.Pos() > 0 {
-				l.text = l.r.Shift()
-				return TextToken, l.text
-			}
-			return ErrorToken, nil
-		}
-		l.r.Move(1)
-	}
-}
-
-////////////////////////////////////////////////////////////////
-
-// The following functions follow the specifications at https://html.spec.whatwg.org/multipage/parsing.html
-
-func (l *Lexer) shiftRawText() []byte {
-	if l.rawTag == Plaintext {
-		for {
-			if l.r.Peek(0) == 0 && l.r.Err() != nil {
-				return l.r.Shift()
-			}
-			l.r.Move(1)
-		}
-	} else { // RCDATA, RAWTEXT and SCRIPT
-		for {
-			c := l.r.Peek(0)
-			if c == '<' {
-				if l.r.Peek(1) == '/' {
-					mark := l.r.Pos()
-					l.r.Move(2)
-					for {
-						if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
-							break
-						}
-						l.r.Move(1)
-					}
-					if h := ToHash(parse.ToLower(parse.Copy(l.r.Lexeme()[mark+2:]))); h == l.rawTag { // copy so that ToLower doesn't change the case of the underlying slice
-						l.r.Rewind(mark)
-						return l.r.Shift()
-					}
-				} else if l.rawTag == Script && l.r.Peek(1) == '!' && l.r.Peek(2) == '-' && l.r.Peek(3) == '-' {
-					l.r.Move(4)
-					inScript := false
-					for {
-						c := l.r.Peek(0)
-						if c == '-' && l.r.Peek(1) == '-' && l.r.Peek(2) == '>' {
-							l.r.Move(3)
-							break
-						} else if c == '<' {
-							isEnd := l.r.Peek(1) == '/'
-							if isEnd {
-								l.r.Move(2)
-							} else {
-								l.r.Move(1)
-							}
-							mark := l.r.Pos()
-							for {
-								if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
-									break
-								}
-								l.r.Move(1)
-							}
-							if h := ToHash(parse.ToLower(parse.Copy(l.r.Lexeme()[mark:]))); h == Script { // copy so that ToLower doesn't change the case of the underlying slice
-								if !isEnd {
-									inScript = true
-								} else {
-									if !inScript {
-										l.r.Rewind(mark - 2)
-										return l.r.Shift()
-									}
-									inScript = false
-								}
-							}
-						} else if c == 0 && l.r.Err() != nil {
-							return l.r.Shift()
-						} else {
-							l.r.Move(1)
-						}
-					}
-				} else {
-					l.r.Move(1)
-				}
-			} else if c == 0 && l.r.Err() != nil {
-				return l.r.Shift()
-			} else {
-				l.r.Move(1)
-			}
-		}
-	}
-}
-
-func (l *Lexer) readMarkup() (TokenType, []byte) {
-	if l.at('-', '-') {
-		l.r.Move(2)
-		for {
-			if l.r.Peek(0) == 0 && l.r.Err() != nil {
-				l.text = l.r.Lexeme()[4:]
-				return CommentToken, l.r.Shift()
-			} else if l.at('-', '-', '>') {
-				l.text = l.r.Lexeme()[4:]
-				l.r.Move(3)
-				return CommentToken, l.r.Shift()
-			} else if l.at('-', '-', '!', '>') {
-				l.text = l.r.Lexeme()[4:]
-				l.r.Move(4)
-				return CommentToken, l.r.Shift()
-			}
-			l.r.Move(1)
-		}
-	} else if l.at('[', 'C', 'D', 'A', 'T', 'A', '[') {
-		l.r.Move(7)
-		for {
-			if l.r.Peek(0) == 0 && l.r.Err() != nil {
-				l.text = l.r.Lexeme()[9:]
-				return TextToken, l.r.Shift()
-			} else if l.at(']', ']', '>') {
-				l.text = l.r.Lexeme()[9:]
-				l.r.Move(3)
-				return TextToken, l.r.Shift()
-			}
-			l.r.Move(1)
-		}
-	} else {
-		if l.atCaseInsensitive('d', 'o', 'c', 't', 'y', 'p', 'e') {
-			l.r.Move(7)
-			if l.r.Peek(0) == ' ' {
-				l.r.Move(1)
-			}
-			for {
-				if c := l.r.Peek(0); c == '>' || c == 0 && l.r.Err() != nil {
-					l.text = l.r.Lexeme()[9:]
-					if c == '>' {
-						l.r.Move(1)
-					}
-					return DoctypeToken, l.r.Shift()
-				}
-				l.r.Move(1)
-			}
-		}
-	}
-	return CommentToken, l.shiftBogusComment()
-}
-
-func (l *Lexer) shiftBogusComment() []byte {
-	for {
-		c := l.r.Peek(0)
-		if c == '>' {
-			l.text = l.r.Lexeme()[2:]
-			l.r.Move(1)
-			return l.r.Shift()
-		} else if c == 0 && l.r.Err() != nil {
-			l.text = l.r.Lexeme()[2:]
-			return l.r.Shift()
-		}
-		l.r.Move(1)
-	}
-}
-
-func (l *Lexer) shiftStartTag() (TokenType, []byte) {
-	for {
-		if c := l.r.Peek(0); c == ' ' || c == '>' || c == '/' && l.r.Peek(1) == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 && l.r.Err() != nil {
-			break
-		}
-		l.r.Move(1)
-	}
-	l.text = parse.ToLower(l.r.Lexeme()[1:])
-	if h := ToHash(l.text); h == Textarea || h == Title || h == Style || h == Xmp || h == Iframe || h == Script || h == Plaintext || h == Svg || h == Math {
-		if h == Svg || h == Math {
-			data := l.shiftXML(h)
-			if l.err != nil {
-				return ErrorToken, nil
-			}
-
-			l.inTag = false
-			if h == Svg {
-				return SvgToken, data
-			}
-			return MathToken, data
-		}
-		l.rawTag = h
-	}
-	return StartTagToken, l.r.Shift()
-}
-
-func (l *Lexer) shiftAttribute() []byte {
-	nameStart := l.r.Pos()
-	var c byte
-	for { // attribute name state
-		if c = l.r.Peek(0); c == ' ' || c == '=' || c == '>' || c == '/' && l.r.Peek(1) == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 && l.r.Err() != nil {
-			break
-		}
-		l.r.Move(1)
-	}
-	nameEnd := l.r.Pos()
-	for { // after attribute name state
-		if c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' {
-			l.r.Move(1)
-			continue
-		}
-		break
-	}
-	if c == '=' {
-		l.r.Move(1)
-		for { // before attribute value state
-			if c = l.r.Peek(0); c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' {
-				l.r.Move(1)
-				continue
-			}
-			break
-		}
-		attrPos := l.r.Pos()
-		delim := c
-		if delim == '"' || delim == '\'' { // attribute value single- and double-quoted state
-			l.r.Move(1)
-			for {
-				c := l.r.Peek(0)
-				if c == delim {
-					l.r.Move(1)
-					break
-				} else if c == 0 && l.r.Err() != nil {
-					break
-				}
-				l.r.Move(1)
-			}
-		} else { // attribute value unquoted state
-			for {
-				if c := l.r.Peek(0); c == ' ' || c == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 && l.r.Err() != nil {
-					break
-				}
-				l.r.Move(1)
-			}
-		}
-		l.attrVal = l.r.Lexeme()[attrPos:]
-	} else {
-		l.r.Rewind(nameEnd)
-		l.attrVal = nil
-	}
-	l.text = parse.ToLower(l.r.Lexeme()[nameStart:nameEnd])
-	return l.r.Shift()
-}
-
-func (l *Lexer) shiftEndTag() []byte {
-	for {
-		c := l.r.Peek(0)
-		if c == '>' {
-			l.text = l.r.Lexeme()[2:]
-			l.r.Move(1)
-			break
-		} else if c == 0 && l.r.Err() != nil {
-			l.text = l.r.Lexeme()[2:]
-			break
-		}
-		l.r.Move(1)
-	}
-
-	end := len(l.text)
-	for end > 0 {
-		if c := l.text[end-1]; c == ' ' || c == '\t' || c == '\n' || c == '\r' {
-			end--
-			continue
-		}
-		break
-	}
-	l.text = l.text[:end]
-	return parse.ToLower(l.r.Shift())
-}
-
-// shiftXML parses the content of a svg or math tag according to the XML 1.1 specifications, including the tag itself.
-// So far we have already parsed `<svg` or `<math`.
-func (l *Lexer) shiftXML(rawTag Hash) []byte {
-	inQuote := false
-	for {
-		c := l.r.Peek(0)
-		if c == '"' {
-			inQuote = !inQuote
-			l.r.Move(1)
-		} else if c == '<' && !inQuote && l.r.Peek(1) == '/' {
-			mark := l.r.Pos()
-			l.r.Move(2)
-			for {
-				if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
-					break
-				}
-				l.r.Move(1)
-			}
-			if h := ToHash(parse.ToLower(parse.Copy(l.r.Lexeme()[mark+2:]))); h == rawTag { // copy so that ToLower doesn't change the case of the underlying slice
-				break
-			}
-		} else if c == 0 {
-			if l.r.Err() == nil {
-				l.err = parse.NewErrorLexer(l.r, "HTML parse error: unexpected NULL character")
-			}
-			return l.r.Shift()
-		} else {
-			l.r.Move(1)
-		}
-	}
-
-	for {
-		c := l.r.Peek(0)
-		if c == '>' {
-			l.r.Move(1)
-			break
-		} else if c == 0 {
-			if l.r.Err() == nil {
-				l.err = parse.NewErrorLexer(l.r, "HTML parse error: unexpected NULL character")
-			}
-			return l.r.Shift()
-		}
-		l.r.Move(1)
-	}
-	return l.r.Shift()
-}
-
-////////////////////////////////////////////////////////////////
-
-func (l *Lexer) at(b ...byte) bool {
-	for i, c := range b {
-		if l.r.Peek(i) != c {
-			return false
-		}
-	}
-	return true
-}
-
-func (l *Lexer) atCaseInsensitive(b ...byte) bool {
-	for i, c := range b {
-		if l.r.Peek(i) != c && (l.r.Peek(i)+('a'-'A')) != c {
-			return false
-		}
-	}
-	return true
-}