diff options
Diffstat (limited to 'vendor/github.com/tdewolff/parse/v2/html/lex.go')
-rw-r--r-- | vendor/github.com/tdewolff/parse/v2/html/lex.go | 113 |
1 files changed, 101 insertions, 12 deletions
diff --git a/vendor/github.com/tdewolff/parse/v2/html/lex.go b/vendor/github.com/tdewolff/parse/v2/html/lex.go index 5619ce9e7..8fc9073d8 100644 --- a/vendor/github.com/tdewolff/parse/v2/html/lex.go +++ b/vendor/github.com/tdewolff/parse/v2/html/lex.go @@ -56,16 +56,26 @@ func (tt TokenType) String() string { //////////////////////////////////////////////////////////////// +var GoTemplate = [2]string{"{{", "}}"} +var HandlebarsTemplate = [2]string{"{{", "}}"} +var MustacheTemplate = [2]string{"{{", "}}"} +var EJSTemplate = [2]string{"<%", "%>"} +var ASPTemplate = [2]string{"<%", "%>"} +var PHPTemplate = [2]string{"<?", "?>"} + // Lexer is the state for the lexer. type Lexer struct { - r *parse.Input - err error + r *parse.Input + tmplBegin []byte + tmplEnd []byte + err error rawTag Hash inTag bool text []byte attrVal []byte + hasTmpl bool } // NewLexer returns a new Lexer for a given io.Reader. @@ -75,6 +85,14 @@ func NewLexer(r *parse.Input) *Lexer { } } +func NewTemplateLexer(r *parse.Input, tmpl [2]string) *Lexer { + return &Lexer{ + r: r, + tmplBegin: []byte(tmpl[0]), + tmplEnd: []byte(tmpl[1]), + } +} + // Err returns the error encountered during lexing, this is often io.EOF but also other errors can be returned. func (l *Lexer) Err() error { if l.err != nil { @@ -88,14 +106,25 @@ func (l *Lexer) Text() []byte { return l.text } +// AttrKey returns the attribute key when an AttributeToken was returned from Next. +func (l *Lexer) AttrKey() []byte { + return l.text +} + // AttrVal returns the attribute value when an AttributeToken was returned from Next. func (l *Lexer) AttrVal() []byte { return l.attrVal } +// HasTemplate returns true if the token value contains a template. +func (l *Lexer) HasTemplate() bool { + return l.hasTmpl +} + // Next returns the next Token. It returns ErrorToken when an error was encountered. Using Err() one can retrieve the error message. 
func (l *Lexer) Next() (TokenType, []byte) { l.text = nil + l.hasTmpl = false var c byte if l.inTag { l.attrVal = nil @@ -122,7 +151,7 @@ func (l *Lexer) Next() (TokenType, []byte) { } if l.rawTag != 0 { - if rawText := l.shiftRawText(); len(rawText) > 0 { + if rawText := l.shiftRawText(); 0 < len(rawText) { l.text = rawText l.rawTag = 0 return TextToken, rawText @@ -135,12 +164,12 @@ func (l *Lexer) Next() (TokenType, []byte) { if c == '<' { c = l.r.Peek(1) isEndTag := c == '/' && l.r.Peek(2) != '>' && (l.r.Peek(2) != 0 || l.r.PeekErr(2) == nil) - if l.r.Pos() > 0 { - if isEndTag || 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '!' || c == '?' { - // return currently buffered texttoken so that we can return tag next iteration - l.text = l.r.Shift() - return TextToken, l.text - } + if !isEndTag && (c < 'a' || 'z' < c) && (c < 'A' || 'Z' < c) && c != '!' && c != '?' { + // not a tag + } else if 0 < l.r.Pos() { + // return currently buffered texttoken so that we can return tag next iteration + l.text = l.r.Shift() + return TextToken, l.text } else if isEndTag { l.r.Move(2) // only endtags that are not followed by > or EOF arrive here @@ -159,8 +188,12 @@ func (l *Lexer) Next() (TokenType, []byte) { l.r.Move(1) return CommentToken, l.shiftBogusComment() } + } else if 0 < len(l.tmplBegin) && l.at(l.tmplBegin...) { + l.r.Move(len(l.tmplBegin)) + l.moveTemplate() + l.hasTmpl = true } else if c == 0 && l.r.Err() != nil { - if l.r.Pos() > 0 { + if 0 < l.r.Pos() { l.text = l.r.Shift() return TextToken, l.text } @@ -241,6 +274,10 @@ func (l *Lexer) shiftRawText() []byte { } else { l.r.Move(1) } + } else if 0 < len(l.tmplBegin) && l.at(l.tmplBegin...) 
{ + l.r.Move(len(l.tmplBegin)) + l.moveTemplate() + l.hasTmpl = true } else if c == 0 && l.r.Err() != nil { return l.r.Shift() } else { @@ -346,6 +383,11 @@ func (l *Lexer) shiftStartTag() (TokenType, []byte) { func (l *Lexer) shiftAttribute() []byte { nameStart := l.r.Pos() var c byte + if 0 < len(l.tmplBegin) && l.at(l.tmplBegin...) { + l.r.Move(len(l.tmplBegin)) + l.moveTemplate() + l.hasTmpl = true + } for { // attribute name state if c = l.r.Peek(0); c == ' ' || c == '=' || c == '>' || c == '/' && l.r.Peek(1) == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 && l.r.Err() != nil { break @@ -360,6 +402,7 @@ func (l *Lexer) shiftAttribute() []byte { } break } + nameHasTmpl := l.hasTmpl if c == '=' { l.r.Move(1) for { // before attribute value state @@ -378,11 +421,20 @@ func (l *Lexer) shiftAttribute() []byte { if c == delim { l.r.Move(1) break + } else if 0 < len(l.tmplBegin) && l.at(l.tmplBegin...) { + l.r.Move(len(l.tmplBegin)) + l.moveTemplate() + l.hasTmpl = true } else if c == 0 && l.r.Err() != nil { break + } else { + l.r.Move(1) } - l.r.Move(1) } + } else if 0 < len(l.tmplBegin) && l.at(l.tmplBegin...) { + l.r.Move(len(l.tmplBegin)) + l.moveTemplate() + l.hasTmpl = true } else { // attribute value unquoted state for { if c := l.r.Peek(0); c == ' ' || c == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 && l.r.Err() != nil { @@ -396,7 +448,15 @@ func (l *Lexer) shiftAttribute() []byte { l.r.Rewind(nameEnd) l.attrVal = nil } - l.text = parse.ToLower(l.r.Lexeme()[nameStart:nameEnd]) + if 0 < len(l.tmplBegin) && l.at(l.tmplBegin...) { + l.r.Move(len(l.tmplBegin)) + l.moveTemplate() + l.hasTmpl = true + } + l.text = l.r.Lexeme()[nameStart:nameEnd] + if !nameHasTmpl { + l.text = parse.ToLower(l.text) + } return l.r.Shift() } @@ -473,6 +533,35 @@ func (l *Lexer) shiftXML(rawTag Hash) []byte { return l.r.Shift() } +func (l *Lexer) moveTemplate() { + for { + if c := l.r.Peek(0); l.at(l.tmplEnd...) 
|| c == 0 && l.r.Err() != nil { + if c != 0 { + l.r.Move(len(l.tmplEnd)) + } + break + } else if c == '"' || c == '\'' { + l.r.Move(1) + escape := false + for { + if c2 := l.r.Peek(0); !escape && c2 == c || c2 == 0 && l.r.Err() != nil { + if c2 != 0 { + l.r.Move(1) + } + break + } else if c2 == '\\' { + escape = !escape + } else { + escape = false + } + l.r.Move(1) + } + } else { + l.r.Move(1) + } + } +} + //////////////////////////////////////////////////////////////// func (l *Lexer) at(b ...byte) bool { |