summaryrefslogtreecommitdiff
path: root/vendor/github.com/tdewolff/parse/v2/html/lex.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/tdewolff/parse/v2/html/lex.go')
-rw-r--r--vendor/github.com/tdewolff/parse/v2/html/lex.go113
1 files changed, 101 insertions, 12 deletions
diff --git a/vendor/github.com/tdewolff/parse/v2/html/lex.go b/vendor/github.com/tdewolff/parse/v2/html/lex.go
index 5619ce9e7..8fc9073d8 100644
--- a/vendor/github.com/tdewolff/parse/v2/html/lex.go
+++ b/vendor/github.com/tdewolff/parse/v2/html/lex.go
@@ -56,16 +56,26 @@ func (tt TokenType) String() string {
////////////////////////////////////////////////////////////////
+var GoTemplate = [2]string{"{{", "}}"}
+var HandlebarsTemplate = [2]string{"{{", "}}"}
+var MustacheTemplate = [2]string{"{{", "}}"}
+var EJSTemplate = [2]string{"<%", "%>"}
+var ASPTemplate = [2]string{"<%", "%>"}
+var PHPTemplate = [2]string{"<?", "?>"}
+
// Lexer is the state for the lexer.
type Lexer struct {
- r *parse.Input
- err error
+ r *parse.Input
+ tmplBegin []byte
+ tmplEnd []byte
+ err error
rawTag Hash
inTag bool
text []byte
attrVal []byte
+ hasTmpl bool
}
// NewLexer returns a new Lexer for a given io.Reader.
@@ -75,6 +85,14 @@ func NewLexer(r *parse.Input) *Lexer {
}
}
+func NewTemplateLexer(r *parse.Input, tmpl [2]string) *Lexer {
+ return &Lexer{
+ r: r,
+ tmplBegin: []byte(tmpl[0]),
+ tmplEnd: []byte(tmpl[1]),
+ }
+}
+
// Err returns the error encountered during lexing, this is often io.EOF but also other errors can be returned.
func (l *Lexer) Err() error {
if l.err != nil {
@@ -88,14 +106,25 @@ func (l *Lexer) Text() []byte {
return l.text
}
+// AttrKey returns the attribute key when an AttributeToken was returned from Next.
+func (l *Lexer) AttrKey() []byte {
+ return l.text
+}
+
// AttrVal returns the attribute value when an AttributeToken was returned from Next.
func (l *Lexer) AttrVal() []byte {
return l.attrVal
}
+// HasTemplate returns the true if the token value contains a template.
+func (l *Lexer) HasTemplate() bool {
+ return l.hasTmpl
+}
+
// Next returns the next Token. It returns ErrorToken when an error was encountered. Using Err() one can retrieve the error message.
func (l *Lexer) Next() (TokenType, []byte) {
l.text = nil
+ l.hasTmpl = false
var c byte
if l.inTag {
l.attrVal = nil
@@ -122,7 +151,7 @@ func (l *Lexer) Next() (TokenType, []byte) {
}
if l.rawTag != 0 {
- if rawText := l.shiftRawText(); len(rawText) > 0 {
+ if rawText := l.shiftRawText(); 0 < len(rawText) {
l.text = rawText
l.rawTag = 0
return TextToken, rawText
@@ -135,12 +164,12 @@ func (l *Lexer) Next() (TokenType, []byte) {
if c == '<' {
c = l.r.Peek(1)
isEndTag := c == '/' && l.r.Peek(2) != '>' && (l.r.Peek(2) != 0 || l.r.PeekErr(2) == nil)
- if l.r.Pos() > 0 {
- if isEndTag || 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '!' || c == '?' {
- // return currently buffered texttoken so that we can return tag next iteration
- l.text = l.r.Shift()
- return TextToken, l.text
- }
+ if !isEndTag && (c < 'a' || 'z' < c) && (c < 'A' || 'Z' < c) && c != '!' && c != '?' {
+ // not a tag
+ } else if 0 < l.r.Pos() {
+ // return currently buffered texttoken so that we can return tag next iteration
+ l.text = l.r.Shift()
+ return TextToken, l.text
} else if isEndTag {
l.r.Move(2)
// only endtags that are not followed by > or EOF arrive here
@@ -159,8 +188,12 @@ func (l *Lexer) Next() (TokenType, []byte) {
l.r.Move(1)
return CommentToken, l.shiftBogusComment()
}
+ } else if 0 < len(l.tmplBegin) && l.at(l.tmplBegin...) {
+ l.r.Move(len(l.tmplBegin))
+ l.moveTemplate()
+ l.hasTmpl = true
} else if c == 0 && l.r.Err() != nil {
- if l.r.Pos() > 0 {
+ if 0 < l.r.Pos() {
l.text = l.r.Shift()
return TextToken, l.text
}
@@ -241,6 +274,10 @@ func (l *Lexer) shiftRawText() []byte {
} else {
l.r.Move(1)
}
+ } else if 0 < len(l.tmplBegin) && l.at(l.tmplBegin...) {
+ l.r.Move(len(l.tmplBegin))
+ l.moveTemplate()
+ l.hasTmpl = true
} else if c == 0 && l.r.Err() != nil {
return l.r.Shift()
} else {
@@ -346,6 +383,11 @@ func (l *Lexer) shiftStartTag() (TokenType, []byte) {
func (l *Lexer) shiftAttribute() []byte {
nameStart := l.r.Pos()
var c byte
+ if 0 < len(l.tmplBegin) && l.at(l.tmplBegin...) {
+ l.r.Move(len(l.tmplBegin))
+ l.moveTemplate()
+ l.hasTmpl = true
+ }
for { // attribute name state
if c = l.r.Peek(0); c == ' ' || c == '=' || c == '>' || c == '/' && l.r.Peek(1) == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 && l.r.Err() != nil {
break
@@ -360,6 +402,7 @@ func (l *Lexer) shiftAttribute() []byte {
}
break
}
+ nameHasTmpl := l.hasTmpl
if c == '=' {
l.r.Move(1)
for { // before attribute value state
@@ -378,11 +421,20 @@ func (l *Lexer) shiftAttribute() []byte {
if c == delim {
l.r.Move(1)
break
+ } else if 0 < len(l.tmplBegin) && l.at(l.tmplBegin...) {
+ l.r.Move(len(l.tmplBegin))
+ l.moveTemplate()
+ l.hasTmpl = true
} else if c == 0 && l.r.Err() != nil {
break
+ } else {
+ l.r.Move(1)
}
- l.r.Move(1)
}
+ } else if 0 < len(l.tmplBegin) && l.at(l.tmplBegin...) {
+ l.r.Move(len(l.tmplBegin))
+ l.moveTemplate()
+ l.hasTmpl = true
} else { // attribute value unquoted state
for {
if c := l.r.Peek(0); c == ' ' || c == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 && l.r.Err() != nil {
@@ -396,7 +448,15 @@ func (l *Lexer) shiftAttribute() []byte {
l.r.Rewind(nameEnd)
l.attrVal = nil
}
- l.text = parse.ToLower(l.r.Lexeme()[nameStart:nameEnd])
+ if 0 < len(l.tmplBegin) && l.at(l.tmplBegin...) {
+ l.r.Move(len(l.tmplBegin))
+ l.moveTemplate()
+ l.hasTmpl = true
+ }
+ l.text = l.r.Lexeme()[nameStart:nameEnd]
+ if !nameHasTmpl {
+ l.text = parse.ToLower(l.text)
+ }
return l.r.Shift()
}
@@ -473,6 +533,35 @@ func (l *Lexer) shiftXML(rawTag Hash) []byte {
return l.r.Shift()
}
+func (l *Lexer) moveTemplate() {
+ for {
+ if c := l.r.Peek(0); l.at(l.tmplEnd...) || c == 0 && l.r.Err() != nil {
+ if c != 0 {
+ l.r.Move(len(l.tmplEnd))
+ }
+ break
+ } else if c == '"' || c == '\'' {
+ l.r.Move(1)
+ escape := false
+ for {
+ if c2 := l.r.Peek(0); !escape && c2 == c || c2 == 0 && l.r.Err() != nil {
+ if c2 != 0 {
+ l.r.Move(1)
+ }
+ break
+ } else if c2 == '\\' {
+ escape = !escape
+ } else {
+ escape = false
+ }
+ l.r.Move(1)
+ }
+ } else {
+ l.r.Move(1)
+ }
+ }
+}
+
////////////////////////////////////////////////////////////////
func (l *Lexer) at(b ...byte) bool {