From ce190d867ca126001a1c0417b00810fc03c0b3ba Mon Sep 17 00:00:00 2001 From: Tobi Smethurst <31960611+tsmethurst@users.noreply.github.com> Date: Mon, 16 Aug 2021 19:17:56 +0200 Subject: Text/status parsing fixes (#141) * aaaaaa * vendor minify * update + test markdown parsing --- vendor/github.com/tdewolff/parse/v2/html/README.md | 98 ++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 vendor/github.com/tdewolff/parse/v2/html/README.md (limited to 'vendor/github.com/tdewolff/parse/v2/html/README.md') diff --git a/vendor/github.com/tdewolff/parse/v2/html/README.md b/vendor/github.com/tdewolff/parse/v2/html/README.md new file mode 100644 index 000000000..53145dbd9 --- /dev/null +++ b/vendor/github.com/tdewolff/parse/v2/html/README.md @@ -0,0 +1,98 @@ +# HTML [![API reference](https://img.shields.io/badge/godoc-reference-5272B4)](https://pkg.go.dev/github.com/tdewolff/parse/v2/html?tab=doc) + +This package is an HTML5 lexer written in [Go][1]. It follows the specification at [The HTML syntax](http://www.w3.org/TR/html5/syntax.html). The lexer takes an io.Reader and converts it into tokens until the EOF. + +## Installation +Run the following command + + go get -u github.com/tdewolff/parse/v2/html + +or add the following import and run the project with `go get` + + import "github.com/tdewolff/parse/v2/html" + +## Lexer +### Usage +The following initializes a new Lexer with io.Reader `r`: +``` go +l := html.NewLexer(parse.NewInput(r)) +``` + +To tokenize until EOF or an error, use: +``` go +for { + tt, data := l.Next() + switch tt { + case html.ErrorToken: + // error or EOF set in l.Err() + return + case html.StartTagToken: + // ... + for { + ttAttr, dataAttr := l.Next() + if ttAttr != html.AttributeToken { + break + } + // ... + } + // ... 
+ } +} +``` + +All tokens: +``` go +ErrorToken TokenType = iota // extra token when errors occur +CommentToken +DoctypeToken +StartTagToken +StartTagCloseToken +StartTagVoidToken +EndTagToken +AttributeToken +TextToken +``` + +### Examples +``` go +package main + +import ( + "os" + + "github.com/tdewolff/parse/v2/html" +) + +// Tokenize HTML from stdin. +func main() { + l := html.NewLexer(parse.NewInput(os.Stdin)) + for { + tt, data := l.Next() + switch tt { + case html.ErrorToken: + if l.Err() != io.EOF { + fmt.Println("Error on line", l.Line(), ":", l.Err()) + } + return + case html.StartTagToken: + fmt.Println("Tag", string(data)) + for { + ttAttr, dataAttr := l.Next() + if ttAttr != html.AttributeToken { + break + } + + key := dataAttr + val := l.AttrVal() + fmt.Println("Attribute", string(key), "=", string(val)) + } + // ... + } + } +} +``` + +## License +Released under the [MIT license](https://github.com/tdewolff/parse/blob/master/LICENSE.md). + +[1]: http://golang.org/ "Go Language" -- cgit v1.3