summaryrefslogtreecommitdiff
path: root/vendor/github.com/pelletier/go-toml/lexer.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/pelletier/go-toml/lexer.go')
-rw-r--r--vendor/github.com/pelletier/go-toml/lexer.go1031
1 files changed, 1031 insertions, 0 deletions
diff --git a/vendor/github.com/pelletier/go-toml/lexer.go b/vendor/github.com/pelletier/go-toml/lexer.go
new file mode 100644
index 000000000..313908e3e
--- /dev/null
+++ b/vendor/github.com/pelletier/go-toml/lexer.go
@@ -0,0 +1,1031 @@
+// TOML lexer.
+//
+// Written using the principles developed by Rob Pike in
+// http://www.youtube.com/watch?v=HxaD_trXwRE
+
+package toml
+
+import (
+ "bytes"
+ "errors"
+ "fmt"
+ "strconv"
+ "strings"
+)
+
+// Define state functions
+type tomlLexStateFn func() tomlLexStateFn
+
+// Define lexer
+type tomlLexer struct {
+ inputIdx int
+ input []rune // Textual source
+ currentTokenStart int
+ currentTokenStop int
+ tokens []token
+ brackets []rune
+ line int
+ col int
+ endbufferLine int
+ endbufferCol int
+}
+
+// Basic read operations on input
+
+func (l *tomlLexer) read() rune {
+ r := l.peek()
+ if r == '\n' {
+ l.endbufferLine++
+ l.endbufferCol = 1
+ } else {
+ l.endbufferCol++
+ }
+ l.inputIdx++
+ return r
+}
+
+func (l *tomlLexer) next() rune {
+ r := l.read()
+
+ if r != eof {
+ l.currentTokenStop++
+ }
+ return r
+}
+
+func (l *tomlLexer) ignore() {
+ l.currentTokenStart = l.currentTokenStop
+ l.line = l.endbufferLine
+ l.col = l.endbufferCol
+}
+
+func (l *tomlLexer) skip() {
+ l.next()
+ l.ignore()
+}
+
+func (l *tomlLexer) fastForward(n int) {
+ for i := 0; i < n; i++ {
+ l.next()
+ }
+}
+
+func (l *tomlLexer) emitWithValue(t tokenType, value string) {
+ l.tokens = append(l.tokens, token{
+ Position: Position{l.line, l.col},
+ typ: t,
+ val: value,
+ })
+ l.ignore()
+}
+
+func (l *tomlLexer) emit(t tokenType) {
+ l.emitWithValue(t, string(l.input[l.currentTokenStart:l.currentTokenStop]))
+}
+
+func (l *tomlLexer) peek() rune {
+ if l.inputIdx >= len(l.input) {
+ return eof
+ }
+ return l.input[l.inputIdx]
+}
+
+func (l *tomlLexer) peekString(size int) string {
+ maxIdx := len(l.input)
+ upperIdx := l.inputIdx + size // FIXME: potential overflow
+ if upperIdx > maxIdx {
+ upperIdx = maxIdx
+ }
+ return string(l.input[l.inputIdx:upperIdx])
+}
+
+func (l *tomlLexer) follow(next string) bool {
+ return next == l.peekString(len(next))
+}
+
+// Error management
+
+func (l *tomlLexer) errorf(format string, args ...interface{}) tomlLexStateFn {
+ l.tokens = append(l.tokens, token{
+ Position: Position{l.line, l.col},
+ typ: tokenError,
+ val: fmt.Sprintf(format, args...),
+ })
+ return nil
+}
+
+// State functions
+
+func (l *tomlLexer) lexVoid() tomlLexStateFn {
+ for {
+ next := l.peek()
+ switch next {
+ case '}': // after '{'
+ return l.lexRightCurlyBrace
+ case '[':
+ return l.lexTableKey
+ case '#':
+ return l.lexComment(l.lexVoid)
+ case '=':
+ return l.lexEqual
+ case '\r':
+ fallthrough
+ case '\n':
+ l.skip()
+ continue
+ }
+
+ if isSpace(next) {
+ l.skip()
+ }
+
+ if isKeyStartChar(next) {
+ return l.lexKey
+ }
+
+ if next == eof {
+ l.next()
+ break
+ }
+ }
+
+ l.emit(tokenEOF)
+ return nil
+}
+
+func (l *tomlLexer) lexRvalue() tomlLexStateFn {
+ for {
+ next := l.peek()
+ switch next {
+ case '.':
+ return l.errorf("cannot start float with a dot")
+ case '=':
+ return l.lexEqual
+ case '[':
+ return l.lexLeftBracket
+ case ']':
+ return l.lexRightBracket
+ case '{':
+ return l.lexLeftCurlyBrace
+ case '}':
+ return l.lexRightCurlyBrace
+ case '#':
+ return l.lexComment(l.lexRvalue)
+ case '"':
+ return l.lexString
+ case '\'':
+ return l.lexLiteralString
+ case ',':
+ return l.lexComma
+ case '\r':
+ fallthrough
+ case '\n':
+ l.skip()
+ if len(l.brackets) > 0 && l.brackets[len(l.brackets)-1] == '[' {
+ return l.lexRvalue
+ }
+ return l.lexVoid
+ }
+
+ if l.follow("true") {
+ return l.lexTrue
+ }
+
+ if l.follow("false") {
+ return l.lexFalse
+ }
+
+ if l.follow("inf") {
+ return l.lexInf
+ }
+
+ if l.follow("nan") {
+ return l.lexNan
+ }
+
+ if isSpace(next) {
+ l.skip()
+ continue
+ }
+
+ if next == eof {
+ l.next()
+ break
+ }
+
+ if next == '+' || next == '-' {
+ return l.lexNumber
+ }
+
+ if isDigit(next) {
+ return l.lexDateTimeOrNumber
+ }
+
+ return l.errorf("no value can start with %c", next)
+ }
+
+ l.emit(tokenEOF)
+ return nil
+}
+
+func (l *tomlLexer) lexDateTimeOrNumber() tomlLexStateFn {
+ // Could be either a date/time, or a digit.
+ // The options for date/times are:
+ // YYYY-... => date or date-time
+ // HH:... => time
+ // Anything else should be a number.
+
+ lookAhead := l.peekString(5)
+ if len(lookAhead) < 3 {
+ return l.lexNumber()
+ }
+
+ for idx, r := range lookAhead {
+ if !isDigit(r) {
+ if idx == 2 && r == ':' {
+ return l.lexDateTimeOrTime()
+ }
+ if idx == 4 && r == '-' {
+ return l.lexDateTimeOrTime()
+ }
+ return l.lexNumber()
+ }
+ }
+ return l.lexNumber()
+}
+
+func (l *tomlLexer) lexLeftCurlyBrace() tomlLexStateFn {
+ l.next()
+ l.emit(tokenLeftCurlyBrace)
+ l.brackets = append(l.brackets, '{')
+ return l.lexVoid
+}
+
+func (l *tomlLexer) lexRightCurlyBrace() tomlLexStateFn {
+ l.next()
+ l.emit(tokenRightCurlyBrace)
+ if len(l.brackets) == 0 || l.brackets[len(l.brackets)-1] != '{' {
+ return l.errorf("cannot have '}' here")
+ }
+ l.brackets = l.brackets[:len(l.brackets)-1]
+ return l.lexRvalue
+}
+
+func (l *tomlLexer) lexDateTimeOrTime() tomlLexStateFn {
+ // Example matches:
+ // 1979-05-27T07:32:00Z
+ // 1979-05-27T00:32:00-07:00
+ // 1979-05-27T00:32:00.999999-07:00
+ // 1979-05-27 07:32:00Z
+ // 1979-05-27 00:32:00-07:00
+ // 1979-05-27 00:32:00.999999-07:00
+ // 1979-05-27T07:32:00
+ // 1979-05-27T00:32:00.999999
+ // 1979-05-27 07:32:00
+ // 1979-05-27 00:32:00.999999
+ // 1979-05-27
+ // 07:32:00
+ // 00:32:00.999999
+
+ // we already know those two are digits
+ l.next()
+ l.next()
+
+ // Got 2 digits. At that point it could be either a time or a date(-time).
+
+ r := l.next()
+ if r == ':' {
+ return l.lexTime()
+ }
+
+ return l.lexDateTime()
+}
+
+func (l *tomlLexer) lexDateTime() tomlLexStateFn {
+ // This state accepts an offset date-time, a local date-time, or a local date.
+ //
+ // v--- cursor
+ // 1979-05-27T07:32:00Z
+ // 1979-05-27T00:32:00-07:00
+ // 1979-05-27T00:32:00.999999-07:00
+ // 1979-05-27 07:32:00Z
+ // 1979-05-27 00:32:00-07:00
+ // 1979-05-27 00:32:00.999999-07:00
+ // 1979-05-27T07:32:00
+ // 1979-05-27T00:32:00.999999
+ // 1979-05-27 07:32:00
+ // 1979-05-27 00:32:00.999999
+ // 1979-05-27
+
+ // date
+
+ // already checked by lexRvalue
+ l.next() // digit
+ l.next() // -
+
+ for i := 0; i < 2; i++ {
+ r := l.next()
+ if !isDigit(r) {
+ return l.errorf("invalid month digit in date: %c", r)
+ }
+ }
+
+ r := l.next()
+ if r != '-' {
+ return l.errorf("expected - to separate month of a date, not %c", r)
+ }
+
+ for i := 0; i < 2; i++ {
+ r := l.next()
+ if !isDigit(r) {
+ return l.errorf("invalid day digit in date: %c", r)
+ }
+ }
+
+ l.emit(tokenLocalDate)
+
+ r = l.peek()
+
+ if r == eof {
+
+ return l.lexRvalue
+ }
+
+ if r != ' ' && r != 'T' {
+ return l.errorf("incorrect date/time separation character: %c", r)
+ }
+
+ if r == ' ' {
+ lookAhead := l.peekString(3)[1:]
+ if len(lookAhead) < 2 {
+ return l.lexRvalue
+ }
+ for _, r := range lookAhead {
+ if !isDigit(r) {
+ return l.lexRvalue
+ }
+ }
+ }
+
+ l.skip() // skip the T or ' '
+
+ // time
+
+ for i := 0; i < 2; i++ {
+ r := l.next()
+ if !isDigit(r) {
+ return l.errorf("invalid hour digit in time: %c", r)
+ }
+ }
+
+ r = l.next()
+ if r != ':' {
+ return l.errorf("time hour/minute separator should be :, not %c", r)
+ }
+
+ for i := 0; i < 2; i++ {
+ r := l.next()
+ if !isDigit(r) {
+ return l.errorf("invalid minute digit in time: %c", r)
+ }
+ }
+
+ r = l.next()
+ if r != ':' {
+ return l.errorf("time minute/second separator should be :, not %c", r)
+ }
+
+ for i := 0; i < 2; i++ {
+ r := l.next()
+ if !isDigit(r) {
+ return l.errorf("invalid second digit in time: %c", r)
+ }
+ }
+
+ r = l.peek()
+ if r == '.' {
+ l.next()
+ r := l.next()
+ if !isDigit(r) {
+ return l.errorf("expected at least one digit in time's fraction, not %c", r)
+ }
+
+ for {
+ r := l.peek()
+ if !isDigit(r) {
+ break
+ }
+ l.next()
+ }
+ }
+
+ l.emit(tokenLocalTime)
+
+ return l.lexTimeOffset
+
+}
+
+func (l *tomlLexer) lexTimeOffset() tomlLexStateFn {
+ // potential offset
+
+ // Z
+ // -07:00
+ // +07:00
+ // nothing
+
+ r := l.peek()
+
+ if r == 'Z' {
+ l.next()
+ l.emit(tokenTimeOffset)
+ } else if r == '+' || r == '-' {
+ l.next()
+
+ for i := 0; i < 2; i++ {
+ r := l.next()
+ if !isDigit(r) {
+ return l.errorf("invalid hour digit in time offset: %c", r)
+ }
+ }
+
+ r = l.next()
+ if r != ':' {
+ return l.errorf("time offset hour/minute separator should be :, not %c", r)
+ }
+
+ for i := 0; i < 2; i++ {
+ r := l.next()
+ if !isDigit(r) {
+ return l.errorf("invalid minute digit in time offset: %c", r)
+ }
+ }
+
+ l.emit(tokenTimeOffset)
+ }
+
+ return l.lexRvalue
+}
+
+func (l *tomlLexer) lexTime() tomlLexStateFn {
+ // v--- cursor
+ // 07:32:00
+ // 00:32:00.999999
+
+ for i := 0; i < 2; i++ {
+ r := l.next()
+ if !isDigit(r) {
+ return l.errorf("invalid minute digit in time: %c", r)
+ }
+ }
+
+ r := l.next()
+ if r != ':' {
+ return l.errorf("time minute/second separator should be :, not %c", r)
+ }
+
+ for i := 0; i < 2; i++ {
+ r := l.next()
+ if !isDigit(r) {
+ return l.errorf("invalid second digit in time: %c", r)
+ }
+ }
+
+ r = l.peek()
+ if r == '.' {
+ l.next()
+ r := l.next()
+ if !isDigit(r) {
+ return l.errorf("expected at least one digit in time's fraction, not %c", r)
+ }
+
+ for {
+ r := l.peek()
+ if !isDigit(r) {
+ break
+ }
+ l.next()
+ }
+ }
+
+ l.emit(tokenLocalTime)
+ return l.lexRvalue
+
+}
+
+func (l *tomlLexer) lexTrue() tomlLexStateFn {
+ l.fastForward(4)
+ l.emit(tokenTrue)
+ return l.lexRvalue
+}
+
+func (l *tomlLexer) lexFalse() tomlLexStateFn {
+ l.fastForward(5)
+ l.emit(tokenFalse)
+ return l.lexRvalue
+}
+
+func (l *tomlLexer) lexInf() tomlLexStateFn {
+ l.fastForward(3)
+ l.emit(tokenInf)
+ return l.lexRvalue
+}
+
+func (l *tomlLexer) lexNan() tomlLexStateFn {
+ l.fastForward(3)
+ l.emit(tokenNan)
+ return l.lexRvalue
+}
+
+func (l *tomlLexer) lexEqual() tomlLexStateFn {
+ l.next()
+ l.emit(tokenEqual)
+ return l.lexRvalue
+}
+
+func (l *tomlLexer) lexComma() tomlLexStateFn {
+ l.next()
+ l.emit(tokenComma)
+ if len(l.brackets) > 0 && l.brackets[len(l.brackets)-1] == '{' {
+ return l.lexVoid
+ }
+ return l.lexRvalue
+}
+
+// Parse the key and emits its value without escape sequences.
+// bare keys, basic string keys and literal string keys are supported.
+func (l *tomlLexer) lexKey() tomlLexStateFn {
+ var sb strings.Builder
+
+ for r := l.peek(); isKeyChar(r) || r == '\n' || r == '\r'; r = l.peek() {
+ if r == '"' {
+ l.next()
+ str, err := l.lexStringAsString(`"`, false, true)
+ if err != nil {
+ return l.errorf(err.Error())
+ }
+ sb.WriteString("\"")
+ sb.WriteString(str)
+ sb.WriteString("\"")
+ l.next()
+ continue
+ } else if r == '\'' {
+ l.next()
+ str, err := l.lexLiteralStringAsString(`'`, false)
+ if err != nil {
+ return l.errorf(err.Error())
+ }
+ sb.WriteString("'")
+ sb.WriteString(str)
+ sb.WriteString("'")
+ l.next()
+ continue
+ } else if r == '\n' {
+ return l.errorf("keys cannot contain new lines")
+ } else if isSpace(r) {
+ var str strings.Builder
+ str.WriteString(" ")
+
+ // skip trailing whitespace
+ l.next()
+ for r = l.peek(); isSpace(r); r = l.peek() {
+ str.WriteRune(r)
+ l.next()
+ }
+ // break loop if not a dot
+ if r != '.' {
+ break
+ }
+ str.WriteString(".")
+ // skip trailing whitespace after dot
+ l.next()
+ for r = l.peek(); isSpace(r); r = l.peek() {
+ str.WriteRune(r)
+ l.next()
+ }
+ sb.WriteString(str.String())
+ continue
+ } else if r == '.' {
+ // skip
+ } else if !isValidBareChar(r) {
+ return l.errorf("keys cannot contain %c character", r)
+ }
+ sb.WriteRune(r)
+ l.next()
+ }
+ l.emitWithValue(tokenKey, sb.String())
+ return l.lexVoid
+}
+
+func (l *tomlLexer) lexComment(previousState tomlLexStateFn) tomlLexStateFn {
+ return func() tomlLexStateFn {
+ for next := l.peek(); next != '\n' && next != eof; next = l.peek() {
+ if next == '\r' && l.follow("\r\n") {
+ break
+ }
+ l.next()
+ }
+ l.ignore()
+ return previousState
+ }
+}
+
+func (l *tomlLexer) lexLeftBracket() tomlLexStateFn {
+ l.next()
+ l.emit(tokenLeftBracket)
+ l.brackets = append(l.brackets, '[')
+ return l.lexRvalue
+}
+
+func (l *tomlLexer) lexLiteralStringAsString(terminator string, discardLeadingNewLine bool) (string, error) {
+ var sb strings.Builder
+
+ if discardLeadingNewLine {
+ if l.follow("\r\n") {
+ l.skip()
+ l.skip()
+ } else if l.peek() == '\n' {
+ l.skip()
+ }
+ }
+
+ // find end of string
+ for {
+ if l.follow(terminator) {
+ return sb.String(), nil
+ }
+
+ next := l.peek()
+ if next == eof {
+ break
+ }
+ sb.WriteRune(l.next())
+ }
+
+ return "", errors.New("unclosed string")
+}
+
+func (l *tomlLexer) lexLiteralString() tomlLexStateFn {
+ l.skip()
+
+ // handle special case for triple-quote
+ terminator := "'"
+ discardLeadingNewLine := false
+ if l.follow("''") {
+ l.skip()
+ l.skip()
+ terminator = "'''"
+ discardLeadingNewLine = true
+ }
+
+ str, err := l.lexLiteralStringAsString(terminator, discardLeadingNewLine)
+ if err != nil {
+ return l.errorf(err.Error())
+ }
+
+ l.emitWithValue(tokenString, str)
+ l.fastForward(len(terminator))
+ l.ignore()
+ return l.lexRvalue
+}
+
+// Lex a string and return the results as a string.
+// Terminator is the substring indicating the end of the token.
+// The resulting string does not include the terminator.
+func (l *tomlLexer) lexStringAsString(terminator string, discardLeadingNewLine, acceptNewLines bool) (string, error) {
+ var sb strings.Builder
+
+ if discardLeadingNewLine {
+ if l.follow("\r\n") {
+ l.skip()
+ l.skip()
+ } else if l.peek() == '\n' {
+ l.skip()
+ }
+ }
+
+ for {
+ if l.follow(terminator) {
+ return sb.String(), nil
+ }
+
+ if l.follow("\\") {
+ l.next()
+ switch l.peek() {
+ case '\r':
+ fallthrough
+ case '\n':
+ fallthrough
+ case '\t':
+ fallthrough
+ case ' ':
+ // skip all whitespace chars following backslash
+ for strings.ContainsRune("\r\n\t ", l.peek()) {
+ l.next()
+ }
+ case '"':
+ sb.WriteString("\"")
+ l.next()
+ case 'n':
+ sb.WriteString("\n")
+ l.next()
+ case 'b':
+ sb.WriteString("\b")
+ l.next()
+ case 'f':
+ sb.WriteString("\f")
+ l.next()
+ case '/':
+ sb.WriteString("/")
+ l.next()
+ case 't':
+ sb.WriteString("\t")
+ l.next()
+ case 'r':
+ sb.WriteString("\r")
+ l.next()
+ case '\\':
+ sb.WriteString("\\")
+ l.next()
+ case 'u':
+ l.next()
+ var code strings.Builder
+ for i := 0; i < 4; i++ {
+ c := l.peek()
+ if !isHexDigit(c) {
+ return "", errors.New("unfinished unicode escape")
+ }
+ l.next()
+ code.WriteRune(c)
+ }
+ intcode, err := strconv.ParseInt(code.String(), 16, 32)
+ if err != nil {
+ return "", errors.New("invalid unicode escape: \\u" + code.String())
+ }
+ sb.WriteRune(rune(intcode))
+ case 'U':
+ l.next()
+ var code strings.Builder
+ for i := 0; i < 8; i++ {
+ c := l.peek()
+ if !isHexDigit(c) {
+ return "", errors.New("unfinished unicode escape")
+ }
+ l.next()
+ code.WriteRune(c)
+ }
+ intcode, err := strconv.ParseInt(code.String(), 16, 64)
+ if err != nil {
+ return "", errors.New("invalid unicode escape: \\U" + code.String())
+ }
+ sb.WriteRune(rune(intcode))
+ default:
+ return "", errors.New("invalid escape sequence: \\" + string(l.peek()))
+ }
+ } else {
+ r := l.peek()
+
+ if 0x00 <= r && r <= 0x1F && r != '\t' && !(acceptNewLines && (r == '\n' || r == '\r')) {
+ return "", fmt.Errorf("unescaped control character %U", r)
+ }
+ l.next()
+ sb.WriteRune(r)
+ }
+
+ if l.peek() == eof {
+ break
+ }
+ }
+
+ return "", errors.New("unclosed string")
+}
+
+func (l *tomlLexer) lexString() tomlLexStateFn {
+ l.skip()
+
+ // handle special case for triple-quote
+ terminator := `"`
+ discardLeadingNewLine := false
+ acceptNewLines := false
+ if l.follow(`""`) {
+ l.skip()
+ l.skip()
+ terminator = `"""`
+ discardLeadingNewLine = true
+ acceptNewLines = true
+ }
+
+ str, err := l.lexStringAsString(terminator, discardLeadingNewLine, acceptNewLines)
+ if err != nil {
+ return l.errorf(err.Error())
+ }
+
+ l.emitWithValue(tokenString, str)
+ l.fastForward(len(terminator))
+ l.ignore()
+ return l.lexRvalue
+}
+
+func (l *tomlLexer) lexTableKey() tomlLexStateFn {
+ l.next()
+
+ if l.peek() == '[' {
+ // token '[[' signifies an array of tables
+ l.next()
+ l.emit(tokenDoubleLeftBracket)
+ return l.lexInsideTableArrayKey
+ }
+ // vanilla table key
+ l.emit(tokenLeftBracket)
+ return l.lexInsideTableKey
+}
+
+// Parse the key till "]]", but only bare keys are supported
+func (l *tomlLexer) lexInsideTableArrayKey() tomlLexStateFn {
+ for r := l.peek(); r != eof; r = l.peek() {
+ switch r {
+ case ']':
+ if l.currentTokenStop > l.currentTokenStart {
+ l.emit(tokenKeyGroupArray)
+ }
+ l.next()
+ if l.peek() != ']' {
+ break
+ }
+ l.next()
+ l.emit(tokenDoubleRightBracket)
+ return l.lexVoid
+ case '[':
+ return l.errorf("table array key cannot contain ']'")
+ default:
+ l.next()
+ }
+ }
+ return l.errorf("unclosed table array key")
+}
+
+// Parse the key till "]" but only bare keys are supported
+func (l *tomlLexer) lexInsideTableKey() tomlLexStateFn {
+ for r := l.peek(); r != eof; r = l.peek() {
+ switch r {
+ case ']':
+ if l.currentTokenStop > l.currentTokenStart {
+ l.emit(tokenKeyGroup)
+ }
+ l.next()
+ l.emit(tokenRightBracket)
+ return l.lexVoid
+ case '[':
+ return l.errorf("table key cannot contain ']'")
+ default:
+ l.next()
+ }
+ }
+ return l.errorf("unclosed table key")
+}
+
+func (l *tomlLexer) lexRightBracket() tomlLexStateFn {
+ l.next()
+ l.emit(tokenRightBracket)
+ if len(l.brackets) == 0 || l.brackets[len(l.brackets)-1] != '[' {
+ return l.errorf("cannot have ']' here")
+ }
+ l.brackets = l.brackets[:len(l.brackets)-1]
+ return l.lexRvalue
+}
+
+type validRuneFn func(r rune) bool
+
+func isValidHexRune(r rune) bool {
+ return r >= 'a' && r <= 'f' ||
+ r >= 'A' && r <= 'F' ||
+ r >= '0' && r <= '9' ||
+ r == '_'
+}
+
+func isValidOctalRune(r rune) bool {
+ return r >= '0' && r <= '7' || r == '_'
+}
+
+func isValidBinaryRune(r rune) bool {
+ return r == '0' || r == '1' || r == '_'
+}
+
+func (l *tomlLexer) lexNumber() tomlLexStateFn {
+ r := l.peek()
+
+ if r == '0' {
+ follow := l.peekString(2)
+ if len(follow) == 2 {
+ var isValidRune validRuneFn
+ switch follow[1] {
+ case 'x':
+ isValidRune = isValidHexRune
+ case 'o':
+ isValidRune = isValidOctalRune
+ case 'b':
+ isValidRune = isValidBinaryRune
+ default:
+ if follow[1] >= 'a' && follow[1] <= 'z' || follow[1] >= 'A' && follow[1] <= 'Z' {
+ return l.errorf("unknown number base: %s. possible options are x (hex) o (octal) b (binary)", string(follow[1]))
+ }
+ }
+
+ if isValidRune != nil {
+ l.next()
+ l.next()
+ digitSeen := false
+ for {
+ next := l.peek()
+ if !isValidRune(next) {
+ break
+ }
+ digitSeen = true
+ l.next()
+ }
+
+ if !digitSeen {
+ return l.errorf("number needs at least one digit")
+ }
+
+ l.emit(tokenInteger)
+
+ return l.lexRvalue
+ }
+ }
+ }
+
+ if r == '+' || r == '-' {
+ l.next()
+ if l.follow("inf") {
+ return l.lexInf
+ }
+ if l.follow("nan") {
+ return l.lexNan
+ }
+ }
+
+ pointSeen := false
+ expSeen := false
+ digitSeen := false
+ for {
+ next := l.peek()
+ if next == '.' {
+ if pointSeen {
+ return l.errorf("cannot have two dots in one float")
+ }
+ l.next()
+ if !isDigit(l.peek()) {
+ return l.errorf("float cannot end with a dot")
+ }
+ pointSeen = true
+ } else if next == 'e' || next == 'E' {
+ expSeen = true
+ l.next()
+ r := l.peek()
+ if r == '+' || r == '-' {
+ l.next()
+ }
+ } else if isDigit(next) {
+ digitSeen = true
+ l.next()
+ } else if next == '_' {
+ l.next()
+ } else {
+ break
+ }
+ if pointSeen && !digitSeen {
+ return l.errorf("cannot start float with a dot")
+ }
+ }
+
+ if !digitSeen {
+ return l.errorf("no digit in that number")
+ }
+ if pointSeen || expSeen {
+ l.emit(tokenFloat)
+ } else {
+ l.emit(tokenInteger)
+ }
+ return l.lexRvalue
+}
+
+func (l *tomlLexer) run() {
+ for state := l.lexVoid; state != nil; {
+ state = state()
+ }
+}
+
+// Entry point
+func lexToml(inputBytes []byte) []token {
+ runes := bytes.Runes(inputBytes)
+ l := &tomlLexer{
+ input: runes,
+ tokens: make([]token, 0, 256),
+ line: 1,
+ col: 1,
+ endbufferLine: 1,
+ endbufferCol: 1,
+ }
+ l.run()
+ return l.tokens
+}