diff options
Diffstat (limited to 'vendor/go.mongodb.org/mongo-driver/bson/bsonrw/json_scanner.go')
-rw-r--r-- | vendor/go.mongodb.org/mongo-driver/bson/bsonrw/json_scanner.go | 528 |
1 files changed, 528 insertions, 0 deletions
diff --git a/vendor/go.mongodb.org/mongo-driver/bson/bsonrw/json_scanner.go b/vendor/go.mongodb.org/mongo-driver/bson/bsonrw/json_scanner.go new file mode 100644 index 000000000..cd4843a3a --- /dev/null +++ b/vendor/go.mongodb.org/mongo-driver/bson/bsonrw/json_scanner.go @@ -0,0 +1,528 @@ +// Copyright (C) MongoDB, Inc. 2017-present. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + +package bsonrw + +import ( + "bytes" + "errors" + "fmt" + "io" + "math" + "strconv" + "unicode" + "unicode/utf16" +) + +type jsonTokenType byte + +const ( + jttBeginObject jsonTokenType = iota + jttEndObject + jttBeginArray + jttEndArray + jttColon + jttComma + jttInt32 + jttInt64 + jttDouble + jttString + jttBool + jttNull + jttEOF +) + +type jsonToken struct { + t jsonTokenType + v interface{} + p int +} + +type jsonScanner struct { + r io.Reader + buf []byte + pos int + lastReadErr error +} + +// nextToken returns the next JSON token if one exists. A token is a character +// of the JSON grammar, a number, a string, or a literal. +func (js *jsonScanner) nextToken() (*jsonToken, error) { + c, err := js.readNextByte() + + // keep reading until a non-space is encountered (break on read error or EOF) + for isWhiteSpace(c) && err == nil { + c, err = js.readNextByte() + } + + if err == io.EOF { + return &jsonToken{t: jttEOF}, nil + } else if err != nil { + return nil, err + } + + // switch on the character + switch c { + case '{': + return &jsonToken{t: jttBeginObject, v: byte('{'), p: js.pos - 1}, nil + case '}': + return &jsonToken{t: jttEndObject, v: byte('}'), p: js.pos - 1}, nil + case '[': + return &jsonToken{t: jttBeginArray, v: byte('['), p: js.pos - 1}, nil + case ']': + return &jsonToken{t: jttEndArray, v: byte(']'), p: js.pos - 1}, nil + case ':': + return &jsonToken{t: jttColon, v: byte(':'), p: js.pos - 1}, nil + case ',': + return &jsonToken{t: jttComma, v: byte(','), p: js.pos - 1}, nil + case '"': // RFC-8259 only allows for double quotes (") not single (') + return js.scanString() + default: + // check if it's a number + if c == '-' || isDigit(c) { + return js.scanNumber(c) + } else if c == 't' || c == 'f' || c == 'n' { + // maybe a literal + return js.scanLiteral(c) + } else { + return nil, fmt.Errorf("invalid JSON input. Position: %d. Character: %c", js.pos-1, c) + } + } +} + +// readNextByte attempts to read the next byte from the buffer. If the buffer +// has been exhausted, this function calls readIntoBuf, thus refilling the +// buffer and resetting the read position to 0 +func (js *jsonScanner) readNextByte() (byte, error) { + if js.pos >= len(js.buf) { + err := js.readIntoBuf() + + if err != nil { + return 0, err + } + } + + b := js.buf[js.pos] + js.pos++ + + return b, nil +} + +// readNNextBytes reads n bytes into dst, starting at offset +func (js *jsonScanner) readNNextBytes(dst []byte, n, offset int) error { + var err error + + for i := 0; i < n; i++ { + dst[i+offset], err = js.readNextByte() + if err != nil { + return err + } + } + + return nil +} + +// readIntoBuf reads up to 512 bytes from the scanner's io.Reader into the buffer +func (js *jsonScanner) readIntoBuf() error { + if js.lastReadErr != nil { + js.buf = js.buf[:0] + js.pos = 0 + return js.lastReadErr + } + + if cap(js.buf) == 0 { + js.buf = make([]byte, 0, 512) + } + + n, err := js.r.Read(js.buf[:cap(js.buf)]) + if err != nil { + js.lastReadErr = err + if n > 0 { + err = nil + } + } + js.buf = js.buf[:n] + js.pos = 0 + + return err +} + +func isWhiteSpace(c byte) bool { + return c == ' ' || c == '\t' || c == '\r' || c == '\n' +} + +func isDigit(c byte) bool { + return unicode.IsDigit(rune(c)) +} + +func isValueTerminator(c byte) bool { + return c == ',' || c == '}' || c == ']' || isWhiteSpace(c) +} + +// getu4 decodes the 4-byte hex sequence from the beginning of s, returning the hex value as a rune, +// or it returns -1. Note that the "\u" from the unicode escape sequence should not be present. +// It is copied and lightly modified from the Go JSON decode function at +// https://github.com/golang/go/blob/1b0a0316802b8048d69da49dc23c5a5ab08e8ae8/src/encoding/json/decode.go#L1169-L1188 +func getu4(s []byte) rune { + if len(s) < 4 { + return -1 + } + var r rune + for _, c := range s[:4] { + switch { + case '0' <= c && c <= '9': + c = c - '0' + case 'a' <= c && c <= 'f': + c = c - 'a' + 10 + case 'A' <= c && c <= 'F': + c = c - 'A' + 10 + default: + return -1 + } + r = r*16 + rune(c) + } + return r +} + +// scanString reads from an opening '"' to a closing '"' and handles escaped characters +func (js *jsonScanner) scanString() (*jsonToken, error) { + var b bytes.Buffer + var c byte + var err error + + p := js.pos - 1 + + for { + c, err = js.readNextByte() + if err != nil { + if err == io.EOF { + return nil, errors.New("end of input in JSON string") + } + return nil, err + } + + evalNextChar: + switch c { + case '\\': + c, err = js.readNextByte() + if err != nil { + if err == io.EOF { + return nil, errors.New("end of input in JSON string") + } + return nil, err + } + + evalNextEscapeChar: + switch c { + case '"', '\\', '/': + b.WriteByte(c) + case 'b': + b.WriteByte('\b') + case 'f': + b.WriteByte('\f') + case 'n': + b.WriteByte('\n') + case 'r': + b.WriteByte('\r') + case 't': + b.WriteByte('\t') + case 'u': + us := make([]byte, 4) + err = js.readNNextBytes(us, 4, 0) + if err != nil { + return nil, fmt.Errorf("invalid unicode sequence in JSON string: %s", us) + } + + rn := getu4(us) + + // If the rune we just decoded is the high or low value of a possible surrogate pair, + // try to decode the next sequence as the low value of a surrogate pair. We're + // expecting the next sequence to be another Unicode escape sequence (e.g. "\uDD1E"), + // but need to handle cases where the input is not a valid surrogate pair. + // For more context on unicode surrogate pairs, see: + // https://www.christianfscott.com/rust-chars-vs-go-runes/ + // https://www.unicode.org/glossary/#high_surrogate_code_point + if utf16.IsSurrogate(rn) { + c, err = js.readNextByte() + if err != nil { + if err == io.EOF { + return nil, errors.New("end of input in JSON string") + } + return nil, err + } + + // If the next value isn't the beginning of a backslash escape sequence, write + // the Unicode replacement character for the surrogate value and goto the + // beginning of the next char eval block. + if c != '\\' { + b.WriteRune(unicode.ReplacementChar) + goto evalNextChar + } + + c, err = js.readNextByte() + if err != nil { + if err == io.EOF { + return nil, errors.New("end of input in JSON string") + } + return nil, err + } + + // If the next value isn't the beginning of a unicode escape sequence, write the + // Unicode replacement character for the surrogate value and goto the beginning + // of the next escape char eval block. + if c != 'u' { + b.WriteRune(unicode.ReplacementChar) + goto evalNextEscapeChar + } + + err = js.readNNextBytes(us, 4, 0) + if err != nil { + return nil, fmt.Errorf("invalid unicode sequence in JSON string: %s", us) + } + + rn2 := getu4(us) + + // Try to decode the pair of runes as a utf16 surrogate pair. If that fails, write + // the Unicode replacement character for the surrogate value and the 2nd decoded rune. + if rnPair := utf16.DecodeRune(rn, rn2); rnPair != unicode.ReplacementChar { + b.WriteRune(rnPair) + } else { + b.WriteRune(unicode.ReplacementChar) + b.WriteRune(rn2) + } + + break + } + + b.WriteRune(rn) + default: + return nil, fmt.Errorf("invalid escape sequence in JSON string '\\%c'", c) + } + case '"': + return &jsonToken{t: jttString, v: b.String(), p: p}, nil + default: + b.WriteByte(c) + } + } +} + +// scanLiteral reads an unquoted sequence of characters and determines if it is one of +// three valid JSON literals (true, false, null); if so, it returns the appropriate +// jsonToken; otherwise, it returns an error +func (js *jsonScanner) scanLiteral(first byte) (*jsonToken, error) { + p := js.pos - 1 + + lit := make([]byte, 4) + lit[0] = first + + err := js.readNNextBytes(lit, 3, 1) + if err != nil { + return nil, err + } + + c5, err := js.readNextByte() + + if bytes.Equal([]byte("true"), lit) && (isValueTerminator(c5) || err == io.EOF) { + js.pos = int(math.Max(0, float64(js.pos-1))) + return &jsonToken{t: jttBool, v: true, p: p}, nil + } else if bytes.Equal([]byte("null"), lit) && (isValueTerminator(c5) || err == io.EOF) { + js.pos = int(math.Max(0, float64(js.pos-1))) + return &jsonToken{t: jttNull, v: nil, p: p}, nil + } else if bytes.Equal([]byte("fals"), lit) { + if c5 == 'e' { + c5, err = js.readNextByte() + + if isValueTerminator(c5) || err == io.EOF { + js.pos = int(math.Max(0, float64(js.pos-1))) + return &jsonToken{t: jttBool, v: false, p: p}, nil + } + } + } + + return nil, fmt.Errorf("invalid JSON literal. Position: %d, literal: %s", p, lit) +} + +type numberScanState byte + +const ( + nssSawLeadingMinus numberScanState = iota + nssSawLeadingZero + nssSawIntegerDigits + nssSawDecimalPoint + nssSawFractionDigits + nssSawExponentLetter + nssSawExponentSign + nssSawExponentDigits + nssDone + nssInvalid +) + +// scanNumber reads a JSON number (according to RFC-8259) +func (js *jsonScanner) scanNumber(first byte) (*jsonToken, error) { + var b bytes.Buffer + var s numberScanState + var c byte + var err error + + t := jttInt64 // assume it's an int64 until the type can be determined + start := js.pos - 1 + + b.WriteByte(first) + + switch first { + case '-': + s = nssSawLeadingMinus + case '0': + s = nssSawLeadingZero + default: + s = nssSawIntegerDigits + } + + for { + c, err = js.readNextByte() + + if err != nil && err != io.EOF { + return nil, err + } + + switch s { + case nssSawLeadingMinus: + switch c { + case '0': + s = nssSawLeadingZero + b.WriteByte(c) + default: + if isDigit(c) { + s = nssSawIntegerDigits + b.WriteByte(c) + } else { + s = nssInvalid + } + } + case nssSawLeadingZero: + switch c { + case '.': + s = nssSawDecimalPoint + b.WriteByte(c) + case 'e', 'E': + s = nssSawExponentLetter + b.WriteByte(c) + case '}', ']', ',': + s = nssDone + default: + if isWhiteSpace(c) || err == io.EOF { + s = nssDone + } else { + s = nssInvalid + } + } + case nssSawIntegerDigits: + switch c { + case '.': + s = nssSawDecimalPoint + b.WriteByte(c) + case 'e', 'E': + s = nssSawExponentLetter + b.WriteByte(c) + case '}', ']', ',': + s = nssDone + default: + if isWhiteSpace(c) || err == io.EOF { + s = nssDone + } else if isDigit(c) { + s = nssSawIntegerDigits + b.WriteByte(c) + } else { + s = nssInvalid + } + } + case nssSawDecimalPoint: + t = jttDouble + if isDigit(c) { + s = nssSawFractionDigits + b.WriteByte(c) + } else { + s = nssInvalid + } + case nssSawFractionDigits: + switch c { + case 'e', 'E': + s = nssSawExponentLetter + b.WriteByte(c) + case '}', ']', ',': + s = nssDone + default: + if isWhiteSpace(c) || err == io.EOF { + s = nssDone + } else if isDigit(c) { + s = nssSawFractionDigits + b.WriteByte(c) + } else { + s = nssInvalid + } + } + case nssSawExponentLetter: + t = jttDouble + switch c { + case '+', '-': + s = nssSawExponentSign + b.WriteByte(c) + default: + if isDigit(c) { + s = nssSawExponentDigits + b.WriteByte(c) + } else { + s = nssInvalid + } + } + case nssSawExponentSign: + if isDigit(c) { + s = nssSawExponentDigits + b.WriteByte(c) + } else { + s = nssInvalid + } + case nssSawExponentDigits: + switch c { + case '}', ']', ',': + s = nssDone + default: + if isWhiteSpace(c) || err == io.EOF { + s = nssDone + } else if isDigit(c) { + s = nssSawExponentDigits + b.WriteByte(c) + } else { + s = nssInvalid + } + } + } + + switch s { + case nssInvalid: + return nil, fmt.Errorf("invalid JSON number. Position: %d", start) + case nssDone: + js.pos = int(math.Max(0, float64(js.pos-1))) + if t != jttDouble { + v, err := strconv.ParseInt(b.String(), 10, 64) + if err == nil { + if v < math.MinInt32 || v > math.MaxInt32 { + return &jsonToken{t: jttInt64, v: v, p: start}, nil + } + + return &jsonToken{t: jttInt32, v: int32(v), p: start}, nil + } + } + + v, err := strconv.ParseFloat(b.String(), 64) + if err != nil { + return nil, err + } + + return &jsonToken{t: jttDouble, v: v, p: start}, nil + } + } +} |