diff options
Diffstat (limited to 'vendor/github.com/ugorji/go/codec/reader.go')
-rw-r--r-- | vendor/github.com/ugorji/go/codec/reader.go | 645 |
1 files changed, 217 insertions, 428 deletions
diff --git a/vendor/github.com/ugorji/go/codec/reader.go b/vendor/github.com/ugorji/go/codec/reader.go index a683d9a2a..3fea9f4c7 100644 --- a/vendor/github.com/ugorji/go/codec/reader.go +++ b/vendor/github.com/ugorji/go/codec/reader.go @@ -3,7 +3,12 @@ package codec -import "io" +import ( + "bufio" + "bytes" + "io" + "strings" +) // decReader abstracts the reading source, allowing implementations that can // read from an io.Reader or directly off a byte slice with zero-copying. @@ -17,8 +22,7 @@ type decReader interface { readn1() byte readn2() [2]byte - // readn3 will read 3 bytes into the top-most elements of a 4-byte array - readn3() [4]byte + readn3() [3]byte readn4() [4]byte readn8() [8]byte // readn1eof() (v uint8, eof bool) @@ -62,55 +66,57 @@ const ( unreadByteCanUnread ) +// const defBufReaderSize = 4096 + // -------------------- -type ioDecReaderCommon struct { - r io.Reader // the reader passed in +// ioReaderByteScanner contains the io.Reader and io.ByteScanner interfaces +type ioReaderByteScanner interface { + io.Reader + io.ByteScanner + // ReadByte() (byte, error) + // UnreadByte() error + // Read(p []byte) (n int, err error) +} - n uint // num read +// ioReaderByteScannerT does a simple wrapper of a io.ByteScanner +// over a io.Reader +type ioReaderByteScannerT struct { + r io.Reader l byte // last byte ls unreadByteStatus // last byte status - b [6]byte // tiny buffer for reading single bytes - - blist *bytesFreelist - - bufr []byte // buffer for readTo/readUntil + _ [2]byte // padding + b [4]byte // tiny buffer for reading single bytes } -func (z *ioDecReaderCommon) reset(r io.Reader, blist *bytesFreelist) { - z.blist = blist - z.r = r - z.ls = unreadByteUndefined - z.l, z.n = 0, 0 - z.bufr = z.blist.check(z.bufr, 256) -} - -func (z *ioDecReaderCommon) numread() uint { - return z.n -} - -// ------------------------------------------ - -// ioDecReader is a decReader that reads off an io.Reader. -// -// It also has a fallback implementation of ByteScanner if needed. -type ioDecReader struct { - ioDecReaderCommon - - br io.ByteScanner - - x [64 + 48]byte // for: get struct field name, swallow valueTypeBytes, etc +func (z *ioReaderByteScannerT) ReadByte() (c byte, err error) { + if z.ls == unreadByteCanRead { + z.ls = unreadByteCanUnread + c = z.l + } else { + _, err = z.Read(z.b[:1]) + c = z.b[0] + } + return } -func (z *ioDecReader) reset(r io.Reader, blist *bytesFreelist) { - z.ioDecReaderCommon.reset(r, blist) - - z.br, _ = r.(io.ByteScanner) +func (z *ioReaderByteScannerT) UnreadByte() (err error) { + switch z.ls { + case unreadByteCanUnread: + z.ls = unreadByteCanRead + case unreadByteCanRead: + err = errDecUnreadByteLastByteNotRead + case unreadByteUndefined: + err = errDecUnreadByteNothingToRead + default: + err = errDecUnreadByteUnknown + } + return } -func (z *ioDecReader) Read(p []byte) (n int, err error) { +func (z *ioReaderByteScannerT) Read(p []byte) (n int, err error) { if len(p) == 0 { return } @@ -139,45 +145,79 @@ func (z *ioDecReader) Read(p []byte) (n int, err error) { return } -func (z *ioDecReader) ReadByte() (c byte, err error) { - if z.br != nil { - c, err = z.br.ReadByte() - if err == nil { - z.l = c - z.ls = unreadByteCanUnread +func (z *ioReaderByteScannerT) reset(r io.Reader) { + z.r = r + z.ls = unreadByteUndefined + z.l = 0 +} + +// ioDecReader is a decReader that reads off an io.Reader. +type ioDecReader struct { + rr ioReaderByteScannerT // the reader passed in, wrapped into a reader+bytescanner + + n uint // num read + + blist *bytesFreelist + + bufr []byte // buffer for readTo/readUntil + br ioReaderByteScanner // main reader used for Read|ReadByte|UnreadByte + bb *bufio.Reader // created internally, and reused on reset if needed + + x [64 + 40]byte // for: get struct field name, swallow valueTypeBytes, etc +} + +func (z *ioDecReader) reset(r io.Reader, bufsize int, blist *bytesFreelist) { + z.blist = blist + z.n = 0 + z.bufr = z.blist.check(z.bufr, 256) + z.br = nil + + var ok bool + + if bufsize <= 0 { + z.br, ok = r.(ioReaderByteScanner) + if !ok { + z.rr.reset(r) + z.br = &z.rr } return } - n, err := z.Read(z.b[:1]) - if n == 1 { - c = z.b[0] - if err == io.EOF { - err = nil // read was successful, so postpone EOF (till next time) + // bufsize > 0 ... + + // if bytes.[Buffer|Reader], no value in adding extra buffer + // if bufio.Reader, no value in extra buffer unless size changes + switch bb := r.(type) { + case *strings.Reader: + z.br = bb + case *bytes.Buffer: + z.br = bb + case *bytes.Reader: + z.br = bb + case *bufio.Reader: + if bb.Size() == bufsize { + z.br = bb } } - return -} -func (z *ioDecReader) UnreadByte() (err error) { - if z.br != nil { - err = z.br.UnreadByte() - if err == nil { - z.ls = unreadByteCanRead + if z.br == nil { + if z.bb != nil && z.bb.Size() == bufsize { + z.bb.Reset(r) + } else { + z.bb = bufio.NewReaderSize(r, bufsize) } - return + z.br = z.bb } +} - switch z.ls { - case unreadByteCanUnread: - z.ls = unreadByteCanRead - case unreadByteCanRead: - err = errDecUnreadByteLastByteNotRead - case unreadByteUndefined: - err = errDecUnreadByteNothingToRead - default: - err = errDecUnreadByteUnknown - } +func (z *ioDecReader) numread() uint { + return z.n +} + +func (z *ioDecReader) readn1() (b uint8) { + b, err := z.br.ReadByte() + halt.onerror(err) + z.n++ return } @@ -186,8 +226,8 @@ func (z *ioDecReader) readn2() (bs [2]byte) { return } -func (z *ioDecReader) readn3() (bs [4]byte) { - z.readb(bs[1:]) +func (z *ioDecReader) readn3() (bs [3]byte) { + z.readb(bs[:]) return } @@ -203,16 +243,16 @@ func (z *ioDecReader) readn8() (bs [8]byte) { func (z *ioDecReader) readx(n uint) (bs []byte) { if n == 0 { - return + return zeroByteSlice } if n < uint(len(z.x)) { bs = z.x[:n] } else { bs = make([]byte, n) } - _, err := readFull(z.r, bs) + nn, err := readFull(z.br, bs) + z.n += nn halt.onerror(err) - z.n += uint(len(bs)) return } @@ -220,38 +260,36 @@ func (z *ioDecReader) readb(bs []byte) { if len(bs) == 0 { return } - _, err := readFull(z.r, bs) + nn, err := readFull(z.br, bs) + z.n += nn halt.onerror(err) - z.n += uint(len(bs)) } -func (z *ioDecReader) readn1() (b uint8) { - b, err := z.ReadByte() - halt.onerror(err) - z.n++ - return -} - -func (z *ioDecReader) readn1eof() (b uint8, eof bool) { - b, err := z.ReadByte() - if err == nil { - z.n++ - } else if err == io.EOF { - eof = true - } else { - halt.onerror(err) - } - return -} +// func (z *ioDecReader) readn1eof() (b uint8, eof bool) { +// b, err := z.br.ReadByte() +// if err == nil { +// z.n++ +// } else if err == io.EOF { +// eof = true +// } else { +// halt.onerror(err) +// } +// return +// } func (z *ioDecReader) jsonReadNum() (bs []byte) { z.unreadn1() z.bufr = z.bufr[:0] LOOP: - i, eof := z.readn1eof() - if eof { + // i, eof := z.readn1eof() + i, err := z.br.ReadByte() + if err == io.EOF { return z.bufr } + if err != nil { + halt.onerror(err) + } + z.n++ if isNumberChar(i) { z.bufr = append(z.bufr, i) goto LOOP @@ -280,307 +318,45 @@ LOOP: return } +// func (z *ioDecReader) readUntil(stop byte) []byte { +// z.bufr = z.bufr[:0] +// LOOP: +// token := z.readn1() +// z.bufr = append(z.bufr, token) +// if token == stop { +// return z.bufr[:len(z.bufr)-1] +// } +// goto LOOP +// } + func (z *ioDecReader) readUntil(stop byte) []byte { z.bufr = z.bufr[:0] LOOP: token := z.readn1() - z.bufr = append(z.bufr, token) if token == stop { - return z.bufr[:len(z.bufr)-1] + return z.bufr } + z.bufr = append(z.bufr, token) goto LOOP } func (z *ioDecReader) unreadn1() { - err := z.UnreadByte() + err := z.br.UnreadByte() halt.onerror(err) z.n-- } // ------------------------------------ -type bufioDecReader struct { - ioDecReaderCommon - - c uint // cursor - buf []byte -} - -func (z *bufioDecReader) reset(r io.Reader, bufsize int, blist *bytesFreelist) { - z.ioDecReaderCommon.reset(r, blist) - z.c = 0 - if cap(z.buf) < bufsize { - z.buf = blist.get(bufsize) - } else { - z.buf = z.buf[:0] - } -} - -func (z *bufioDecReader) readb(p []byte) { - var n = uint(copy(p, z.buf[z.c:])) - z.n += n - z.c += n - if len(p) != int(n) { - z.readbFill(p, n, true, false) - } -} - -func readbFillHandleErr(err error, must, eof bool) (isEOF bool) { - if err == io.EOF { - isEOF = true - } - if must && !(eof && isEOF) { - halt.onerror(err) - } - return -} - -func (z *bufioDecReader) readbFill(p0 []byte, n uint, must, eof bool) (isEOF bool, err error) { - // at this point, there's nothing in z.buf to read (z.buf is fully consumed) - var p []byte - if p0 != nil { - p = p0[n:] - } - var n2 uint - if len(p) > cap(z.buf) { - n2, err = readFull(z.r, p) - if err != nil { - isEOF = readbFillHandleErr(err, must, eof) - return - } - n += n2 - z.n += n2 - // always keep last byte in z.buf - z.buf = z.buf[:1] - z.buf[0] = p[len(p)-1] - z.c = 1 - return - } - // z.c is now 0, and len(p) <= cap(z.buf) - var n1 int -LOOP: - // for len(p) > 0 && z.err == nil { - z.buf = z.buf[0:cap(z.buf)] - n1, err = z.r.Read(z.buf) - n2 = uint(n1) - if n2 == 0 && err != nil { - isEOF = readbFillHandleErr(err, must, eof) - return - } - err = nil - z.buf = z.buf[:n2] - z.c = 0 - if len(p) > 0 { - n2 = uint(copy(p, z.buf)) - z.c = n2 - n += n2 - z.n += n2 - p = p[n2:] - if len(p) > 0 { - goto LOOP - } - if z.c == 0 { - z.buf = z.buf[:1] - z.buf[0] = p[len(p)-1] - z.c = 1 - } - } - return -} - -func (z *bufioDecReader) readn1() (b byte) { - if z.c >= uint(len(z.buf)) { - z.readbFill(nil, 0, true, false) - } - b = z.buf[z.c] - z.c++ - z.n++ - return -} - -func (z *bufioDecReader) readn1eof() (b byte, eof bool) { - if z.c >= uint(len(z.buf)) { - eof, _ = z.readbFill(nil, 0, true, true) - if eof { - return - } - } - b = z.buf[z.c] - z.c++ - z.n++ - return -} - -func (z *bufioDecReader) unreadn1() { - if z.c == 0 { - halt.onerror(errDecUnreadByteNothingToRead) - } - z.c-- - z.n-- -} - -func (z *bufioDecReader) readn2() (bs [2]byte) { - z.readb(bs[:]) - return -} - -func (z *bufioDecReader) readn3() (bs [4]byte) { - z.readb(bs[1:]) - return -} - -func (z *bufioDecReader) readn4() (bs [4]byte) { - z.readb(bs[:]) - return -} - -func (z *bufioDecReader) readn8() (bs [8]byte) { - z.readb(bs[:]) - return -} - -func (z *bufioDecReader) readx(n uint) (bs []byte) { - if n == 0 { - // return - } else if z.c+n <= uint(len(z.buf)) { - bs = z.buf[z.c : z.c+n] - z.n += n - z.c += n - } else { - bs = make([]byte, n) - // n no longer used - can reuse - n = uint(copy(bs, z.buf[z.c:])) - z.n += n - z.c += n - z.readbFill(bs, n, true, false) - } - return -} - -func (z *bufioDecReader) jsonReadNum() (bs []byte) { - z.unreadn1() - z.bufr = z.bufr[:0] -LOOP: - i, eof := z.readn1eof() - if eof { - return z.bufr - } - if isNumberChar(i) { - z.bufr = append(z.bufr, i) - goto LOOP - } - z.unreadn1() - return z.bufr -} - -func (z *bufioDecReader) jsonReadAsisChars() (bs []byte) { - z.bufr = z.bufr[:0] -LOOP: - i := z.readn1() - z.bufr = append(z.bufr, i) - if i == '"' || i == '\\' { - return z.bufr - } - goto LOOP -} - -func (z *bufioDecReader) skipWhitespace() (token byte) { - i := z.c -LOOP: - if i < uint(len(z.buf)) { - // inline z.skipLoopFn(i) and refactor, so cost is within inline budget - token = z.buf[i] - i++ - if isWhitespaceChar(token) { - goto LOOP - } - z.n += i - 2 - z.c - z.c = i - return - } - return z.skipFillWhitespace() -} - -func (z *bufioDecReader) skipFillWhitespace() (token byte) { - z.n += uint(len(z.buf)) - z.c - var i, n2 int - var err error - for { - z.c = 0 - z.buf = z.buf[0:cap(z.buf)] - n2, err = z.r.Read(z.buf) - if n2 == 0 { - halt.onerror(err) - } - z.buf = z.buf[:n2] - for i, token = range z.buf { - if !isWhitespaceChar(token) { - z.n += (uint(i) - z.c) - 1 - z.loopFn(uint(i + 1)) - return - } - } - z.n += uint(n2) - } -} - -func (z *bufioDecReader) loopFn(i uint) { - z.c = i -} - -func (z *bufioDecReader) readUntil(stop byte) (out []byte) { - i := z.c -LOOP: - if i < uint(len(z.buf)) { - if z.buf[i] == stop { - z.n += (i - z.c) - 1 - i++ - out = z.buf[z.c:i] - z.c = i - goto FINISH - } - i++ - goto LOOP - } - out = z.readUntilFill(stop) -FINISH: - return out[:len(out)-1] -} - -func (z *bufioDecReader) readUntilFill(stop byte) []byte { - z.bufr = z.bufr[:0] - z.n += uint(len(z.buf)) - z.c - z.bufr = append(z.bufr, z.buf[z.c:]...) - for { - z.c = 0 - z.buf = z.buf[0:cap(z.buf)] - n1, err := z.r.Read(z.buf) - if n1 == 0 { - halt.onerror(err) - } - n2 := uint(n1) - z.buf = z.buf[:n2] - for i, token := range z.buf { - if token == stop { - z.n += (uint(i) - z.c) - 1 - z.bufr = append(z.bufr, z.buf[z.c:i+1]...) - z.loopFn(uint(i + 1)) - return z.bufr - } - } - z.bufr = append(z.bufr, z.buf...) - z.n += n2 - } -} - -// ------------------------------------ - // bytesDecReader is a decReader that reads off a byte slice with zero copying // // Note: we do not try to convert index'ing out of bounds to an io.EOF. // instead, we let it bubble up to the exported Encode/Decode method // and recover it as an io.EOF. // +// Every function here MUST defensively check bounds either explicitly +// or via a bounds check. +// // see panicValToErr(...) function in helper.go. type bytesDecReader struct { b []byte // data @@ -601,9 +377,11 @@ func (z *bytesDecReader) numread() uint { // However, we do it only once, and it's better than reslicing both z.b and return value. func (z *bytesDecReader) readx(n uint) (bs []byte) { - x := z.c + n - bs = z.b[z.c:x] - z.c = x + // x := z.c + n + // bs = z.b[z.c:x] + // z.c = x + bs = z.b[z.c : z.c+n] + z.c += n return } @@ -630,23 +408,26 @@ func (z *bytesDecReader) readb(bs []byte) { // return z.b[z.c-1] // } +// MARKER: readn{1,2,3,4,8} should throw an out of bounds error if past length. +// MARKER: readn1: explicitly ensure bounds check is done +// MARKER: readn{2,3,4,8}: ensure you slice z.b completely so we get bounds error if past end. + func (z *bytesDecReader) readn1() (v uint8) { v = z.b[z.c] z.c++ return } -// MARKER: for readn{2,3,4,8}, ensure you slice z.b completely so we get bounds error if past end. - func (z *bytesDecReader) readn2() (bs [2]byte) { // copy(bs[:], z.b[z.c:z.c+2]) - bs[1] = z.b[z.c+1] - bs[0] = z.b[z.c] + // bs[1] = z.b[z.c+1] + // bs[0] = z.b[z.c] + bs = okBytes2(z.b[z.c : z.c+2]) z.c += 2 return } -func (z *bytesDecReader) readn3() (bs [4]byte) { +func (z *bytesDecReader) readn3() (bs [3]byte) { // copy(bs[1:], z.b[z.c:z.c+3]) bs = okBytes3(z.b[z.c : z.c+3]) z.c += 3 @@ -668,14 +449,17 @@ func (z *bytesDecReader) readn8() (bs [8]byte) { } func (z *bytesDecReader) jsonReadNum() []byte { - z.c-- + z.c-- // unread i := z.c LOOP: + // gracefully handle end of slice, as end of stream is meaningful here if i < uint(len(z.b)) && isNumberChar(z.b[i]) { i++ goto LOOP } z.c, i = i, z.c + // MARKER: 20230103: byteSliceOf here prevents inlining of jsonReadNum + // return byteSliceOf(z.b, i, z.c) return z.b[i:z.c] } @@ -686,7 +470,8 @@ LOOP: i++ if token == '"' || token == '\\' { z.c, i = i, z.c - return z.b[i:z.c] + return byteSliceOf(z.b, i, z.c) + // return z.b[i:z.c] } goto LOOP } @@ -707,7 +492,8 @@ func (z *bytesDecReader) readUntil(stop byte) (out []byte) { i := z.c LOOP: if z.b[i] == stop { - out = z.b[z.c:i] + out = byteSliceOf(z.b, z.c, i) + // out = z.b[z.c:i] z.c = i + 1 return } @@ -718,6 +504,16 @@ LOOP: // -------------- type decRd struct { + rb bytesDecReader + ri *ioDecReader + + decReader + + bytes bool // is bytes reader + + // MARKER: these fields below should belong directly in Encoder. + // we pack them here for space efficiency and cache-line optimization. + mtr bool // is maptype a known type? str bool // is slicetype a known type? @@ -726,73 +522,66 @@ type decRd struct { jsms bool // is json handle, and MapKeyAsString cbor bool // is cbor handle - bytes bool // is bytes reader - bufio bool // is this a bufioDecReader? - - rb bytesDecReader - ri *ioDecReader - bi *bufioDecReader + cbreak bool // is a check breaker - decReader } -// From out benchmarking, we see the following in terms of performance: -// -// - interface calls -// - branch that can inline what it calls +// From out benchmarking, we see the following impact performance: // -// the if/else-if/else block is expensive to inline. -// Each node of this construct costs a lot and dominates the budget. -// Best to only do an if fast-path else block (so fast-path is inlined). -// This is irrespective of inlineExtraCallCost set in $GOROOT/src/cmd/compile/internal/gc/inl.go +// - functions that are too big to inline +// - interface calls (as no inlining can occur) // -// In decRd methods below, we delegate all IO functions into their own methods. -// This allows for the inlining of the common path when z.bytes=true. -// Go 1.12+ supports inlining methods with up to 1 inlined function (or 2 if no other constructs). +// decRd is designed to embed a decReader, and then re-implement some of the decReader +// methods using a conditional branch. // -// However, up through Go 1.13, decRd's readXXX, skip and unreadXXX methods are not inlined. -// Consequently, there is no benefit to do the xxxIO methods for decRd at this time. -// Instead, we have a if/else-if/else block so that IO calls do not have to jump through -// a second unnecessary function call. +// We only override the ones where the bytes version is inlined AND the wrapper method +// (containing the bytes version alongside a conditional branch) is also inlined. // -// If golang inlining gets better and bytesDecReader methods can be inlined, -// then we can revert to using these 2 functions so the bytesDecReader -// methods are inlined and the IO paths call out to a function. +// We use ./run.sh -z to check. // -// decRd is designed to embed a decReader, and then re-implement some of the decReader -// methods using a conditional branch. We only override the ones that have a bytes version -// that is small enough to be inlined. We use ./run.sh -z to check. -// Right now, only numread and readn1 can be inlined. +// Right now, only numread and "carefully crafted" readn1 can be inlined. func (z *decRd) numread() uint { if z.bytes { return z.rb.numread() - } else if z.bufio { - return z.bi.numread() - } else { - return z.ri.numread() } + return z.ri.numread() } func (z *decRd) readn1() (v uint8) { if z.bytes { - // MARKER: manually inline, else this function is not inlined. - // Keep in sync with bytesDecReader.readn1 // return z.rb.readn1() + // MARKER: calling z.rb.readn1() prevents decRd.readn1 from being inlined. + // copy code, to manually inline and explicitly return here. + // Keep in sync with bytesDecReader.readn1 v = z.rb.b[z.rb.c] z.rb.c++ - } else { - v = z.readn1IO() - } - return -} -func (z *decRd) readn1IO() uint8 { - if z.bufio { - return z.bi.readn1() + return } return z.ri.readn1() } +// func (z *decRd) readn4() [4]byte { +// if z.bytes { +// return z.rb.readn4() +// } +// return z.ri.readn4() +// } + +// func (z *decRd) readn3() [3]byte { +// if z.bytes { +// return z.rb.readn3() +// } +// return z.ri.readn3() +// } + +// func (z *decRd) skipWhitespace() byte { +// if z.bytes { +// return z.rb.skipWhitespace() +// } +// return z.ri.skipWhitespace() +// } + type devNullReader struct{} func (devNullReader) Read(p []byte) (int, error) { return 0, io.EOF } |