Diffstat (limited to 'vendor/github.com/philhofer/fwd/reader.go')
-rw-r--r--  vendor/github.com/philhofer/fwd/reader.go  445
1 file changed, 445 insertions, 0 deletions
diff --git a/vendor/github.com/philhofer/fwd/reader.go b/vendor/github.com/philhofer/fwd/reader.go
new file mode 100644
index 000000000..a24a896e2
--- /dev/null
+++ b/vendor/github.com/philhofer/fwd/reader.go
@@ -0,0 +1,445 @@
+// Package fwd provides a buffered reader
+// and writer. Each has methods that help improve
+// the encoding/decoding performance of some binary
+// protocols.
+//
+// The [Writer] and [Reader] types provide similar
+// functionality to their counterparts in [bufio], plus
+// a few extra utility methods that simplify read-ahead
+// and write-ahead. I wrote this package to improve serialization
+// performance for http://github.com/tinylib/msgp,
+// where it provided about a 2x speedup over `bufio` for certain
+// workloads. However, care must be taken to understand the semantics of the
+// extra methods provided by this package, as they allow
+// the user to access and manipulate the buffer memory
+// directly.
+//
+// The extra methods for [Reader] are [Reader.Peek], [Reader.Skip]
+// and [Reader.Next]. (*fwd.Reader).Peek, unlike (*bufio.Reader).Peek,
+// will re-allocate the read buffer in order to accommodate arbitrarily
+// large read-ahead. (*fwd.Reader).Skip skips the next 'n' bytes
+// in the stream, and uses the [io.Seeker] interface if the underlying
+// stream implements it. (*fwd.Reader).Next returns a slice pointing
+// to the next 'n' bytes in the read buffer (like [Reader.Peek]), but also
+// increments the read position. This allows users to process streams
+// in arbitrary block sizes without having to manage appropriately-sized
+// slices. Additionally, obviating the need to copy the data from the
+// buffer to another location in memory can improve performance dramatically
+// in CPU-bound applications.
+//
+// [Writer] only has one extra method, (*fwd.Writer).Next, which
+// returns a slice pointing to the next 'n' bytes of the writer, and increments
+// the write position by the length of the returned slice. This allows users
+// to write directly to the end of the buffer.
+package fwd
+
+import (
+	"io"
+	"os"
+)
+
+const (
+	// DefaultReaderSize is the default size of the read buffer
+	DefaultReaderSize = 2048
+
+	// minimum read buffer; straight from bufio
+	minReaderSize = 16
+)
+
+// NewReader returns a new *Reader that reads from 'r'
+func NewReader(r io.Reader) *Reader {
+	return NewReaderSize(r, DefaultReaderSize)
+}
+
+// NewReaderSize returns a new *Reader that
+// reads from 'r' and has a buffer size 'n'.
+func NewReaderSize(r io.Reader, n int) *Reader {
+	buf := make([]byte, 0, max(n, minReaderSize))
+	return NewReaderBuf(r, buf)
+}
+
+// NewReaderBuf returns a new *Reader that
+// reads from 'r' and uses 'buf' as a buffer.
+// If 'buf' has a capacity smaller than 16 bytes,
+// it is not used; a new buffer is allocated instead.
+func NewReaderBuf(r io.Reader, buf []byte) *Reader {
+	if cap(buf) < minReaderSize {
+		buf = make([]byte, 0, minReaderSize)
+	}
+	buf = buf[:0]
+	rd := &Reader{
+		r:    r,
+		data: buf,
+	}
+	if s, ok := r.(io.Seeker); ok {
+		rd.rs = s
+	}
+	return rd
+}
+
+// Reader is a buffered look-ahead reader
+type Reader struct {
+	r io.Reader // underlying reader
+
+	// data[n:len(data)] is buffered data; data[len(data):cap(data)] is free buffer space
+	data        []byte // data
+	n           int    // read offset
+	inputOffset int64  // offset in the input stream
+	state       error  // last read error
+
+	// if the reader passed to NewReader was
+	// also an io.Seeker, this is non-nil
+	rs io.Seeker
+}
+
+// Reset resets the underlying reader
+// and the read buffer.
+func (r *Reader) Reset(rd io.Reader) {
+	r.r = rd
+	r.data = r.data[0:0]
+	r.n = 0
+	r.inputOffset = 0
+	r.state = nil
+	if s, ok := rd.(io.Seeker); ok {
+		r.rs = s
+	} else {
+		r.rs = nil
+	}
+}
+
+// more() does one read on the underlying reader
+func (r *Reader) more() {
+	// move data backwards so that
+	// the read offset is 0; this way
+	// we can supply the maximum number of
+	// bytes to the reader
+	if r.n != 0 {
+		if r.n < len(r.data) {
+			r.data = r.data[:copy(r.data[0:], r.data[r.n:])]
+		} else {
+			r.data = r.data[:0]
+		}
+		r.n = 0
+	}
+	var a int
+	a, r.state = r.r.Read(r.data[len(r.data):cap(r.data)])
+	if a == 0 && r.state == nil {
+		r.state = io.ErrNoProgress
+		return
+	} else if a > 0 && r.state == io.EOF {
+		// discard the io.EOF if we read more than 0 bytes.
+		// the next call to Read should return io.EOF again.
+		r.state = nil
+	} else if r.state != nil {
+		return
+	}
+	r.data = r.data[:len(r.data)+a]
+}
+
+// pop error
+func (r *Reader) err() (e error) {
+	e, r.state = r.state, nil
+	return
+}
+
+// pop error; EOF -> io.ErrUnexpectedEOF
+func (r *Reader) noEOF() (e error) {
+	e, r.state = r.state, nil
+	if e == io.EOF {
+		e = io.ErrUnexpectedEOF
+	}
+	return
+}
+
+// buffered bytes
+func (r *Reader) buffered() int { return len(r.data) - r.n }
+
+// Buffered returns the number of bytes currently in the buffer
+func (r *Reader) Buffered() int { return len(r.data) - r.n }
+
+// BufferSize returns the total size of the buffer
+func (r *Reader) BufferSize() int { return cap(r.data) }
+
+// InputOffset returns the input stream byte offset of the current reader position
+func (r *Reader) InputOffset() int64 { return r.inputOffset }
+
+// Peek returns the next 'n' buffered bytes,
+// reading from the underlying reader if necessary.
+// It will only return a slice shorter than 'n' bytes
+// if it also returns an error. Peek does not advance
+// the reader. EOF errors are *not* returned as
+// io.ErrUnexpectedEOF.
+func (r *Reader) Peek(n int) ([]byte, error) {
+	// in the degenerate case,
+	// we may need to realloc
+	// (the caller asked for more
+	// bytes than the size of the buffer)
+	if cap(r.data) < n {
+		old := r.data[r.n:]
+		r.data = make([]byte, n+r.buffered())
+		r.data = r.data[:copy(r.data, old)]
+		r.n = 0
+	}
+
+	// keep filling until
+	// we hit an error or
+	// read enough bytes
+	for r.buffered() < n && r.state == nil {
+		r.more()
+	}
+
+	// we must have hit an error
+	if r.buffered() < n {
+		return r.data[r.n:], r.err()
+	}
+
+	return r.data[r.n : r.n+n], nil
+}
+
+// PeekByte returns the next byte in the stream without advancing the reader.
+func (r *Reader) PeekByte() (b byte, err error) {
+	if len(r.data)-r.n >= 1 {
+		b = r.data[r.n]
+	} else {
+		b, err = r.peekByte()
+	}
+	return
+}
+
+func (r *Reader) peekByte() (byte, error) {
+	const n = 1
+	if cap(r.data) < n {
+		old := r.data[r.n:]
+		r.data = make([]byte, n+r.buffered())
+		r.data = r.data[:copy(r.data, old)]
+		r.n = 0
+	}
+
+	// keep filling until
+	// we hit an error or
+	// read enough bytes
+	for r.buffered() < n && r.state == nil {
+		r.more()
+	}
+
+	// we must have hit an error
+	if r.buffered() < n {
+		return 0, r.err()
+	}
+	return r.data[r.n], nil
+}
+
+// discard(n) discards up to 'n' buffered bytes
+// and returns the number of bytes discarded
+func (r *Reader) discard(n int) int {
+	inbuf := r.buffered()
+	if inbuf <= n {
+		r.n = 0
+		r.inputOffset += int64(inbuf)
+		r.data = r.data[:0]
+		return inbuf
+	}
+	r.n += n
+	r.inputOffset += int64(n)
+	return n
+}
+
+// Skip moves the reader forward 'n' bytes.
+// Returns the number of bytes skipped and any
+// errors encountered. It is analogous to Seek(n, 1).
+// If the underlying reader implements io.Seeker, then
+// that method will be used to skip forward.
+//
+// If the reader encounters
+// an EOF before skipping 'n' bytes, it
+// returns [io.ErrUnexpectedEOF]. If the
+// underlying reader implements [io.Seeker], then
+// those rules apply instead. (Many implementations
+// will not return [io.EOF] until the next call
+// to Read).
+func (r *Reader) Skip(n int) (int, error) {
+	if n < 0 {
+		return 0, os.ErrInvalid
+	}
+
+	// discard some or all of the current buffer
+	skipped := r.discard(n)
+
+	// if we can Seek() through the remaining bytes, do that
+	if n > skipped && r.rs != nil {
+		nn, err := r.rs.Seek(int64(n-skipped), 1)
+		r.inputOffset += nn
+		return int(nn) + skipped, err
+	}
+	// otherwise, keep filling the buffer
+	// and discarding it up to 'n'
+	for skipped < n && r.state == nil {
+		r.more()
+		skipped += r.discard(n - skipped)
+	}
+	return skipped, r.noEOF()
+}
+
+// Next returns the next 'n' bytes in the stream.
+// Unlike Peek, Next advances the reader position.
+// The returned bytes point to the same
+// data as the buffer, so the slice is
+// only valid until the next reader method call.
+// An EOF is considered an unexpected error.
+// If the returned slice is shorter than the
+// length asked for, an error will be returned,
+// and the reader position will not be incremented.
+func (r *Reader) Next(n int) (b []byte, err error) {
+	if r.state == nil && len(r.data)-r.n >= n {
+		b = r.data[r.n : r.n+n]
+		r.n += n
+		r.inputOffset += int64(n)
+	} else {
+		b, err = r.next(n)
+	}
+	return
+}
+
+func (r *Reader) next(n int) ([]byte, error) {
+	// in case the buffer is too small
+	if cap(r.data) < n {
+		old := r.data[r.n:]
+		r.data = make([]byte, n+r.buffered())
+		r.data = r.data[:copy(r.data, old)]
+		r.n = 0
+	}
+
+	// fill at least 'n' bytes
+	for r.buffered() < n && r.state == nil {
+		r.more()
+	}
+
+	if r.buffered() < n {
+		return r.data[r.n:], r.noEOF()
+	}
+	out := r.data[r.n : r.n+n]
+	r.n += n
+	r.inputOffset += int64(n)
+	return out, nil
+}
+
+// Read implements [io.Reader].
+func (r *Reader) Read(b []byte) (int, error) {
+	// if we have data in the buffer, just
+	// return that.
+	if r.buffered() != 0 {
+		x := copy(b, r.data[r.n:])
+		r.n += x
+		r.inputOffset += int64(x)
+		return x, nil
+	}
+	var n int
+	// we have no buffered data; determine
+	// whether or not to buffer or call
+	// the underlying reader directly
+	if len(b) >= cap(r.data) {
+		n, r.state = r.r.Read(b)
+	} else {
+		r.more()
+		n = copy(b, r.data)
+		r.n = n
+	}
+	if n == 0 {
+		return 0, r.err()
+	}
+
+	r.inputOffset += int64(n)
+
+	return n, nil
+}
+
+// ReadFull attempts to read len(b) bytes into
+// 'b'. It returns the number of bytes read into
+// 'b', and an error if it does not return len(b).
+// EOF is considered an unexpected error.
+func (r *Reader) ReadFull(b []byte) (int, error) {
+	var n int  // read into b
+	var nn int // scratch
+	l := len(b)
+	// either read buffered data,
+	// or read directly from the underlying
+	// reader, or fetch more buffered data.
+	for n < l && r.state == nil {
+		if r.buffered() != 0 {
+			nn = copy(b[n:], r.data[r.n:])
+			n += nn
+			r.n += nn
+			r.inputOffset += int64(nn)
+		} else if l-n > cap(r.data) {
+			nn, r.state = r.r.Read(b[n:])
+			n += nn
+			r.inputOffset += int64(nn)
+		} else {
+			r.more()
+		}
+	}
+	if n < l {
+		return n, r.noEOF()
+	}
+	return n, nil
+}
+
+// ReadByte implements [io.ByteReader].
+func (r *Reader) ReadByte() (byte, error) {
+	for r.buffered() < 1 && r.state == nil {
+		r.more()
+	}
+	if r.buffered() < 1 {
+		return 0, r.err()
+	}
+	b := r.data[r.n]
+	r.n++
+	r.inputOffset++
+
+	return b, nil
+}
+
+// WriteTo implements [io.WriterTo].
+func (r *Reader) WriteTo(w io.Writer) (int64, error) {
+	var (
+		i   int64
+		ii  int
+		err error
+	)
+	// first, clear buffer
+	if r.buffered() > 0 {
+		ii, err = w.Write(r.data[r.n:])
+		i += int64(ii)
+		if err != nil {
+			return i, err
+		}
+		r.data = r.data[0:0]
+		r.n = 0
+		r.inputOffset += int64(ii)
+	}
+	for r.state == nil {
+		// here we just do
+		// 1:1 reads and writes
+		r.more()
+		if r.buffered() > 0 {
+			ii, err = w.Write(r.data)
+			i += int64(ii)
+			if err != nil {
+				return i, err
+			}
+			r.data = r.data[0:0]
+			r.n = 0
+			r.inputOffset += int64(ii)
+		}
+	}
+	if r.state != io.EOF {
+		return i, r.err()
+	}
+	return i, nil
+}
+
+func max(a int, b int) int {
+	if a < b {
+		return b
+	}
+	return a
+}
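Editor's note: the following is not part of the diff. It is a minimal usage sketch of the Reader API added above, showing the Peek/Skip/Next semantics the package comment describes. The file name 'example.bin' and the 4-byte header / 8-byte record layout are hypothetical.

package main

import (
	"fmt"
	"os"

	"github.com/philhofer/fwd"
)

func main() {
	f, err := os.Open("example.bin") // hypothetical input file
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// *os.File implements io.Seeker, so Skip can seek instead of reading.
	rd := fwd.NewReader(f)

	// Peek at the first 4 bytes without advancing the reader.
	hdr, err := rd.Peek(4)
	if err != nil {
		panic(err)
	}
	fmt.Printf("header: % x\n", hdr)

	// Skip the 4-byte header, then consume an 8-byte record in place.
	if _, err := rd.Skip(4); err != nil {
		panic(err)
	}
	rec, err := rd.Next(8) // valid only until the next reader method call
	if err != nil {
		panic(err)
	}
	fmt.Printf("record: % x, offset now %d\n", rec, rd.InputOffset())
}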

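A second hypothetical sketch, illustrating the block-oriented pattern the package comment mentions: consuming a stream through Next in fixed 512-byte chunks without copying data out of the read buffer. The in-memory source and block size are stand-ins.

package main

import (
	"bytes"
	"fmt"
	"io"

	"github.com/philhofer/fwd"
)

func main() {
	// stand-in for a real stream: 4 KiB of 0x01 bytes
	src := bytes.NewReader(bytes.Repeat([]byte{1}, 4096))
	rd := fwd.NewReader(src)

	var sum uint64
	for {
		blk, err := rd.Next(512) // points into the read buffer; no copy
		if err != nil {
			// a short (or empty) final block surfaces as io.ErrUnexpectedEOF
			if err == io.ErrUnexpectedEOF {
				for _, b := range blk {
					sum += uint64(b)
				}
				break
			}
			panic(err)
		}
		for _, b := range blk {
			sum += uint64(b)
		}
	}
	fmt.Println("sum:", sum) // 4096
}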