diff options
Diffstat (limited to 'vendor/github.com/klauspost/compress/huff0/decompress_amd64.go')
-rw-r--r-- | vendor/github.com/klauspost/compress/huff0/decompress_amd64.go | 226 |
1 files changed, 0 insertions, 226 deletions
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go deleted file mode 100644 index ba7e8e6b0..000000000 --- a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go +++ /dev/null @@ -1,226 +0,0 @@ -//go:build amd64 && !appengine && !noasm && gc -// +build amd64,!appengine,!noasm,gc - -// This file contains the specialisation of Decoder.Decompress4X -// and Decoder.Decompress1X that use an asm implementation of thir main loops. -package huff0 - -import ( - "errors" - "fmt" - - "github.com/klauspost/compress/internal/cpuinfo" -) - -// decompress4x_main_loop_x86 is an x86 assembler implementation -// of Decompress4X when tablelog > 8. -// -//go:noescape -func decompress4x_main_loop_amd64(ctx *decompress4xContext) - -// decompress4x_8b_loop_x86 is an x86 assembler implementation -// of Decompress4X when tablelog <= 8 which decodes 4 entries -// per loop. -// -//go:noescape -func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext) - -// fallback8BitSize is the size where using Go version is faster. -const fallback8BitSize = 800 - -type decompress4xContext struct { - pbr *[4]bitReaderShifted - peekBits uint8 - out *byte - dstEvery int - tbl *dEntrySingle - decoded int - limit *byte -} - -// Decompress4X will decompress a 4X encoded stream. -// The length of the supplied input must match the end of a block exactly. -// The *capacity* of the dst slice must match the destination size of -// the uncompressed data exactly. -func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { - if len(d.dt.single) == 0 { - return nil, errors.New("no table loaded") - } - if len(src) < 6+(4*1) { - return nil, errors.New("input too small") - } - - use8BitTables := d.actualTableLog <= 8 - if cap(dst) < fallback8BitSize && use8BitTables { - return d.decompress4X8bit(dst, src) - } - - var br [4]bitReaderShifted - // Decode "jump table" - start := 6 - for i := 0; i < 3; i++ { - length := int(src[i*2]) | (int(src[i*2+1]) << 8) - if start+length >= len(src) { - return nil, errors.New("truncated input (or invalid offset)") - } - err := br[i].init(src[start : start+length]) - if err != nil { - return nil, err - } - start += length - } - err := br[3].init(src[start:]) - if err != nil { - return nil, err - } - - // destination, offset to match first output - dstSize := cap(dst) - dst = dst[:dstSize] - out := dst - dstEvery := (dstSize + 3) / 4 - - const tlSize = 1 << tableLogMax - const tlMask = tlSize - 1 - single := d.dt.single[:tlSize] - - var decoded int - - if len(out) > 4*4 && !(br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4) { - ctx := decompress4xContext{ - pbr: &br, - peekBits: uint8((64 - d.actualTableLog) & 63), // see: bitReaderShifted.peekBitsFast() - out: &out[0], - dstEvery: dstEvery, - tbl: &single[0], - limit: &out[dstEvery-4], // Always stop decoding when first buffer gets here to avoid writing OOB on last. - } - if use8BitTables { - decompress4x_8b_main_loop_amd64(&ctx) - } else { - decompress4x_main_loop_amd64(&ctx) - } - - decoded = ctx.decoded - out = out[decoded/4:] - } - - // Decode remaining. - remainBytes := dstEvery - (decoded / 4) - for i := range br { - offset := dstEvery * i - endsAt := offset + remainBytes - if endsAt > len(out) { - endsAt = len(out) - } - br := &br[i] - bitsLeft := br.remaining() - for bitsLeft > 0 { - br.fill() - if offset >= endsAt { - return nil, errors.New("corruption detected: stream overrun 4") - } - - // Read value and increment offset. - val := br.peekBitsFast(d.actualTableLog) - v := single[val&tlMask].entry - nBits := uint8(v) - br.advance(nBits) - bitsLeft -= uint(nBits) - out[offset] = uint8(v >> 8) - offset++ - } - if offset != endsAt { - return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt) - } - decoded += offset - dstEvery*i - err = br.close() - if err != nil { - return nil, err - } - } - if dstSize != decoded { - return nil, errors.New("corruption detected: short output block") - } - return dst, nil -} - -// decompress4x_main_loop_x86 is an x86 assembler implementation -// of Decompress1X when tablelog > 8. -// -//go:noescape -func decompress1x_main_loop_amd64(ctx *decompress1xContext) - -// decompress4x_main_loop_x86 is an x86 with BMI2 assembler implementation -// of Decompress1X when tablelog > 8. -// -//go:noescape -func decompress1x_main_loop_bmi2(ctx *decompress1xContext) - -type decompress1xContext struct { - pbr *bitReaderShifted - peekBits uint8 - out *byte - outCap int - tbl *dEntrySingle - decoded int -} - -// Error reported by asm implementations -const error_max_decoded_size_exeeded = -1 - -// Decompress1X will decompress a 1X encoded stream. -// The cap of the output buffer will be the maximum decompressed size. -// The length of the supplied input must match the end of a block exactly. -func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) { - if len(d.dt.single) == 0 { - return nil, errors.New("no table loaded") - } - var br bitReaderShifted - err := br.init(src) - if err != nil { - return dst, err - } - maxDecodedSize := cap(dst) - dst = dst[:maxDecodedSize] - - const tlSize = 1 << tableLogMax - const tlMask = tlSize - 1 - - if maxDecodedSize >= 4 { - ctx := decompress1xContext{ - pbr: &br, - out: &dst[0], - outCap: maxDecodedSize, - peekBits: uint8((64 - d.actualTableLog) & 63), // see: bitReaderShifted.peekBitsFast() - tbl: &d.dt.single[0], - } - - if cpuinfo.HasBMI2() { - decompress1x_main_loop_bmi2(&ctx) - } else { - decompress1x_main_loop_amd64(&ctx) - } - if ctx.decoded == error_max_decoded_size_exeeded { - return nil, ErrMaxDecodedSizeExceeded - } - - dst = dst[:ctx.decoded] - } - - // br < 8, so uint8 is fine - bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead - for bitsLeft > 0 { - br.fill() - if len(dst) >= maxDecodedSize { - br.close() - return nil, ErrMaxDecodedSizeExceeded - } - v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask] - nBits := uint8(v.entry) - br.advance(nBits) - bitsLeft -= nBits - dst = append(dst, uint8(v.entry>>8)) - } - return dst, br.close() -} |