diff options
Diffstat (limited to 'vendor/github.com/klauspost/compress/fse/compress.go')
-rw-r--r-- | vendor/github.com/klauspost/compress/fse/compress.go | 683 |
1 files changed, 0 insertions, 683 deletions
diff --git a/vendor/github.com/klauspost/compress/fse/compress.go b/vendor/github.com/klauspost/compress/fse/compress.go deleted file mode 100644 index 074018d8f..000000000 --- a/vendor/github.com/klauspost/compress/fse/compress.go +++ /dev/null @@ -1,683 +0,0 @@ -// Copyright 2018 Klaus Post. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. -// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. - -package fse - -import ( - "errors" - "fmt" -) - -// Compress the input bytes. Input must be < 2GB. -// Provide a Scratch buffer to avoid memory allocations. -// Note that the output is also kept in the scratch buffer. -// If input is too hard to compress, ErrIncompressible is returned. -// If input is a single byte value repeated ErrUseRLE is returned. -func Compress(in []byte, s *Scratch) ([]byte, error) { - if len(in) <= 1 { - return nil, ErrIncompressible - } - if len(in) > (2<<30)-1 { - return nil, errors.New("input too big, must be < 2GB") - } - s, err := s.prepare(in) - if err != nil { - return nil, err - } - - // Create histogram, if none was provided. - maxCount := s.maxCount - if maxCount == 0 { - maxCount = s.countSimple(in) - } - // Reset for next run. - s.clearCount = true - s.maxCount = 0 - if maxCount == len(in) { - // One symbol, use RLE - return nil, ErrUseRLE - } - if maxCount == 1 || maxCount < (len(in)>>7) { - // Each symbol present maximum once or too well distributed. - return nil, ErrIncompressible - } - s.optimalTableLog() - err = s.normalizeCount() - if err != nil { - return nil, err - } - err = s.writeCount() - if err != nil { - return nil, err - } - - if false { - err = s.validateNorm() - if err != nil { - return nil, err - } - } - - err = s.buildCTable() - if err != nil { - return nil, err - } - err = s.compress(in) - if err != nil { - return nil, err - } - s.Out = s.bw.out - // Check if we compressed. - if len(s.Out) >= len(in) { - return nil, ErrIncompressible - } - return s.Out, nil -} - -// cState contains the compression state of a stream. -type cState struct { - bw *bitWriter - stateTable []uint16 - state uint16 -} - -// init will initialize the compression state to the first symbol of the stream. -func (c *cState) init(bw *bitWriter, ct *cTable, tableLog uint8, first symbolTransform) { - c.bw = bw - c.stateTable = ct.stateTable - - nbBitsOut := (first.deltaNbBits + (1 << 15)) >> 16 - im := int32((nbBitsOut << 16) - first.deltaNbBits) - lu := (im >> nbBitsOut) + first.deltaFindState - c.state = c.stateTable[lu] -} - -// encode the output symbol provided and write it to the bitstream. -func (c *cState) encode(symbolTT symbolTransform) { - nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16 - dstState := int32(c.state>>(nbBitsOut&15)) + symbolTT.deltaFindState - c.bw.addBits16NC(c.state, uint8(nbBitsOut)) - c.state = c.stateTable[dstState] -} - -// encode the output symbol provided and write it to the bitstream. -func (c *cState) encodeZero(symbolTT symbolTransform) { - nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16 - dstState := int32(c.state>>(nbBitsOut&15)) + symbolTT.deltaFindState - c.bw.addBits16ZeroNC(c.state, uint8(nbBitsOut)) - c.state = c.stateTable[dstState] -} - -// flush will write the tablelog to the output and flush the remaining full bytes. -func (c *cState) flush(tableLog uint8) { - c.bw.flush32() - c.bw.addBits16NC(c.state, tableLog) - c.bw.flush() -} - -// compress is the main compression loop that will encode the input from the last byte to the first. -func (s *Scratch) compress(src []byte) error { - if len(src) <= 2 { - return errors.New("compress: src too small") - } - tt := s.ct.symbolTT[:256] - s.bw.reset(s.Out) - - // Our two states each encodes every second byte. - // Last byte encoded (first byte decoded) will always be encoded by c1. - var c1, c2 cState - - // Encode so remaining size is divisible by 4. - ip := len(src) - if ip&1 == 1 { - c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]]) - c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]]) - c1.encodeZero(tt[src[ip-3]]) - ip -= 3 - } else { - c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]]) - c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]]) - ip -= 2 - } - if ip&2 != 0 { - c2.encodeZero(tt[src[ip-1]]) - c1.encodeZero(tt[src[ip-2]]) - ip -= 2 - } - src = src[:ip] - - // Main compression loop. - switch { - case !s.zeroBits && s.actualTableLog <= 8: - // We can encode 4 symbols without requiring a flush. - // We do not need to check if any output is 0 bits. - for ; len(src) >= 4; src = src[:len(src)-4] { - s.bw.flush32() - v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1] - c2.encode(tt[v0]) - c1.encode(tt[v1]) - c2.encode(tt[v2]) - c1.encode(tt[v3]) - } - case !s.zeroBits: - // We do not need to check if any output is 0 bits. - for ; len(src) >= 4; src = src[:len(src)-4] { - s.bw.flush32() - v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1] - c2.encode(tt[v0]) - c1.encode(tt[v1]) - s.bw.flush32() - c2.encode(tt[v2]) - c1.encode(tt[v3]) - } - case s.actualTableLog <= 8: - // We can encode 4 symbols without requiring a flush - for ; len(src) >= 4; src = src[:len(src)-4] { - s.bw.flush32() - v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1] - c2.encodeZero(tt[v0]) - c1.encodeZero(tt[v1]) - c2.encodeZero(tt[v2]) - c1.encodeZero(tt[v3]) - } - default: - for ; len(src) >= 4; src = src[:len(src)-4] { - s.bw.flush32() - v3, v2, v1, v0 := src[len(src)-4], src[len(src)-3], src[len(src)-2], src[len(src)-1] - c2.encodeZero(tt[v0]) - c1.encodeZero(tt[v1]) - s.bw.flush32() - c2.encodeZero(tt[v2]) - c1.encodeZero(tt[v3]) - } - } - - // Flush final state. - // Used to initialize state when decoding. - c2.flush(s.actualTableLog) - c1.flush(s.actualTableLog) - - s.bw.close() - return nil -} - -// writeCount will write the normalized histogram count to header. -// This is read back by readNCount. -func (s *Scratch) writeCount() error { - var ( - tableLog = s.actualTableLog - tableSize = 1 << tableLog - previous0 bool - charnum uint16 - - maxHeaderSize = ((int(s.symbolLen)*int(tableLog) + 4 + 2) >> 3) + 3 - - // Write Table Size - bitStream = uint32(tableLog - minTablelog) - bitCount = uint(4) - remaining = int16(tableSize + 1) /* +1 for extra accuracy */ - threshold = int16(tableSize) - nbBits = uint(tableLog + 1) - ) - if cap(s.Out) < maxHeaderSize { - s.Out = make([]byte, 0, s.br.remain()+maxHeaderSize) - } - outP := uint(0) - out := s.Out[:maxHeaderSize] - - // stops at 1 - for remaining > 1 { - if previous0 { - start := charnum - for s.norm[charnum] == 0 { - charnum++ - } - for charnum >= start+24 { - start += 24 - bitStream += uint32(0xFFFF) << bitCount - out[outP] = byte(bitStream) - out[outP+1] = byte(bitStream >> 8) - outP += 2 - bitStream >>= 16 - } - for charnum >= start+3 { - start += 3 - bitStream += 3 << bitCount - bitCount += 2 - } - bitStream += uint32(charnum-start) << bitCount - bitCount += 2 - if bitCount > 16 { - out[outP] = byte(bitStream) - out[outP+1] = byte(bitStream >> 8) - outP += 2 - bitStream >>= 16 - bitCount -= 16 - } - } - - count := s.norm[charnum] - charnum++ - max := (2*threshold - 1) - remaining - if count < 0 { - remaining += count - } else { - remaining -= count - } - count++ // +1 for extra accuracy - if count >= threshold { - count += max // [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ - } - bitStream += uint32(count) << bitCount - bitCount += nbBits - if count < max { - bitCount-- - } - - previous0 = count == 1 - if remaining < 1 { - return errors.New("internal error: remaining<1") - } - for remaining < threshold { - nbBits-- - threshold >>= 1 - } - - if bitCount > 16 { - out[outP] = byte(bitStream) - out[outP+1] = byte(bitStream >> 8) - outP += 2 - bitStream >>= 16 - bitCount -= 16 - } - } - - out[outP] = byte(bitStream) - out[outP+1] = byte(bitStream >> 8) - outP += (bitCount + 7) / 8 - - if charnum > s.symbolLen { - return errors.New("internal error: charnum > s.symbolLen") - } - s.Out = out[:outP] - return nil -} - -// symbolTransform contains the state transform for a symbol. -type symbolTransform struct { - deltaFindState int32 - deltaNbBits uint32 -} - -// String prints values as a human readable string. -func (s symbolTransform) String() string { - return fmt.Sprintf("dnbits: %08x, fs:%d", s.deltaNbBits, s.deltaFindState) -} - -// cTable contains tables used for compression. -type cTable struct { - tableSymbol []byte - stateTable []uint16 - symbolTT []symbolTransform -} - -// allocCtable will allocate tables needed for compression. -// If existing tables a re big enough, they are simply re-used. -func (s *Scratch) allocCtable() { - tableSize := 1 << s.actualTableLog - // get tableSymbol that is big enough. - if cap(s.ct.tableSymbol) < tableSize { - s.ct.tableSymbol = make([]byte, tableSize) - } - s.ct.tableSymbol = s.ct.tableSymbol[:tableSize] - - ctSize := tableSize - if cap(s.ct.stateTable) < ctSize { - s.ct.stateTable = make([]uint16, ctSize) - } - s.ct.stateTable = s.ct.stateTable[:ctSize] - - if cap(s.ct.symbolTT) < 256 { - s.ct.symbolTT = make([]symbolTransform, 256) - } - s.ct.symbolTT = s.ct.symbolTT[:256] -} - -// buildCTable will populate the compression table so it is ready to be used. -func (s *Scratch) buildCTable() error { - tableSize := uint32(1 << s.actualTableLog) - highThreshold := tableSize - 1 - var cumul [maxSymbolValue + 2]int16 - - s.allocCtable() - tableSymbol := s.ct.tableSymbol[:tableSize] - // symbol start positions - { - cumul[0] = 0 - for ui, v := range s.norm[:s.symbolLen-1] { - u := byte(ui) // one less than reference - if v == -1 { - // Low proba symbol - cumul[u+1] = cumul[u] + 1 - tableSymbol[highThreshold] = u - highThreshold-- - } else { - cumul[u+1] = cumul[u] + v - } - } - // Encode last symbol separately to avoid overflowing u - u := int(s.symbolLen - 1) - v := s.norm[s.symbolLen-1] - if v == -1 { - // Low proba symbol - cumul[u+1] = cumul[u] + 1 - tableSymbol[highThreshold] = byte(u) - highThreshold-- - } else { - cumul[u+1] = cumul[u] + v - } - if uint32(cumul[s.symbolLen]) != tableSize { - return fmt.Errorf("internal error: expected cumul[s.symbolLen] (%d) == tableSize (%d)", cumul[s.symbolLen], tableSize) - } - cumul[s.symbolLen] = int16(tableSize) + 1 - } - // Spread symbols - s.zeroBits = false - { - step := tableStep(tableSize) - tableMask := tableSize - 1 - var position uint32 - // if any symbol > largeLimit, we may have 0 bits output. - largeLimit := int16(1 << (s.actualTableLog - 1)) - for ui, v := range s.norm[:s.symbolLen] { - symbol := byte(ui) - if v > largeLimit { - s.zeroBits = true - } - for nbOccurrences := int16(0); nbOccurrences < v; nbOccurrences++ { - tableSymbol[position] = symbol - position = (position + step) & tableMask - for position > highThreshold { - position = (position + step) & tableMask - } /* Low proba area */ - } - } - - // Check if we have gone through all positions - if position != 0 { - return errors.New("position!=0") - } - } - - // Build table - table := s.ct.stateTable - { - tsi := int(tableSize) - for u, v := range tableSymbol { - // TableU16 : sorted by symbol order; gives next state value - table[cumul[v]] = uint16(tsi + u) - cumul[v]++ - } - } - - // Build Symbol Transformation Table - { - total := int16(0) - symbolTT := s.ct.symbolTT[:s.symbolLen] - tableLog := s.actualTableLog - tl := (uint32(tableLog) << 16) - (1 << tableLog) - for i, v := range s.norm[:s.symbolLen] { - switch v { - case 0: - case -1, 1: - symbolTT[i].deltaNbBits = tl - symbolTT[i].deltaFindState = int32(total - 1) - total++ - default: - maxBitsOut := uint32(tableLog) - highBits(uint32(v-1)) - minStatePlus := uint32(v) << maxBitsOut - symbolTT[i].deltaNbBits = (maxBitsOut << 16) - minStatePlus - symbolTT[i].deltaFindState = int32(total - v) - total += v - } - } - if total != int16(tableSize) { - return fmt.Errorf("total mismatch %d (got) != %d (want)", total, tableSize) - } - } - return nil -} - -// countSimple will create a simple histogram in s.count. -// Returns the biggest count. -// Does not update s.clearCount. -func (s *Scratch) countSimple(in []byte) (max int) { - for _, v := range in { - s.count[v]++ - } - m, symlen := uint32(0), s.symbolLen - for i, v := range s.count[:] { - if v == 0 { - continue - } - if v > m { - m = v - } - symlen = uint16(i) + 1 - } - s.symbolLen = symlen - return int(m) -} - -// minTableLog provides the minimum logSize to safely represent a distribution. -func (s *Scratch) minTableLog() uint8 { - minBitsSrc := highBits(uint32(s.br.remain()-1)) + 1 - minBitsSymbols := highBits(uint32(s.symbolLen-1)) + 2 - if minBitsSrc < minBitsSymbols { - return uint8(minBitsSrc) - } - return uint8(minBitsSymbols) -} - -// optimalTableLog calculates and sets the optimal tableLog in s.actualTableLog -func (s *Scratch) optimalTableLog() { - tableLog := s.TableLog - minBits := s.minTableLog() - maxBitsSrc := uint8(highBits(uint32(s.br.remain()-1))) - 2 - if maxBitsSrc < tableLog { - // Accuracy can be reduced - tableLog = maxBitsSrc - } - if minBits > tableLog { - tableLog = minBits - } - // Need a minimum to safely represent all symbol values - if tableLog < minTablelog { - tableLog = minTablelog - } - if tableLog > maxTableLog { - tableLog = maxTableLog - } - s.actualTableLog = tableLog -} - -var rtbTable = [...]uint32{0, 473195, 504333, 520860, 550000, 700000, 750000, 830000} - -// normalizeCount will normalize the count of the symbols so -// the total is equal to the table size. -func (s *Scratch) normalizeCount() error { - var ( - tableLog = s.actualTableLog - scale = 62 - uint64(tableLog) - step = (1 << 62) / uint64(s.br.remain()) - vStep = uint64(1) << (scale - 20) - stillToDistribute = int16(1 << tableLog) - largest int - largestP int16 - lowThreshold = (uint32)(s.br.remain() >> tableLog) - ) - - for i, cnt := range s.count[:s.symbolLen] { - // already handled - // if (count[s] == s.length) return 0; /* rle special case */ - - if cnt == 0 { - s.norm[i] = 0 - continue - } - if cnt <= lowThreshold { - s.norm[i] = -1 - stillToDistribute-- - } else { - proba := (int16)((uint64(cnt) * step) >> scale) - if proba < 8 { - restToBeat := vStep * uint64(rtbTable[proba]) - v := uint64(cnt)*step - (uint64(proba) << scale) - if v > restToBeat { - proba++ - } - } - if proba > largestP { - largestP = proba - largest = i - } - s.norm[i] = proba - stillToDistribute -= proba - } - } - - if -stillToDistribute >= (s.norm[largest] >> 1) { - // corner case, need another normalization method - return s.normalizeCount2() - } - s.norm[largest] += stillToDistribute - return nil -} - -// Secondary normalization method. -// To be used when primary method fails. -func (s *Scratch) normalizeCount2() error { - const notYetAssigned = -2 - var ( - distributed uint32 - total = uint32(s.br.remain()) - tableLog = s.actualTableLog - lowThreshold = total >> tableLog - lowOne = (total * 3) >> (tableLog + 1) - ) - for i, cnt := range s.count[:s.symbolLen] { - if cnt == 0 { - s.norm[i] = 0 - continue - } - if cnt <= lowThreshold { - s.norm[i] = -1 - distributed++ - total -= cnt - continue - } - if cnt <= lowOne { - s.norm[i] = 1 - distributed++ - total -= cnt - continue - } - s.norm[i] = notYetAssigned - } - toDistribute := (1 << tableLog) - distributed - - if (total / toDistribute) > lowOne { - // risk of rounding to zero - lowOne = (total * 3) / (toDistribute * 2) - for i, cnt := range s.count[:s.symbolLen] { - if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) { - s.norm[i] = 1 - distributed++ - total -= cnt - continue - } - } - toDistribute = (1 << tableLog) - distributed - } - if distributed == uint32(s.symbolLen)+1 { - // all values are pretty poor; - // probably incompressible data (should have already been detected); - // find max, then give all remaining points to max - var maxV int - var maxC uint32 - for i, cnt := range s.count[:s.symbolLen] { - if cnt > maxC { - maxV = i - maxC = cnt - } - } - s.norm[maxV] += int16(toDistribute) - return nil - } - - if total == 0 { - // all of the symbols were low enough for the lowOne or lowThreshold - for i := uint32(0); toDistribute > 0; i = (i + 1) % (uint32(s.symbolLen)) { - if s.norm[i] > 0 { - toDistribute-- - s.norm[i]++ - } - } - return nil - } - - var ( - vStepLog = 62 - uint64(tableLog) - mid = uint64((1 << (vStepLog - 1)) - 1) - rStep = (((1 << vStepLog) * uint64(toDistribute)) + mid) / uint64(total) // scale on remaining - tmpTotal = mid - ) - for i, cnt := range s.count[:s.symbolLen] { - if s.norm[i] == notYetAssigned { - var ( - end = tmpTotal + uint64(cnt)*rStep - sStart = uint32(tmpTotal >> vStepLog) - sEnd = uint32(end >> vStepLog) - weight = sEnd - sStart - ) - if weight < 1 { - return errors.New("weight < 1") - } - s.norm[i] = int16(weight) - tmpTotal = end - } - } - return nil -} - -// validateNorm validates the normalized histogram table. -func (s *Scratch) validateNorm() (err error) { - var total int - for _, v := range s.norm[:s.symbolLen] { - if v >= 0 { - total += int(v) - } else { - total -= int(v) - } - } - defer func() { - if err == nil { - return - } - fmt.Printf("selected TableLog: %d, Symbol length: %d\n", s.actualTableLog, s.symbolLen) - for i, v := range s.norm[:s.symbolLen] { - fmt.Printf("%3d: %5d -> %4d \n", i, s.count[i], v) - } - }() - if total != (1 << s.actualTableLog) { - return fmt.Errorf("warning: Total == %d != %d", total, 1<<s.actualTableLog) - } - for i, v := range s.count[s.symbolLen:] { - if v != 0 { - return fmt.Errorf("warning: Found symbol out of range, %d after cut", i) - } - } - return nil -} |