Diffstat (limited to 'vendor/github.com/klauspost/compress/flate')
12 files changed, 276 insertions, 165 deletions
diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go
index f8435998e..82882961a 100644
--- a/vendor/github.com/klauspost/compress/flate/deflate.go
+++ b/vendor/github.com/klauspost/compress/flate/deflate.go
@@ -131,7 +131,8 @@ func (d *compressor) fillDeflate(b []byte) int {
 	s := d.state
 	if s.index >= 2*windowSize-(minMatchLength+maxMatchLength) {
 		// shift the window by windowSize
-		copy(d.window[:], d.window[windowSize:2*windowSize])
+		//copy(d.window[:], d.window[windowSize:2*windowSize])
+		*(*[windowSize]byte)(d.window) = *(*[windowSize]byte)(d.window[windowSize:])
 		s.index -= windowSize
 		d.windowEnd -= windowSize
 		if d.blockStart >= windowSize {
@@ -293,7 +294,6 @@ func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, of
 	}
 	offset = 0
 
-	cGain := 0
 	if d.chain < 100 {
 		for i := prevHead; tries > 0; tries-- {
 			if wEnd == win[i+length] {
@@ -321,10 +321,14 @@ func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, of
 		return
 	}
 
+	// Minimum gain to accept a match.
+	cGain := 4
+
 	// Some like it higher (CSV), some like it lower (JSON)
-	const baseCost = 6
+	const baseCost = 3
 	// Base is 4 bytes at with an additional cost.
 	// Matches must be better than this.
+
 	for i := prevHead; tries > 0; tries-- {
 		if wEnd == win[i+length] {
 			n := matchLen(win[i:i+minMatchLook], wPos)
@@ -332,7 +336,7 @@ func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, of
 				// Calculate gain. Estimate
 				newGain := d.h.bitLengthRaw(wPos[:n]) - int(offsetExtraBits[offsetCode(uint32(pos-i))]) - baseCost - int(lengthExtraBits[lengthCodes[(n-3)&255]])
 
-				//fmt.Println(n, "gain:", newGain, "prev:", cGain, "raw:", d.h.bitLengthRaw(wPos[:n]))
+				//fmt.Println("gain:", newGain, "prev:", cGain, "raw:", d.h.bitLengthRaw(wPos[:n]), "this-len:", n, "prev-len:", length)
 				if newGain > cGain {
 					length = n
 					offset = pos - i
@@ -373,6 +377,12 @@ func hash4(b []byte) uint32 {
 	return hash4u(binary.LittleEndian.Uint32(b), hashBits)
 }
 
+// hash4 returns the hash of u to fit in a hash table with h bits.
+// Preferably h should be a constant and should always be <32.
+func hash4u(u uint32, h uint8) uint32 {
+	return (u * prime4bytes) >> (32 - h)
+}
+
 // bulkHash4 will compute hashes using the same
 // algorithm as hash4
 func bulkHash4(b []byte, dst []uint32) {
@@ -483,27 +493,103 @@ func (d *compressor) deflateLazy() {
 		}
 
 		if prevLength >= minMatchLength && s.length <= prevLength {
-			// Check for better match at end...
+			// No better match, but check for better match at end...
 			//
-			// checkOff must be >=2 since we otherwise risk checking s.index
-			// Offset of 2 seems to yield best results.
+			// Skip forward a number of bytes.
+			// Offset of 2 seems to yield best results. 3 is sometimes better.
 			const checkOff = 2
-			prevIndex := s.index - 1
-			if prevIndex+prevLength+checkOff < s.maxInsertIndex {
-				end := lookahead
-				if lookahead > maxMatchLength {
-					end = maxMatchLength
-				}
-				end += prevIndex
-				idx := prevIndex + prevLength - (4 - checkOff)
-				h := hash4(d.window[idx:])
-				ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength + (4 - checkOff)
-				if ch2 > minIndex {
-					length := matchLen(d.window[prevIndex:end], d.window[ch2:])
-					// It seems like a pure length metric is best.
-					if length > prevLength {
-						prevLength = length
-						prevOffset = prevIndex - ch2
+
+			// Check all, except full length
+			if prevLength < maxMatchLength-checkOff {
+				prevIndex := s.index - 1
+				if prevIndex+prevLength < s.maxInsertIndex {
+					end := lookahead
+					if lookahead > maxMatchLength+checkOff {
+						end = maxMatchLength + checkOff
+					}
+					end += prevIndex
+
+					// Hash at match end.
+					h := hash4(d.window[prevIndex+prevLength:])
+					ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength
+					if prevIndex-ch2 != prevOffset && ch2 > minIndex+checkOff {
+						length := matchLen(d.window[prevIndex+checkOff:end], d.window[ch2+checkOff:])
+						// It seems like a pure length metric is best.
+						if length > prevLength {
+							prevLength = length
+							prevOffset = prevIndex - ch2
+
+							// Extend back...
+							for i := checkOff - 1; i >= 0; i-- {
+								if prevLength >= maxMatchLength || d.window[prevIndex+i] != d.window[ch2+i] {
+									// Emit tokens we "owe"
+									for j := 0; j <= i; j++ {
+										d.tokens.AddLiteral(d.window[prevIndex+j])
+										if d.tokens.n == maxFlateBlockTokens {
+											// The block includes the current character
+											if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
+												return
+											}
+											d.tokens.Reset()
+										}
+										s.index++
+										if s.index < s.maxInsertIndex {
+											h := hash4(d.window[s.index:])
+											ch := s.hashHead[h]
+											s.chainHead = int(ch)
+											s.hashPrev[s.index&windowMask] = ch
+											s.hashHead[h] = uint32(s.index + s.hashOffset)
+										}
+									}
+									break
+								} else {
+									prevLength++
+								}
+							}
+						} else if false {
+							// Check one further ahead.
+							// Only rarely better, disabled for now.
+							prevIndex++
+							h := hash4(d.window[prevIndex+prevLength:])
+							ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength
+							if prevIndex-ch2 != prevOffset && ch2 > minIndex+checkOff {
+								length := matchLen(d.window[prevIndex+checkOff:end], d.window[ch2+checkOff:])
+								// It seems like a pure length metric is best.
+								if length > prevLength+checkOff {
+									prevLength = length
+									prevOffset = prevIndex - ch2
+									prevIndex--
+
+									// Extend back...
+									for i := checkOff; i >= 0; i-- {
+										if prevLength >= maxMatchLength || d.window[prevIndex+i] != d.window[ch2+i-1] {
+											// Emit tokens we "owe"
+											for j := 0; j <= i; j++ {
+												d.tokens.AddLiteral(d.window[prevIndex+j])
+												if d.tokens.n == maxFlateBlockTokens {
+													// The block includes the current character
+													if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
+														return
+													}
+													d.tokens.Reset()
+												}
+												s.index++
+												if s.index < s.maxInsertIndex {
+													h := hash4(d.window[s.index:])
+													ch := s.hashHead[h]
+													s.chainHead = int(ch)
+													s.hashPrev[s.index&windowMask] = ch
+													s.hashHead[h] = uint32(s.index + s.hashOffset)
+												}
+											}
+											break
+										} else {
+											prevLength++
+										}
+									}
+								}
+							}
+						}
+					}
 				}
 			}
 		}
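The window shift in fillDeflate swaps a copy call for a slice-to-array-pointer conversion (Go 1.17+), which compiles to a single fixed-size move with one up-front length check. A minimal standalone sketch of the idiom, with an illustrative size in place of the real windowSize:

package main

import "fmt"

const windowSize = 4 // illustrative; the real flate windowSize is 32768

func main() {
	window := make([]byte, 2*windowSize)
	copy(window[windowSize:], []byte{1, 2, 3, 4})

	// Same effect as copy(window[:], window[windowSize:2*windowSize]),
	// but the conversion to *[windowSize]byte (Go 1.17+) turns the copy
	// into one fixed-size move, bounds-checked once.
	*(*[windowSize]byte)(window) = *(*[windowSize]byte)(window[windowSize:])

	fmt.Println(window[:windowSize]) // [1 2 3 4]
}

The conversion panics if the slice is shorter than the array, so the compiler needs no per-element checks inside the move.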
diff --git a/vendor/github.com/klauspost/compress/flate/dict_decoder.go b/vendor/github.com/klauspost/compress/flate/dict_decoder.go
index 71c75a065..bb36351a5 100644
--- a/vendor/github.com/klauspost/compress/flate/dict_decoder.go
+++ b/vendor/github.com/klauspost/compress/flate/dict_decoder.go
@@ -7,19 +7,19 @@ package flate
 // dictDecoder implements the LZ77 sliding dictionary as used in decompression.
 // LZ77 decompresses data through sequences of two forms of commands:
 //
-// * Literal insertions: Runs of one or more symbols are inserted into the data
-// stream as is. This is accomplished through the writeByte method for a
-// single symbol, or combinations of writeSlice/writeMark for multiple symbols.
-// Any valid stream must start with a literal insertion if no preset dictionary
-// is used.
+//   - Literal insertions: Runs of one or more symbols are inserted into the data
+//     stream as is. This is accomplished through the writeByte method for a
+//     single symbol, or combinations of writeSlice/writeMark for multiple symbols.
+//     Any valid stream must start with a literal insertion if no preset dictionary
+//     is used.
 //
-// * Backward copies: Runs of one or more symbols are copied from previously
-// emitted data. Backward copies come as the tuple (dist, length) where dist
-// determines how far back in the stream to copy from and length determines how
-// many bytes to copy. Note that it is valid for the length to be greater than
-// the distance. Since LZ77 uses forward copies, that situation is used to
-// perform a form of run-length encoding on repeated runs of symbols.
-// The writeCopy and tryWriteCopy are used to implement this command.
+//   - Backward copies: Runs of one or more symbols are copied from previously
+//     emitted data. Backward copies come as the tuple (dist, length) where dist
+//     determines how far back in the stream to copy from and length determines how
+//     many bytes to copy. Note that it is valid for the length to be greater than
+//     the distance. Since LZ77 uses forward copies, that situation is used to
+//     perform a form of run-length encoding on repeated runs of symbols.
+//     The writeCopy and tryWriteCopy are used to implement this command.
 //
 // For performance reasons, this implementation performs little to no sanity
 // checks about the arguments. As such, the invariants documented for each
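The point about length exceeding distance is easiest to see in a toy forward-copy loop; this sketch is illustrative and is not the package's writeCopy:

package main

import "fmt"

// lz77Copy expands a (dist, length) backward copy at pos. Copying forward
// one byte at a time is what makes length > dist legal: later iterations
// re-read bytes the same copy just produced, giving run-length expansion.
func lz77Copy(out []byte, pos, dist, length int) int {
	for i := 0; i < length; i++ {
		out[pos+i] = out[pos+i-dist]
	}
	return pos + length
}

func main() {
	buf := make([]byte, 8)
	n := copy(buf, "ab")
	n = lz77Copy(buf, n, 2, 6) // dist 2, length 6
	fmt.Printf("%s\n", buf[:n]) // abababab
}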
diff --git a/vendor/github.com/klauspost/compress/flate/fast_encoder.go b/vendor/github.com/klauspost/compress/flate/fast_encoder.go
index f781aaa62..24caf5f70 100644
--- a/vendor/github.com/klauspost/compress/flate/fast_encoder.go
+++ b/vendor/github.com/klauspost/compress/flate/fast_encoder.go
@@ -58,17 +58,6 @@ const (
 	prime8bytes = 0xcf1bbcdcb7a56463
 )
 
-func load32(b []byte, i int) uint32 {
-	// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
-	b = b[i:]
-	b = b[:4]
-	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
-}
-
-func load64(b []byte, i int) uint64 {
-	return binary.LittleEndian.Uint64(b[i:])
-}
-
 func load3232(b []byte, i int32) uint32 {
 	return binary.LittleEndian.Uint32(b[i:])
 }
@@ -77,10 +66,6 @@ func load6432(b []byte, i int32) uint64 {
 	return binary.LittleEndian.Uint64(b[i:])
 }
 
-func hash(u uint32) uint32 {
-	return (u * 0x1e35a7bd) >> tableShift
-}
-
 type tableEntry struct {
 	offset int32
 }
@@ -104,7 +89,8 @@ func (e *fastGen) addBlock(src []byte) int32 {
 		}
 		// Move down
 		offset := int32(len(e.hist)) - maxMatchOffset
-		copy(e.hist[0:maxMatchOffset], e.hist[offset:])
+		// copy(e.hist[0:maxMatchOffset], e.hist[offset:])
+		*(*[maxMatchOffset]byte)(e.hist) = *(*[maxMatchOffset]byte)(e.hist[offset:])
 		e.cur += offset
 		e.hist = e.hist[:maxMatchOffset]
 	}
@@ -114,39 +100,36 @@ func (e *fastGen) addBlock(src []byte) int32 {
 	return s
 }
 
-// hash4 returns the hash of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <32.
-func hash4u(u uint32, h uint8) uint32 {
-	return (u * prime4bytes) >> (32 - h)
-}
-
 type tableEntryPrev struct {
 	Cur  tableEntry
 	Prev tableEntry
}
 
-// hash4x64 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <32.
-func hash4x64(u uint64, h uint8) uint32 {
-	return (uint32(u) * prime4bytes) >> ((32 - h) & reg8SizeMask32)
-}
-
 // hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
 // Preferably h should be a constant and should always be <64.
 func hash7(u uint64, h uint8) uint32 {
 	return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & reg8SizeMask64))
 }
 
-// hash8 returns the hash of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <64.
-func hash8(u uint64, h uint8) uint32 {
-	return uint32((u * prime8bytes) >> ((64 - h) & reg8SizeMask64))
-}
-
-// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <64.
-func hash6(u uint64, h uint8) uint32 {
-	return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & reg8SizeMask64))
+// hashLen returns a hash of the lowest mls bytes of with length output bits.
+// mls must be >=3 and <=8. Any other value will return hash for 4 bytes.
+// length should always be < 32.
+// Preferably length and mls should be a constant for inlining.
+func hashLen(u uint64, length, mls uint8) uint32 {
+	switch mls {
+	case 3:
+		return (uint32(u<<8) * prime3bytes) >> (32 - length)
+	case 5:
+		return uint32(((u << (64 - 40)) * prime5bytes) >> (64 - length))
+	case 6:
+		return uint32(((u << (64 - 48)) * prime6bytes) >> (64 - length))
+	case 7:
+		return uint32(((u << (64 - 56)) * prime7bytes) >> (64 - length))
+	case 8:
+		return uint32((u * prime8bytes) >> (64 - length))
+	default:
+		return (uint32(u) * prime4bytes) >> (32 - length)
+	}
 }
 
 // matchlen will return the match length between offsets and t in src.
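hashLen works by shifting the unwanted high bytes out of a 64-bit little-endian load before the multiply, so only the lowest mls bytes influence the hash. A self-contained sketch of the mechanics; it reuses prime8bytes (quoted in the hunk above) for both widths, where the real code picks a different prime per width:

package main

import "fmt"

const prime8bytes = 0xcf1bbcdcb7a56463

// hashBytes hashes the lowest mls bytes of u into `length` output bits:
// shift the unwanted high bytes away, multiply, keep the top bits.
func hashBytes(u uint64, length, mls uint8) uint32 {
	return uint32(((u << (64 - 8*mls)) * prime8bytes) >> (64 - length))
}

func main() {
	v := uint64(0x0102030405060708)
	w := v | 0xffff<<48 // differs only in the two highest bytes

	fmt.Println(hashBytes(v, 15, 6) == hashBytes(w, 15, 6)) // true: high bytes shifted out
	fmt.Println(hashBytes(v, 15, 8) == hashBytes(w, 15, 8)) // false here: all 8 bytes count
}

Keeping length and mls constant at each call site lets the compiler fold the switch away and inline the single multiply-and-shift, which is the point of the refactor.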
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
index 40ef45c2f..89a5dd89f 100644
--- a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
+++ b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
@@ -265,9 +265,9 @@ func (w *huffmanBitWriter) writeBytes(bytes []byte) {
 // Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
 // information. Code badCode is an end marker
 //
-// numLiterals  The number of literals in literalEncoding
-// numOffsets  The number of offsets in offsetEncoding
-// litenc, offenc  The literal and offset encoder to use
+//	numLiterals  The number of literals in literalEncoding
+//	numOffsets  The number of offsets in offsetEncoding
+//	litenc, offenc  The literal and offset encoder to use
 func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) {
 	for i := range w.codegenFreq {
 		w.codegenFreq[i] = 0
@@ -460,9 +460,9 @@ func (w *huffmanBitWriter) writeOutBits() {
 // Write the header of a dynamic Huffman block to the output stream.
 //
-// numLiterals  The number of literals specified in codegen
-// numOffsets  The number of offsets specified in codegen
-// numCodegens  The number of codegens used in codegen
+//	numLiterals  The number of literals specified in codegen
+//	numOffsets  The number of offsets specified in codegen
+//	numCodegens  The number of codegens used in codegen
 func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) {
 	if w.err != nil {
 		return
 	}
@@ -790,9 +790,11 @@ func (w *huffmanBitWriter) fillTokens() {
 // and offsetEncoding.
 // The number of literal and offset tokens is returned.
 func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, numOffsets int) {
-	copy(w.literalFreq[:], t.litHist[:])
-	copy(w.literalFreq[256:], t.extraHist[:])
-	copy(w.offsetFreq[:], t.offHist[:offsetCodeCount])
+	//copy(w.literalFreq[:], t.litHist[:])
+	*(*[256]uint16)(w.literalFreq[:]) = t.litHist
+	//copy(w.literalFreq[256:], t.extraHist[:])
+	*(*[32]uint16)(w.literalFreq[256:]) = t.extraHist
+	w.offsetFreq = t.offHist
 
 	if t.n == 0 {
 		return
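The indexTokens change leans on the fact that Go arrays are values: one assignment through a converted array pointer copies the whole histogram, replacing three bounds-checked copy calls. A small sketch with illustrative sizes (the diff uses 256 and 32):

package main

import "fmt"

func main() {
	hist := [4]uint16{1, 2, 3, 4} // an array: a value with compile-time size
	freq := make([]uint16, 8)

	// One assignment copies all four elements into freq[0:4].
	*(*[4]uint16)(freq) = hist

	hist[0] = 99 // freq is unaffected: the assignment copied by value
	fmt.Println(freq, hist) // [1 2 3 4 0 0 0 0] [99 2 3 4]
}

The same value semantics explain the plain `w.offsetFreq = t.offHist` line: when both sides are array types, assignment is itself a full copy, not an aliasing of storage.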
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_code.go b/vendor/github.com/klauspost/compress/flate/huffman_code.go
index 5ac144f28..be7b58b47 100644
--- a/vendor/github.com/klauspost/compress/flate/huffman_code.go
+++ b/vendor/github.com/klauspost/compress/flate/huffman_code.go
@@ -168,13 +168,18 @@ func (h *huffmanEncoder) canReuseBits(freq []uint16) int {
 // The cases of 0, 1, and 2 literals are handled by special case code.
 //
 // list An array of the literals with non-zero frequencies
-// and their associated frequencies. The array is in order of increasing
-// frequency, and has as its last element a special element with frequency
-// MaxInt32
+//
+//	and their associated frequencies. The array is in order of increasing
+//	frequency, and has as its last element a special element with frequency
+//	MaxInt32
+//
 // maxBits The maximum number of bits that should be used to encode any literal.
-// Must be less than 16.
+//
+//	Must be less than 16.
+//
 // return An integer array in which array[i] indicates the number of literals
-// that should be encoded in i bits.
+//
+//	that should be encoded in i bits.
 func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
 	if maxBits >= maxBitsLimit {
 		panic("flate: maxBits too large")
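For the bitCounts contract being re-documented here: with the list sorted by increasing frequency, a caller peels bitCount[i] symbols off the front for the largest i first, so the rarest symbols receive the longest codes. A hedged sketch of that consumption step; the names are hypothetical, not the package's:

package main

import "fmt"

// assignLengths consumes a bitCounts-style result: bitCount[i] symbols get
// i-bit codes, longest codes first, drawn from the least-frequent end.
func assignLengths(symsByFreq []byte, bitCount []int32) map[byte]int32 {
	lengths := make(map[byte]int32)
	for bits := int32(len(bitCount) - 1); bits > 0; bits-- {
		n := bitCount[bits]
		for _, sym := range symsByFreq[:n] {
			lengths[sym] = bits
		}
		symsByFreq = symsByFreq[n:]
	}
	return lengths
}

func main() {
	// 'd' rarest ... 'a' most frequent. The counts satisfy Kraft's
	// inequality exactly: 2*2^-3 + 2^-2 + 2^-1 = 1.
	fmt.Println(assignLengths([]byte("dcba"), []int32{0, 1, 1, 2}))
	// map[a:1 b:2 c:3 d:3]
}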
diff --git a/vendor/github.com/klauspost/compress/flate/level1.go b/vendor/github.com/klauspost/compress/flate/level1.go
index 0f14f8d63..703b9a89a 100644
--- a/vendor/github.com/klauspost/compress/flate/level1.go
+++ b/vendor/github.com/klauspost/compress/flate/level1.go
@@ -19,6 +19,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
 	const (
 		inputMargin            = 12 - 1
 		minNonLiteralBlockSize = 1 + 1 + inputMargin
+		hashBytes              = 5
 	)
 	if debugDeflate && e.cur < 0 {
 		panic(fmt.Sprint("e.cur < 0: ", e.cur))
@@ -68,7 +69,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
 	sLimit := int32(len(src) - inputMargin)
 
 	// nextEmit is where in src the next emitLiteral should start from.
-	cv := load3232(src, s)
+	cv := load6432(src, s)
 
 	for {
 		const skipLog = 5
@@ -77,7 +78,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
 		nextS := s
 		var candidate tableEntry
 		for {
-			nextHash := hash(cv)
+			nextHash := hashLen(cv, tableBits, hashBytes)
 			candidate = e.table[nextHash]
 			nextS = s + doEvery + (s-nextEmit)>>skipLog
 			if nextS > sLimit {
 				goto emitRemainder
 			}
 
 			now := load6432(src, nextS)
 			e.table[nextHash] = tableEntry{offset: s + e.cur}
-			nextHash = hash(uint32(now))
+			nextHash = hashLen(now, tableBits, hashBytes)
 
 			offset := s - (candidate.offset - e.cur)
-			if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
+			if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
 				e.table[nextHash] = tableEntry{offset: nextS + e.cur}
 				break
 			}
 
 			// Do one right away...
-			cv = uint32(now)
+			cv = now
 			s = nextS
 			nextS++
 			candidate = e.table[nextHash]
@@ -103,11 +104,11 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
 			e.table[nextHash] = tableEntry{offset: s + e.cur}
 
 			offset = s - (candidate.offset - e.cur)
-			if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
+			if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
 				e.table[nextHash] = tableEntry{offset: nextS + e.cur}
 				break
 			}
-			cv = uint32(now)
+			cv = now
 			s = nextS
 		}
@@ -198,9 +199,9 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
 		}
 		if s >= sLimit {
 			// Index first pair after match end.
-			if int(s+l+4) < len(src) {
-				cv := load3232(src, s)
-				e.table[hash(cv)] = tableEntry{offset: s + e.cur}
+			if int(s+l+8) < len(src) {
+				cv := load6432(src, s)
+				e.table[hashLen(cv, tableBits, hashBytes)] = tableEntry{offset: s + e.cur}
 			}
 			goto emitRemainder
 		}
@@ -213,16 +214,16 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
 		// three load32 calls.
 		x := load6432(src, s-2)
 		o := e.cur + s - 2
-		prevHash := hash(uint32(x))
+		prevHash := hashLen(x, tableBits, hashBytes)
 		e.table[prevHash] = tableEntry{offset: o}
 		x >>= 16
-		currHash := hash(uint32(x))
+		currHash := hashLen(x, tableBits, hashBytes)
 		candidate = e.table[currHash]
 		e.table[currHash] = tableEntry{offset: o + 2}
 
 		offset := s - (candidate.offset - e.cur)
 		if offset > maxMatchOffset || uint32(x) != load3232(src, candidate.offset-e.cur) {
-			cv = uint32(x >> 8)
+			cv = x >> 8
 			s++
 			break
 		}
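The probe step `nextS = s + doEvery + (s-nextEmit)>>skipLog` used throughout these fast encoders advances further the longer the current literal run has gone without a match, trading ratio for speed on incompressible stretches. A quick illustration of the step growth (doEvery is assumed to be 2 here; its value is not shown in this hunk):

package main

import "fmt"

func main() {
	const doEvery, skipLog = 2, 5
	// run = s - nextEmit: bytes emitted as literals since the last match.
	for _, run := range []int32{0, 32, 128, 512, 2048} {
		fmt.Printf("literal run %4d -> advance %d\n", run, doEvery+run>>skipLog)
	}
}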
diff --git a/vendor/github.com/klauspost/compress/flate/level2.go b/vendor/github.com/klauspost/compress/flate/level2.go
index 8603fbd55..876dfbe30 100644
--- a/vendor/github.com/klauspost/compress/flate/level2.go
+++ b/vendor/github.com/klauspost/compress/flate/level2.go
@@ -16,6 +16,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
 	const (
 		inputMargin            = 12 - 1
 		minNonLiteralBlockSize = 1 + 1 + inputMargin
+		hashBytes              = 5
 	)
 
 	if debugDeflate && e.cur < 0 {
@@ -66,7 +67,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
 	sLimit := int32(len(src) - inputMargin)
 
 	// nextEmit is where in src the next emitLiteral should start from.
-	cv := load3232(src, s)
+	cv := load6432(src, s)
 	for {
 		// When should we start skipping if we haven't found matches in a long while.
 		const skipLog = 5
@@ -75,7 +76,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
 		nextS := s
 		var candidate tableEntry
 		for {
-			nextHash := hash4u(cv, bTableBits)
+			nextHash := hashLen(cv, bTableBits, hashBytes)
 			s = nextS
 			nextS = s + doEvery + (s-nextEmit)>>skipLog
 			if nextS > sLimit {
@@ -84,16 +85,16 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
 			candidate = e.table[nextHash]
 			now := load6432(src, nextS)
 			e.table[nextHash] = tableEntry{offset: s + e.cur}
-			nextHash = hash4u(uint32(now), bTableBits)
+			nextHash = hashLen(now, bTableBits, hashBytes)
 
 			offset := s - (candidate.offset - e.cur)
-			if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
+			if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
 				e.table[nextHash] = tableEntry{offset: nextS + e.cur}
 				break
 			}
 
 			// Do one right away...
-			cv = uint32(now)
+			cv = now
 			s = nextS
 			nextS++
 			candidate = e.table[nextHash]
@@ -101,10 +102,10 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
 			e.table[nextHash] = tableEntry{offset: s + e.cur}
 
 			offset = s - (candidate.offset - e.cur)
-			if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
+			if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
 				break
 			}
-			cv = uint32(now)
+			cv = now
 		}
@@ -154,9 +155,9 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
 
 		if s >= sLimit {
 			// Index first pair after match end.
-			if int(s+l+4) < len(src) {
-				cv := load3232(src, s)
-				e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur}
+			if int(s+l+8) < len(src) {
+				cv := load6432(src, s)
+				e.table[hashLen(cv, bTableBits, hashBytes)] = tableEntry{offset: s + e.cur}
 			}
 			goto emitRemainder
 		}
@@ -164,15 +165,15 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
 		// Store every second hash in-between, but offset by 1.
 		for i := s - l + 2; i < s-5; i += 7 {
 			x := load6432(src, i)
-			nextHash := hash4u(uint32(x), bTableBits)
+			nextHash := hashLen(x, bTableBits, hashBytes)
 			e.table[nextHash] = tableEntry{offset: e.cur + i}
 			// Skip one
 			x >>= 16
-			nextHash = hash4u(uint32(x), bTableBits)
+			nextHash = hashLen(x, bTableBits, hashBytes)
 			e.table[nextHash] = tableEntry{offset: e.cur + i + 2}
 			// Skip one
 			x >>= 16
-			nextHash = hash4u(uint32(x), bTableBits)
+			nextHash = hashLen(x, bTableBits, hashBytes)
 			e.table[nextHash] = tableEntry{offset: e.cur + i + 4}
 		}
@@ -184,17 +185,17 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
 		// three load32 calls.
 		x := load6432(src, s-2)
 		o := e.cur + s - 2
-		prevHash := hash4u(uint32(x), bTableBits)
-		prevHash2 := hash4u(uint32(x>>8), bTableBits)
+		prevHash := hashLen(x, bTableBits, hashBytes)
+		prevHash2 := hashLen(x>>8, bTableBits, hashBytes)
 		e.table[prevHash] = tableEntry{offset: o}
 		e.table[prevHash2] = tableEntry{offset: o + 1}
-		currHash := hash4u(uint32(x>>16), bTableBits)
+		currHash := hashLen(x>>16, bTableBits, hashBytes)
 		candidate = e.table[currHash]
 		e.table[currHash] = tableEntry{offset: o + 2}
 
 		offset := s - (candidate.offset - e.cur)
 		if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) {
-			cv = uint32(x >> 24)
+			cv = x >> 24
 			s++
 			break
 		}
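The `x >>= 16` pattern in the hunks above reuses one 64-bit load to hash several adjacent positions instead of issuing a fresh load per position. The equivalence it relies on, checked directly:

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	src := []byte("abcdefgh")
	x := binary.LittleEndian.Uint64(src) // one load covers s..s+7

	// Shifting replaces further loads: x>>8 starts at s+1, x>>16 at s+2.
	fmt.Println(uint32(x>>8) == binary.LittleEndian.Uint32(src[1:]))  // true
	fmt.Println(uint32(x>>16) == binary.LittleEndian.Uint32(src[2:])) // true
}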
diff --git a/vendor/github.com/klauspost/compress/flate/level3.go b/vendor/github.com/klauspost/compress/flate/level3.go
index 039639f89..7aa2b72a1 100644
--- a/vendor/github.com/klauspost/compress/flate/level3.go
+++ b/vendor/github.com/klauspost/compress/flate/level3.go
@@ -11,10 +11,11 @@ type fastEncL3 struct {
 // Encode uses a similar algorithm to level 2, will check up to two candidates.
 func (e *fastEncL3) Encode(dst *tokens, src []byte) {
 	const (
-		inputMargin            = 8 - 1
+		inputMargin            = 12 - 1
 		minNonLiteralBlockSize = 1 + 1 + inputMargin
 		tableBits              = 16
 		tableSize              = 1 << tableBits
+		hashBytes              = 5
 	)
 
 	if debugDeflate && e.cur < 0 {
@@ -69,20 +70,20 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
 	sLimit := int32(len(src) - inputMargin)
 
 	// nextEmit is where in src the next emitLiteral should start from.
-	cv := load3232(src, s)
+	cv := load6432(src, s)
 	for {
-		const skipLog = 6
+		const skipLog = 7
 		nextS := s
 		var candidate tableEntry
 		for {
-			nextHash := hash4u(cv, tableBits)
+			nextHash := hashLen(cv, tableBits, hashBytes)
 			s = nextS
 			nextS = s + 1 + (s-nextEmit)>>skipLog
 			if nextS > sLimit {
 				goto emitRemainder
 			}
 			candidates := e.table[nextHash]
-			now := load3232(src, nextS)
+			now := load6432(src, nextS)
 
 			// Safe offset distance until s + 4...
 			minOffset := e.cur + s - (maxMatchOffset - 4)
@@ -96,8 +97,8 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
 				continue
 			}
 
-			if cv == load3232(src, candidate.offset-e.cur) {
-				if candidates.Prev.offset < minOffset || cv != load3232(src, candidates.Prev.offset-e.cur) {
+			if uint32(cv) == load3232(src, candidate.offset-e.cur) {
+				if candidates.Prev.offset < minOffset || uint32(cv) != load3232(src, candidates.Prev.offset-e.cur) {
 					break
 				}
 				// Both match and are valid, pick longest.
@@ -112,7 +113,7 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
 				// We only check if value mismatches.
 				// Offset will always be invalid in other cases.
 				candidate = candidates.Prev
-				if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) {
+				if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
 					break
 				}
 			}
@@ -164,9 +165,9 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
 		if s >= sLimit {
 			t += l
 			// Index first pair after match end.
-			if int(t+4) < len(src) && t > 0 {
-				cv := load3232(src, t)
-				nextHash := hash4u(cv, tableBits)
+			if int(t+8) < len(src) && t > 0 {
+				cv = load6432(src, t)
+				nextHash := hashLen(cv, tableBits, hashBytes)
 				e.table[nextHash] = tableEntryPrev{
 					Prev: e.table[nextHash].Cur,
 					Cur:  tableEntry{offset: e.cur + t},
@@ -176,8 +177,8 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
 		}
 
 		// Store every 5th hash in-between.
-		for i := s - l + 2; i < s-5; i += 5 {
-			nextHash := hash4u(load3232(src, i), tableBits)
+		for i := s - l + 2; i < s-5; i += 6 {
+			nextHash := hashLen(load6432(src, i), tableBits, hashBytes)
 			e.table[nextHash] = tableEntryPrev{
 				Prev: e.table[nextHash].Cur,
 				Cur:  tableEntry{offset: e.cur + i}}
@@ -185,23 +186,23 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
 		// We could immediately start working at s now, but to improve
 		// compression we first update the hash table at s-2 to s.
 		x := load6432(src, s-2)
-		prevHash := hash4u(uint32(x), tableBits)
+		prevHash := hashLen(x, tableBits, hashBytes)
 		e.table[prevHash] = tableEntryPrev{
 			Prev: e.table[prevHash].Cur,
 			Cur:  tableEntry{offset: e.cur + s - 2},
 		}
 		x >>= 8
-		prevHash = hash4u(uint32(x), tableBits)
+		prevHash = hashLen(x, tableBits, hashBytes)
 		e.table[prevHash] = tableEntryPrev{
 			Prev: e.table[prevHash].Cur,
 			Cur:  tableEntry{offset: e.cur + s - 1},
 		}
 		x >>= 8
-		currHash := hash4u(uint32(x), tableBits)
+		currHash := hashLen(x, tableBits, hashBytes)
 		candidates := e.table[currHash]
-		cv = uint32(x)
+		cv = x
 		e.table[currHash] = tableEntryPrev{
 			Prev: candidates.Cur,
 			Cur:  tableEntry{offset: s + e.cur},
 		}
@@ -212,17 +213,17 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
 		minOffset := e.cur + s - (maxMatchOffset - 4)
 
 		if candidate.offset > minOffset {
-			if cv == load3232(src, candidate.offset-e.cur) {
+			if uint32(cv) == load3232(src, candidate.offset-e.cur) {
 				// Found a match...
 				continue
 			}
 			candidate = candidates.Prev
-			if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) {
+			if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
 				// Match at prev...
 				continue
 			}
 		}
-		cv = uint32(x >> 8)
+		cv = x >> 8
 		s++
 		break
 	}
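Level 3's tableEntryPrev keeps two candidates per hash bucket; every insert demotes the current entry to Prev, so lookups can try the newest candidate first and fall back to the older one. A minimal sketch of the update rule:

package main

import "fmt"

type tableEntry struct{ offset int32 }

// Two candidates per bucket, newest first, mirroring tableEntryPrev above.
type tableEntryPrev struct {
	Cur  tableEntry
	Prev tableEntry
}

func insert(b *tableEntryPrev, off int32) {
	b.Prev, b.Cur = b.Cur, tableEntry{offset: off} // demote Cur, keep newest
}

func main() {
	var bucket tableEntryPrev
	insert(&bucket, 100)
	insert(&bucket, 200)
	fmt.Println(bucket.Cur.offset, bucket.Prev.offset) // 200 100
}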
diff --git a/vendor/github.com/klauspost/compress/flate/level4.go b/vendor/github.com/klauspost/compress/flate/level4.go
index 1cbffa1ae..23c08b325 100644
--- a/vendor/github.com/klauspost/compress/flate/level4.go
+++ b/vendor/github.com/klauspost/compress/flate/level4.go
@@ -12,6 +12,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
 	const (
 		inputMargin            = 12 - 1
 		minNonLiteralBlockSize = 1 + 1 + inputMargin
+		hashShortBytes         = 4
 	)
 	if debugDeflate && e.cur < 0 {
 		panic(fmt.Sprint("e.cur < 0: ", e.cur))
@@ -80,7 +81,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
 		nextS := s
 		var t int32
 		for {
-			nextHashS := hash4x64(cv, tableBits)
+			nextHashS := hashLen(cv, tableBits, hashShortBytes)
 			nextHashL := hash7(cv, tableBits)
 
 			s = nextS
@@ -168,7 +169,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
 			// Index first pair after match end.
 			if int(s+8) < len(src) {
 				cv := load6432(src, s)
-				e.table[hash4x64(cv, tableBits)] = tableEntry{offset: s + e.cur}
+				e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: s + e.cur}
 				e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur}
 			}
 			goto emitRemainder
@@ -183,7 +184,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
 			t2 := tableEntry{offset: t.offset + 1}
 			e.bTable[hash7(cv, tableBits)] = t
 			e.bTable[hash7(cv>>8, tableBits)] = t2
-			e.table[hash4u(uint32(cv>>8), tableBits)] = t2
+			e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
 
 			i += 3
 			for ; i < s-1; i += 3 {
 				cv := load6432(src, i)
 				t := tableEntry{offset: i + e.cur}
 				t2 := tableEntry{offset: t.offset + 1}
 				e.bTable[hash7(cv, tableBits)] = t
 				e.bTable[hash7(cv>>8, tableBits)] = t2
-				e.table[hash4u(uint32(cv>>8), tableBits)] = t2
+				e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
 			}
 		}
 	}
@@ -201,7 +202,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
 	// compression we first update the hash table at s-1 and at s.
 	x := load6432(src, s-1)
 	o := e.cur + s - 1
-	prevHashS := hash4x64(x, tableBits)
+	prevHashS := hashLen(x, tableBits, hashShortBytes)
 	prevHashL := hash7(x, tableBits)
 	e.table[prevHashS] = tableEntry{offset: o}
 	e.bTable[prevHashL] = tableEntry{offset: o}
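Levels 4 through 6 pair the 4-byte short hash with the 7-byte hash7 in a second table: the short table finds matches early, while a hit in the long table guarantees a longer shared prefix and is preferred. A rough sketch of that preference order; maps stand in for the fixed-size arrays the real encoder uses:

package main

import "fmt"

type entry struct{ offset int32 }

type dualTable struct {
	short map[uint32]entry // indexed by a hash of 4 bytes
	long  map[uint32]entry // indexed by a hash of 7 bytes
}

// lookup prefers the long-hash candidate: its 7 matching bytes make a
// long extension far more likely than the short table's 4.
func (t *dualTable) lookup(h4, h7 uint32) (entry, bool) {
	if e, ok := t.long[h7]; ok {
		return e, true
	}
	e, ok := t.short[h4]
	return e, ok
}

func main() {
	t := dualTable{
		short: map[uint32]entry{1: {offset: 10}},
		long:  map[uint32]entry{2: {offset: 20}},
	}
	fmt.Println(t.lookup(1, 2)) // {20} true: the long-hash candidate wins
}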
diff --git a/vendor/github.com/klauspost/compress/flate/level5.go b/vendor/github.com/klauspost/compress/flate/level5.go
index 4b97576bd..83ef50ba4 100644
--- a/vendor/github.com/klauspost/compress/flate/level5.go
+++ b/vendor/github.com/klauspost/compress/flate/level5.go
@@ -12,6 +12,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
 	const (
 		inputMargin            = 12 - 1
 		minNonLiteralBlockSize = 1 + 1 + inputMargin
+		hashShortBytes         = 4
 	)
 	if debugDeflate && e.cur < 0 {
 		panic(fmt.Sprint("e.cur < 0: ", e.cur))
@@ -88,7 +89,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
 		var l int32
 		var t int32
 		for {
-			nextHashS := hash4x64(cv, tableBits)
+			nextHashS := hashLen(cv, tableBits, hashShortBytes)
 			nextHashL := hash7(cv, tableBits)
 
 			s = nextS
@@ -105,7 +106,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
 			eLong := &e.bTable[nextHashL]
 			eLong.Cur, eLong.Prev = entry, eLong.Cur
 
-			nextHashS = hash4x64(next, tableBits)
+			nextHashS = hashLen(next, tableBits, hashShortBytes)
 			nextHashL = hash7(next, tableBits)
 
 			t = lCandidate.Cur.offset - e.cur
@@ -191,14 +192,21 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
 			// Try to locate a better match by checking the end of best match...
 			if sAt := s + l; l < 30 && sAt < sLimit {
+				// Allow some bytes at the beginning to mismatch.
+				// Sweet spot is 2/3 bytes depending on input.
+				// 3 is only a little better when it is but sometimes a lot worse.
+				// The skipped bytes are tested in Extend backwards,
+				// and still picked up as part of the match if they do.
+				const skipBeginning = 2
 				eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset
-				// Test current
-				t2 := eLong - e.cur - l
-				off := s - t2
+				t2 := eLong - e.cur - l + skipBeginning
+				s2 := s + skipBeginning
+				off := s2 - t2
 				if t2 >= 0 && off < maxMatchOffset && off > 0 {
-					if l2 := e.matchlenLong(s, t2, src); l2 > l {
+					if l2 := e.matchlenLong(s2, t2, src); l2 > l {
 						t = t2
 						l = l2
+						s = s2
 					}
 				}
 			}
@@ -250,7 +258,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
 			if i < s-1 {
 				cv := load6432(src, i)
 				t := tableEntry{offset: i + e.cur}
-				e.table[hash4x64(cv, tableBits)] = t
+				e.table[hashLen(cv, tableBits, hashShortBytes)] = t
 				eLong := &e.bTable[hash7(cv, tableBits)]
 				eLong.Cur, eLong.Prev = t, eLong.Cur
 
@@ -263,7 +271,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
 				// We only have enough bits for a short entry at i+2
 				cv >>= 8
 				t = tableEntry{offset: t.offset + 1}
-				e.table[hash4x64(cv, tableBits)] = t
+				e.table[hashLen(cv, tableBits, hashShortBytes)] = t
 
 				// Skip one - otherwise we risk hitting 's'
 				i += 4
@@ -273,7 +281,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
 					t2 := tableEntry{offset: t.offset + 1}
 					eLong := &e.bTable[hash7(cv, tableBits)]
 					eLong.Cur, eLong.Prev = t, eLong.Cur
-					e.table[hash4u(uint32(cv>>8), tableBits)] = t2
+					e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
 				}
 			}
 		}
@@ -282,7 +290,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
 	// compression we first update the hash table at s-1 and at s.
 	x := load6432(src, s-1)
 	o := e.cur + s - 1
-	prevHashS := hash4x64(x, tableBits)
+	prevHashS := hashLen(x, tableBits, hashShortBytes)
 	prevHashL := hash7(x, tableBits)
 	e.table[prevHashS] = tableEntry{offset: o}
 	eLong := &e.bTable[prevHashL]
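The skipBeginning trick probes a candidate two bytes past its start, so a match whose first bytes happen to differ is not rejected outright; if the skipped bytes do match after all, the encoder's separate extend-backwards step reclaims them. An illustrative comparison:

package main

import "fmt"

func matchLen(a, b []byte) int {
	n := 0
	for n < len(a) && n < len(b) && a[n] == b[n] {
		n++
	}
	return n
}

func main() {
	const skipBeginning = 2
	cand := []byte("XXcdefghij") // candidate whose first two bytes differ
	cur := []byte("abcdefghij")

	fmt.Println(matchLen(cand, cur)) // 0: rejected when compared head-on
	fmt.Println(matchLen(cand[skipBeginning:], cur[skipBeginning:])) // 8
}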
diff --git a/vendor/github.com/klauspost/compress/flate/level6.go b/vendor/github.com/klauspost/compress/flate/level6.go
index 62888edf3..f1e9d98fa 100644
--- a/vendor/github.com/klauspost/compress/flate/level6.go
+++ b/vendor/github.com/klauspost/compress/flate/level6.go
@@ -12,6 +12,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
 	const (
 		inputMargin            = 12 - 1
 		minNonLiteralBlockSize = 1 + 1 + inputMargin
+		hashShortBytes         = 4
 	)
 	if debugDeflate && e.cur < 0 {
 		panic(fmt.Sprint("e.cur < 0: ", e.cur))
@@ -90,7 +91,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
 		var l int32
 		var t int32
 		for {
-			nextHashS := hash4x64(cv, tableBits)
+			nextHashS := hashLen(cv, tableBits, hashShortBytes)
 			nextHashL := hash7(cv, tableBits)
 			s = nextS
 			nextS = s + doEvery + (s-nextEmit)>>skipLog
@@ -107,7 +108,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
 			eLong.Cur, eLong.Prev = entry, eLong.Cur
 
 			// Calculate hashes of 'next'
-			nextHashS = hash4x64(next, tableBits)
+			nextHashS = hashLen(next, tableBits, hashShortBytes)
 			nextHashL = hash7(next, tableBits)
 
 			t = lCandidate.Cur.offset - e.cur
@@ -213,24 +214,33 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
 			// Try to locate a better match by checking the end-of-match...
 			if sAt := s + l; sAt < sLimit {
+				// Allow some bytes at the beginning to mismatch.
+				// Sweet spot is 2/3 bytes depending on input.
+				// 3 is only a little better when it is but sometimes a lot worse.
+				// The skipped bytes are tested in Extend backwards,
+				// and still picked up as part of the match if they do.
+				const skipBeginning = 2
 				eLong := &e.bTable[hash7(load6432(src, sAt), tableBits)]
 				// Test current
-				t2 := eLong.Cur.offset - e.cur - l
-				off := s - t2
+				t2 := eLong.Cur.offset - e.cur - l + skipBeginning
+				s2 := s + skipBeginning
+				off := s2 - t2
 				if off < maxMatchOffset {
 					if off > 0 && t2 >= 0 {
-						if l2 := e.matchlenLong(s, t2, src); l2 > l {
+						if l2 := e.matchlenLong(s2, t2, src); l2 > l {
 							t = t2
 							l = l2
+							s = s2
 						}
 					}
 					// Test next:
-					t2 = eLong.Prev.offset - e.cur - l
-					off := s - t2
+					t2 = eLong.Prev.offset - e.cur - l + skipBeginning
+					off := s2 - t2
 					if off > 0 && off < maxMatchOffset && t2 >= 0 {
-						if l2 := e.matchlenLong(s, t2, src); l2 > l {
+						if l2 := e.matchlenLong(s2, t2, src); l2 > l {
 							t = t2
 							l = l2
+							s = s2
 						}
 					}
 				}
@@ -277,7 +287,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
 			// Index after match end.
 			for i := nextS + 1; i < int32(len(src))-8; i += 2 {
 				cv := load6432(src, i)
-				e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur}
+				e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: i + e.cur}
 				eLong := &e.bTable[hash7(cv, tableBits)]
 				eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur}, eLong.Cur
 			}
@@ -292,7 +302,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
 				t2 := tableEntry{offset: t.offset + 1}
 				eLong := &e.bTable[hash7(cv, tableBits)]
 				eLong2 := &e.bTable[hash7(cv>>8, tableBits)]
-				e.table[hash4x64(cv, tableBits)] = t
+				e.table[hashLen(cv, tableBits, hashShortBytes)] = t
 				eLong.Cur, eLong.Prev = t, eLong.Cur
 				eLong2.Cur, eLong2.Prev = t2, eLong2.Cur
 			}
diff --git a/vendor/github.com/klauspost/compress/flate/stateless.go b/vendor/github.com/klauspost/compress/flate/stateless.go
index 93a1d1503..f3d4139ef 100644
--- a/vendor/github.com/klauspost/compress/flate/stateless.go
+++ b/vendor/github.com/klauspost/compress/flate/stateless.go
@@ -86,11 +86,19 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
 		dict = dict[len(dict)-maxStatelessDict:]
 	}
 
+	// For subsequent loops, keep shallow dict reference to avoid alloc+copy.
+	var inDict []byte
+
 	for len(in) > 0 {
 		todo := in
-		if len(todo) > maxStatelessBlock-len(dict) {
+		if len(inDict) > 0 {
+			if len(todo) > maxStatelessBlock-maxStatelessDict {
+				todo = todo[:maxStatelessBlock-maxStatelessDict]
+			}
+		} else if len(todo) > maxStatelessBlock-len(dict) {
 			todo = todo[:maxStatelessBlock-len(dict)]
 		}
+		inOrg := in
 		in = in[len(todo):]
 		uncompressed := todo
 		if len(dict) > 0 {
@@ -102,7 +110,11 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
 			todo = combined
 		}
 		// Compress
-		statelessEnc(&dst, todo, int16(len(dict)))
+		if len(inDict) == 0 {
+			statelessEnc(&dst, todo, int16(len(dict)))
+		} else {
+			statelessEnc(&dst, inDict[:maxStatelessDict+len(todo)], maxStatelessDict)
+		}
 		isEof := eof && len(in) == 0
 
 		if dst.n == 0 {
@@ -119,7 +131,8 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
 		}
 		if len(in) > 0 {
 			// Retain a dict if we have more
-			dict = todo[len(todo)-maxStatelessDict:]
+			inDict = inOrg[len(uncompressed)-maxStatelessDict:]
+			dict = nil
 			dst.Reset()
 		}
 		if bw.err != nil {
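For context on the function being patched, a round-trip through StatelessDeflate as exported by this package; the output is plain deflate, readable by the standard library:

package main

import (
	"bytes"
	"compress/flate"
	"fmt"
	"io"

	kflate "github.com/klauspost/compress/flate"
)

func main() {
	payload := bytes.Repeat([]byte("stateless deflate "), 100)

	var buf bytes.Buffer
	// eof=true emits a final block; dict=nil means no preset dictionary.
	if err := kflate.StatelessDeflate(&buf, payload, true, nil); err != nil {
		panic(err)
	}

	got, err := io.ReadAll(flate.NewReader(&buf))
	if err != nil {
		panic(err)
	}
	fmt.Println(bytes.Equal(got, payload), "->", buf.Len(), "compressed bytes")
}

The patch above keeps a shallow reference into the caller's input (inDict) instead of concatenating dictionary and block into a fresh buffer on every loop iteration, which is where the removed alloc+copy used to happen.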