diff options
Diffstat (limited to 'vendor/github.com/klauspost/compress/s2')
5 files changed, 851 insertions, 27 deletions
diff --git a/vendor/github.com/klauspost/compress/s2/README.md b/vendor/github.com/klauspost/compress/s2/README.md index 8284bb081..1d9220cbf 100644 --- a/vendor/github.com/klauspost/compress/s2/README.md +++ b/vendor/github.com/klauspost/compress/s2/README.md @@ -79,7 +79,7 @@ This will take ownership of the buffer until the stream is closed. func EncodeStream(src []byte, dst io.Writer) error { enc := s2.NewWriter(dst) // The encoder owns the buffer until Flush or Close is called. - err := enc.EncodeBuffer(buf) + err := enc.EncodeBuffer(src) if err != nil { enc.Close() return err diff --git a/vendor/github.com/klauspost/compress/s2/decode_other.go b/vendor/github.com/klauspost/compress/s2/decode_other.go index 2cb55c2c7..c99d40b69 100644 --- a/vendor/github.com/klauspost/compress/s2/decode_other.go +++ b/vendor/github.com/klauspost/compress/s2/decode_other.go @@ -11,6 +11,8 @@ package s2 import ( "fmt" "strconv" + + "github.com/klauspost/compress/internal/le" ) // decode writes the decoding of src to dst. It assumes that the varint-encoded @@ -38,21 +40,18 @@ func s2Decode(dst, src []byte) int { case x < 60: s++ case x == 60: + x = uint32(src[s+1]) s += 2 - x = uint32(src[s-1]) case x == 61: - in := src[s : s+3] - x = uint32(in[1]) | uint32(in[2])<<8 + x = uint32(le.Load16(src, s+1)) s += 3 case x == 62: - in := src[s : s+4] // Load as 32 bit and shift down. - x = uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24 + x = le.Load32(src, s) x >>= 8 s += 4 case x == 63: - in := src[s : s+5] - x = uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24 + x = le.Load32(src, s+1) s += 5 } length = int(x) + 1 @@ -85,8 +84,7 @@ func s2Decode(dst, src []byte) int { length = int(src[s]) + 4 s += 1 case 6: - in := src[s : s+2] - length = int(uint32(in[0])|(uint32(in[1])<<8)) + (1 << 8) + length = int(le.Load16(src, s)) + 1<<8 s += 2 case 7: in := src[s : s+3] @@ -99,15 +97,13 @@ func s2Decode(dst, src []byte) int { } length += 4 case tagCopy2: - in := src[s : s+3] - offset = int(uint32(in[1]) | uint32(in[2])<<8) - length = 1 + int(in[0])>>2 + offset = int(le.Load16(src, s+1)) + length = 1 + int(src[s])>>2 s += 3 case tagCopy4: - in := src[s : s+5] - offset = int(uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24) - length = 1 + int(in[0])>>2 + offset = int(le.Load32(src, s+1)) + length = 1 + int(src[s])>>2 s += 5 } diff --git a/vendor/github.com/klauspost/compress/s2/encode_all.go b/vendor/github.com/klauspost/compress/s2/encode_all.go index 997704569..a473b6452 100644 --- a/vendor/github.com/klauspost/compress/s2/encode_all.go +++ b/vendor/github.com/klauspost/compress/s2/encode_all.go @@ -10,14 +10,16 @@ import ( "encoding/binary" "fmt" "math/bits" + + "github.com/klauspost/compress/internal/le" ) func load32(b []byte, i int) uint32 { - return binary.LittleEndian.Uint32(b[i:]) + return le.Load32(b, i) } func load64(b []byte, i int) uint64 { - return binary.LittleEndian.Uint64(b[i:]) + return le.Load64(b, i) } // hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits. @@ -44,7 +46,12 @@ func encodeGo(dst, src []byte) []byte { d += emitLiteral(dst[d:], src) return dst[:d] } - n := encodeBlockGo(dst[d:], src) + var n int + if len(src) < 64<<10 { + n = encodeBlockGo64K(dst[d:], src) + } else { + n = encodeBlockGo(dst[d:], src) + } if n > 0 { d += n return dst[:d] @@ -70,7 +77,6 @@ func encodeBlockGo(dst, src []byte) (d int) { debug = false ) - var table [maxTableSize]uint32 // sLimit is when to stop looking for offset/length copies. The inputMargin @@ -277,13 +283,228 @@ emitRemainder: return d } -func encodeBlockSnappyGo(dst, src []byte) (d int) { +// encodeBlockGo64K is a specialized version for compressing blocks <= 64KB +func encodeBlockGo64K(dst, src []byte) (d int) { // Initialize the hash table. const ( tableBits = 14 maxTableSize = 1 << tableBits + + debug = false ) + var table [maxTableSize]uint16 + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := len(src) - inputMargin + + // Bail if we can't compress to at least this. + dstLimit := len(src) - len(src)>>5 - 5 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := 0 + + // The encoded form must start with a literal, as there are no previous + // bytes to copy, so we start looking for hash matches at s == 1. + s := 1 + cv := load64(src, s) + + // We search for a repeat at -1, but don't output repeats when nextEmit == 0 + repeat := 1 + + for { + candidate := 0 + for { + // Next src position to check + nextS := s + (s-nextEmit)>>5 + 4 + if nextS > sLimit { + goto emitRemainder + } + hash0 := hash6(cv, tableBits) + hash1 := hash6(cv>>8, tableBits) + candidate = int(table[hash0]) + candidate2 := int(table[hash1]) + table[hash0] = uint16(s) + table[hash1] = uint16(s + 1) + hash2 := hash6(cv>>16, tableBits) + + // Check repeat at offset checkRep. + const checkRep = 1 + if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) { + base := s + checkRep + // Extend back + for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; { + i-- + base-- + } + + // Bail if we exceed the maximum size. + if d+(base-nextEmit) > dstLimit { + return 0 + } + + d += emitLiteral(dst[d:], src[nextEmit:base]) + + // Extend forward + candidate := s - repeat + 4 + checkRep + s += 4 + checkRep + for s <= sLimit { + if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { + s += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + candidate += 8 + } + if debug { + // Validate match. + if s <= candidate { + panic("s <= candidate") + } + a := src[base:s] + b := src[base-repeat : base-repeat+(s-base)] + if !bytes.Equal(a, b) { + panic("mismatch") + } + } + if nextEmit > 0 { + // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset. + d += emitRepeat(dst[d:], repeat, s-base) + } else { + // First match, cannot be repeat. + d += emitCopy(dst[d:], repeat, s-base) + } + nextEmit = s + if s >= sLimit { + goto emitRemainder + } + cv = load64(src, s) + continue + } + + if uint32(cv) == load32(src, candidate) { + break + } + candidate = int(table[hash2]) + if uint32(cv>>8) == load32(src, candidate2) { + table[hash2] = uint16(s + 2) + candidate = candidate2 + s++ + break + } + table[hash2] = uint16(s + 2) + if uint32(cv>>16) == load32(src, candidate) { + s += 2 + break + } + + cv = load64(src, nextS) + s = nextS + } + + // Extend backwards. + // The top bytes will be rechecked to get the full match. + for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] { + candidate-- + s-- + } + + // Bail if we exceed the maximum size. + if d+(s-nextEmit) > dstLimit { + return 0 + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + d += emitLiteral(dst[d:], src[nextEmit:s]) + + // Call emitCopy, and then see if another emitCopy could be our next + // move. Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + base := s + repeat = base - candidate + + // Extend the 4-byte match as long as possible. + s += 4 + candidate += 4 + for s <= len(src)-8 { + if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { + s += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + candidate += 8 + } + + d += emitCopy(dst[d:], repeat, s-base) + if debug { + // Validate match. + if s <= candidate { + panic("s <= candidate") + } + a := src[base:s] + b := src[base-repeat : base-repeat+(s-base)] + if !bytes.Equal(a, b) { + panic("mismatch") + } + } + + nextEmit = s + if s >= sLimit { + goto emitRemainder + } + + if d > dstLimit { + // Do we have space for more, if not bail. + return 0 + } + // Check for an immediate match, otherwise start search at s+1 + x := load64(src, s-2) + m2Hash := hash6(x, tableBits) + currHash := hash6(x>>16, tableBits) + candidate = int(table[currHash]) + table[m2Hash] = uint16(s - 2) + table[currHash] = uint16(s) + if debug && s == candidate { + panic("s == candidate") + } + if uint32(x>>16) != load32(src, candidate) { + cv = load64(src, s+1) + s++ + break + } + } + } + +emitRemainder: + if nextEmit < len(src) { + // Bail if we exceed the maximum size. + if d+len(src)-nextEmit > dstLimit { + return 0 + } + d += emitLiteral(dst[d:], src[nextEmit:]) + } + return d +} + +func encodeBlockSnappyGo(dst, src []byte) (d int) { + // Initialize the hash table. + const ( + tableBits = 14 + maxTableSize = 1 << tableBits + ) var table [maxTableSize]uint32 // sLimit is when to stop looking for offset/length copies. The inputMargin @@ -467,6 +688,197 @@ emitRemainder: return d } +// encodeBlockSnappyGo64K is a special version of encodeBlockSnappyGo for sizes <64KB +func encodeBlockSnappyGo64K(dst, src []byte) (d int) { + // Initialize the hash table. + const ( + tableBits = 14 + maxTableSize = 1 << tableBits + ) + + var table [maxTableSize]uint16 + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := len(src) - inputMargin + + // Bail if we can't compress to at least this. + dstLimit := len(src) - len(src)>>5 - 5 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := 0 + + // The encoded form must start with a literal, as there are no previous + // bytes to copy, so we start looking for hash matches at s == 1. + s := 1 + cv := load64(src, s) + + // We search for a repeat at -1, but don't output repeats when nextEmit == 0 + repeat := 1 + + for { + candidate := 0 + for { + // Next src position to check + nextS := s + (s-nextEmit)>>5 + 4 + if nextS > sLimit { + goto emitRemainder + } + hash0 := hash6(cv, tableBits) + hash1 := hash6(cv>>8, tableBits) + candidate = int(table[hash0]) + candidate2 := int(table[hash1]) + table[hash0] = uint16(s) + table[hash1] = uint16(s + 1) + hash2 := hash6(cv>>16, tableBits) + + // Check repeat at offset checkRep. + const checkRep = 1 + if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) { + base := s + checkRep + // Extend back + for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; { + i-- + base-- + } + // Bail if we exceed the maximum size. + if d+(base-nextEmit) > dstLimit { + return 0 + } + + d += emitLiteral(dst[d:], src[nextEmit:base]) + + // Extend forward + candidate := s - repeat + 4 + checkRep + s += 4 + checkRep + for s <= sLimit { + if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { + s += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + candidate += 8 + } + + d += emitCopyNoRepeat(dst[d:], repeat, s-base) + nextEmit = s + if s >= sLimit { + goto emitRemainder + } + + cv = load64(src, s) + continue + } + + if uint32(cv) == load32(src, candidate) { + break + } + candidate = int(table[hash2]) + if uint32(cv>>8) == load32(src, candidate2) { + table[hash2] = uint16(s + 2) + candidate = candidate2 + s++ + break + } + table[hash2] = uint16(s + 2) + if uint32(cv>>16) == load32(src, candidate) { + s += 2 + break + } + + cv = load64(src, nextS) + s = nextS + } + + // Extend backwards + for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] { + candidate-- + s-- + } + + // Bail if we exceed the maximum size. + if d+(s-nextEmit) > dstLimit { + return 0 + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + d += emitLiteral(dst[d:], src[nextEmit:s]) + + // Call emitCopy, and then see if another emitCopy could be our next + // move. Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + base := s + repeat = base - candidate + + // Extend the 4-byte match as long as possible. + s += 4 + candidate += 4 + for s <= len(src)-8 { + if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { + s += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + candidate += 8 + } + + d += emitCopyNoRepeat(dst[d:], repeat, s-base) + if false { + // Validate match. + a := src[base:s] + b := src[base-repeat : base-repeat+(s-base)] + if !bytes.Equal(a, b) { + panic("mismatch") + } + } + + nextEmit = s + if s >= sLimit { + goto emitRemainder + } + + if d > dstLimit { + // Do we have space for more, if not bail. + return 0 + } + // Check for an immediate match, otherwise start search at s+1 + x := load64(src, s-2) + m2Hash := hash6(x, tableBits) + currHash := hash6(x>>16, tableBits) + candidate = int(table[currHash]) + table[m2Hash] = uint16(s - 2) + table[currHash] = uint16(s) + if uint32(x>>16) != load32(src, candidate) { + cv = load64(src, s+1) + s++ + break + } + } + } + +emitRemainder: + if nextEmit < len(src) { + // Bail if we exceed the maximum size. + if d+len(src)-nextEmit > dstLimit { + return 0 + } + d += emitLiteral(dst[d:], src[nextEmit:]) + } + return d +} + // encodeBlockGo encodes a non-empty src to a guaranteed-large-enough dst. It // assumes that the varint-encoded length of the decompressed bytes has already // been written. diff --git a/vendor/github.com/klauspost/compress/s2/encode_better.go b/vendor/github.com/klauspost/compress/s2/encode_better.go index 544cb1e17..90ebf89c2 100644 --- a/vendor/github.com/klauspost/compress/s2/encode_better.go +++ b/vendor/github.com/klauspost/compress/s2/encode_better.go @@ -348,12 +348,7 @@ func encodeBlockBetterSnappyGo(dst, src []byte) (d int) { nextS := 0 for { // Next src position to check - nextS = (s-nextEmit)>>7 + 1 - if nextS > maxSkip { - nextS = s + maxSkip - } else { - nextS += s - } + nextS = min(s+(s-nextEmit)>>7+1, s+maxSkip) if nextS > sLimit { goto emitRemainder @@ -483,6 +478,415 @@ emitRemainder: return d } +func encodeBlockBetterGo64K(dst, src []byte) (d int) { + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := len(src) - inputMargin + if len(src) < minNonLiteralBlockSize { + return 0 + } + // Initialize the hash tables. + // Use smaller tables for smaller blocks + const ( + // Long hash matches. + lTableBits = 16 + maxLTableSize = 1 << lTableBits + + // Short hash matches. + sTableBits = 13 + maxSTableSize = 1 << sTableBits + ) + + var lTable [maxLTableSize]uint16 + var sTable [maxSTableSize]uint16 + + // Bail if we can't compress to at least this. + dstLimit := len(src) - len(src)>>5 - 6 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := 0 + + // The encoded form must start with a literal, as there are no previous + // bytes to copy, so we start looking for hash matches at s == 1. + s := 1 + cv := load64(src, s) + + // We initialize repeat to 0, so we never match on first attempt + repeat := 0 + + for { + candidateL := 0 + nextS := 0 + for { + // Next src position to check + nextS = s + (s-nextEmit)>>6 + 1 + if nextS > sLimit { + goto emitRemainder + } + hashL := hash7(cv, lTableBits) + hashS := hash4(cv, sTableBits) + candidateL = int(lTable[hashL]) + candidateS := int(sTable[hashS]) + lTable[hashL] = uint16(s) + sTable[hashS] = uint16(s) + + valLong := load64(src, candidateL) + valShort := load64(src, candidateS) + + // If long matches at least 8 bytes, use that. + if cv == valLong { + break + } + if cv == valShort { + candidateL = candidateS + break + } + + // Check repeat at offset checkRep. + const checkRep = 1 + // Minimum length of a repeat. Tested with various values. + // While 4-5 offers improvements in some, 6 reduces + // regressions significantly. + const wantRepeatBytes = 6 + const repeatMask = ((1 << (wantRepeatBytes * 8)) - 1) << (8 * checkRep) + if false && repeat > 0 && cv&repeatMask == load64(src, s-repeat)&repeatMask { + base := s + checkRep + // Extend back + for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; { + i-- + base-- + } + d += emitLiteral(dst[d:], src[nextEmit:base]) + + // Extend forward + candidate := s - repeat + wantRepeatBytes + checkRep + s += wantRepeatBytes + checkRep + for s < len(src) { + if len(src)-s < 8 { + if src[s] == src[candidate] { + s++ + candidate++ + continue + } + break + } + if diff := load64(src, s) ^ load64(src, candidate); diff != 0 { + s += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + candidate += 8 + } + // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset. + d += emitRepeat(dst[d:], repeat, s-base) + nextEmit = s + if s >= sLimit { + goto emitRemainder + } + // Index in-between + index0 := base + 1 + index1 := s - 2 + + for index0 < index1 { + cv0 := load64(src, index0) + cv1 := load64(src, index1) + lTable[hash7(cv0, lTableBits)] = uint16(index0) + sTable[hash4(cv0>>8, sTableBits)] = uint16(index0 + 1) + + lTable[hash7(cv1, lTableBits)] = uint16(index1) + sTable[hash4(cv1>>8, sTableBits)] = uint16(index1 + 1) + index0 += 2 + index1 -= 2 + } + + cv = load64(src, s) + continue + } + + // Long likely matches 7, so take that. + if uint32(cv) == uint32(valLong) { + break + } + + // Check our short candidate + if uint32(cv) == uint32(valShort) { + // Try a long candidate at s+1 + hashL = hash7(cv>>8, lTableBits) + candidateL = int(lTable[hashL]) + lTable[hashL] = uint16(s + 1) + if uint32(cv>>8) == load32(src, candidateL) { + s++ + break + } + // Use our short candidate. + candidateL = candidateS + break + } + + cv = load64(src, nextS) + s = nextS + } + + // Extend backwards + for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] { + candidateL-- + s-- + } + + // Bail if we exceed the maximum size. + if d+(s-nextEmit) > dstLimit { + return 0 + } + + base := s + offset := base - candidateL + + // Extend the 4-byte match as long as possible. + s += 4 + candidateL += 4 + for s < len(src) { + if len(src)-s < 8 { + if src[s] == src[candidateL] { + s++ + candidateL++ + continue + } + break + } + if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 { + s += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + candidateL += 8 + } + + d += emitLiteral(dst[d:], src[nextEmit:base]) + if repeat == offset { + d += emitRepeat(dst[d:], offset, s-base) + } else { + d += emitCopy(dst[d:], offset, s-base) + repeat = offset + } + + nextEmit = s + if s >= sLimit { + goto emitRemainder + } + + if d > dstLimit { + // Do we have space for more, if not bail. + return 0 + } + + // Index short & long + index0 := base + 1 + index1 := s - 2 + + cv0 := load64(src, index0) + cv1 := load64(src, index1) + lTable[hash7(cv0, lTableBits)] = uint16(index0) + sTable[hash4(cv0>>8, sTableBits)] = uint16(index0 + 1) + + // lTable could be postponed, but very minor difference. + lTable[hash7(cv1, lTableBits)] = uint16(index1) + sTable[hash4(cv1>>8, sTableBits)] = uint16(index1 + 1) + index0 += 1 + index1 -= 1 + cv = load64(src, s) + + // Index large values sparsely in between. + // We do two starting from different offsets for speed. + index2 := (index0 + index1 + 1) >> 1 + for index2 < index1 { + lTable[hash7(load64(src, index0), lTableBits)] = uint16(index0) + lTable[hash7(load64(src, index2), lTableBits)] = uint16(index2) + index0 += 2 + index2 += 2 + } + } + +emitRemainder: + if nextEmit < len(src) { + // Bail if we exceed the maximum size. + if d+len(src)-nextEmit > dstLimit { + return 0 + } + d += emitLiteral(dst[d:], src[nextEmit:]) + } + return d +} + +// encodeBlockBetterSnappyGo encodes a non-empty src to a guaranteed-large-enough dst. It +// assumes that the varint-encoded length of the decompressed bytes has already +// been written. +// +// It also assumes that: +// +// len(dst) >= MaxEncodedLen(len(src)) && +// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize +func encodeBlockBetterSnappyGo64K(dst, src []byte) (d int) { + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := len(src) - inputMargin + if len(src) < minNonLiteralBlockSize { + return 0 + } + + // Initialize the hash tables. + // Use smaller tables for smaller blocks + const ( + // Long hash matches. + lTableBits = 15 + maxLTableSize = 1 << lTableBits + + // Short hash matches. + sTableBits = 13 + maxSTableSize = 1 << sTableBits + ) + + var lTable [maxLTableSize]uint16 + var sTable [maxSTableSize]uint16 + + // Bail if we can't compress to at least this. + dstLimit := len(src) - len(src)>>5 - 6 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := 0 + + // The encoded form must start with a literal, as there are no previous + // bytes to copy, so we start looking for hash matches at s == 1. + s := 1 + cv := load64(src, s) + + const maxSkip = 100 + + for { + candidateL := 0 + nextS := 0 + for { + // Next src position to check + nextS = min(s+(s-nextEmit)>>6+1, s+maxSkip) + + if nextS > sLimit { + goto emitRemainder + } + hashL := hash7(cv, lTableBits) + hashS := hash4(cv, sTableBits) + candidateL = int(lTable[hashL]) + candidateS := int(sTable[hashS]) + lTable[hashL] = uint16(s) + sTable[hashS] = uint16(s) + + if uint32(cv) == load32(src, candidateL) { + break + } + + // Check our short candidate + if uint32(cv) == load32(src, candidateS) { + // Try a long candidate at s+1 + hashL = hash7(cv>>8, lTableBits) + candidateL = int(lTable[hashL]) + lTable[hashL] = uint16(s + 1) + if uint32(cv>>8) == load32(src, candidateL) { + s++ + break + } + // Use our short candidate. + candidateL = candidateS + break + } + + cv = load64(src, nextS) + s = nextS + } + + // Extend backwards + for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] { + candidateL-- + s-- + } + + // Bail if we exceed the maximum size. + if d+(s-nextEmit) > dstLimit { + return 0 + } + + base := s + offset := base - candidateL + + // Extend the 4-byte match as long as possible. + s += 4 + candidateL += 4 + for s < len(src) { + if len(src)-s < 8 { + if src[s] == src[candidateL] { + s++ + candidateL++ + continue + } + break + } + if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 { + s += bits.TrailingZeros64(diff) >> 3 + break + } + s += 8 + candidateL += 8 + } + + d += emitLiteral(dst[d:], src[nextEmit:base]) + d += emitCopyNoRepeat(dst[d:], offset, s-base) + + nextEmit = s + if s >= sLimit { + goto emitRemainder + } + + if d > dstLimit { + // Do we have space for more, if not bail. + return 0 + } + + // Index short & long + index0 := base + 1 + index1 := s - 2 + + cv0 := load64(src, index0) + cv1 := load64(src, index1) + lTable[hash7(cv0, lTableBits)] = uint16(index0) + sTable[hash4(cv0>>8, sTableBits)] = uint16(index0 + 1) + + lTable[hash7(cv1, lTableBits)] = uint16(index1) + sTable[hash4(cv1>>8, sTableBits)] = uint16(index1 + 1) + index0 += 1 + index1 -= 1 + cv = load64(src, s) + + // Index large values sparsely in between. + // We do two starting from different offsets for speed. + index2 := (index0 + index1 + 1) >> 1 + for index2 < index1 { + lTable[hash7(load64(src, index0), lTableBits)] = uint16(index0) + lTable[hash7(load64(src, index2), lTableBits)] = uint16(index2) + index0 += 2 + index2 += 2 + } + } + +emitRemainder: + if nextEmit < len(src) { + // Bail if we exceed the maximum size. + if d+len(src)-nextEmit > dstLimit { + return 0 + } + d += emitLiteral(dst[d:], src[nextEmit:]) + } + return d +} + // encodeBlockBetterDict encodes a non-empty src to a guaranteed-large-enough dst. It // assumes that the varint-encoded length of the decompressed bytes has already // been written. diff --git a/vendor/github.com/klauspost/compress/s2/encode_go.go b/vendor/github.com/klauspost/compress/s2/encode_go.go index dd1c973ca..e25b78445 100644 --- a/vendor/github.com/klauspost/compress/s2/encode_go.go +++ b/vendor/github.com/klauspost/compress/s2/encode_go.go @@ -21,6 +21,9 @@ func encodeBlock(dst, src []byte) (d int) { if len(src) < minNonLiteralBlockSize { return 0 } + if len(src) <= 64<<10 { + return encodeBlockGo64K(dst, src) + } return encodeBlockGo(dst, src) } @@ -32,6 +35,9 @@ func encodeBlock(dst, src []byte) (d int) { // // len(dst) >= MaxEncodedLen(len(src)) func encodeBlockBetter(dst, src []byte) (d int) { + if len(src) <= 64<<10 { + return encodeBlockBetterGo64K(dst, src) + } return encodeBlockBetterGo(dst, src) } @@ -43,6 +49,9 @@ func encodeBlockBetter(dst, src []byte) (d int) { // // len(dst) >= MaxEncodedLen(len(src)) func encodeBlockBetterSnappy(dst, src []byte) (d int) { + if len(src) <= 64<<10 { + return encodeBlockBetterSnappyGo64K(dst, src) + } return encodeBlockBetterSnappyGo(dst, src) } @@ -57,6 +66,9 @@ func encodeBlockSnappy(dst, src []byte) (d int) { if len(src) < minNonLiteralBlockSize { return 0 } + if len(src) <= 64<<10 { + return encodeBlockSnappyGo64K(dst, src) + } return encodeBlockSnappyGo(dst, src) } |
