summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--go.mod8
-rw-r--r--go.sum15
-rw-r--r--vendor/github.com/dustin/go-humanize/.travis.yml16
-rw-r--r--vendor/github.com/dustin/go-humanize/README.markdown2
-rw-r--r--vendor/github.com/dustin/go-humanize/bigbytes.go20
-rw-r--r--vendor/github.com/dustin/go-humanize/commaf.go1
-rw-r--r--vendor/github.com/dustin/go-humanize/ftoa.go3
-rw-r--r--vendor/github.com/dustin/go-humanize/number.go2
-rw-r--r--vendor/github.com/dustin/go-humanize/si.go4
-rw-r--r--vendor/github.com/klauspost/compress/flate/deflate.go132
-rw-r--r--vendor/github.com/klauspost/compress/flate/dict_decoder.go24
-rw-r--r--vendor/github.com/klauspost/compress/flate/fast_encoder.go59
-rw-r--r--vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go20
-rw-r--r--vendor/github.com/klauspost/compress/flate/huffman_code.go15
-rw-r--r--vendor/github.com/klauspost/compress/flate/level1.go27
-rw-r--r--vendor/github.com/klauspost/compress/flate/level2.go35
-rw-r--r--vendor/github.com/klauspost/compress/flate/level3.go41
-rw-r--r--vendor/github.com/klauspost/compress/flate/level4.go11
-rw-r--r--vendor/github.com/klauspost/compress/flate/level5.go28
-rw-r--r--vendor/github.com/klauspost/compress/flate/level6.go30
-rw-r--r--vendor/github.com/klauspost/compress/flate/stateless.go19
-rw-r--r--vendor/github.com/klauspost/compress/s2/README.md283
-rw-r--r--vendor/github.com/klauspost/compress/s2/decode.go6
-rw-r--r--vendor/github.com/klauspost/compress/s2/decode_other.go34
-rw-r--r--vendor/github.com/klauspost/compress/s2/encode_all.go3
-rw-r--r--vendor/github.com/klauspost/compress/s2/encode_amd64.go12
-rw-r--r--vendor/github.com/klauspost/compress/s2/encode_best.go35
-rw-r--r--vendor/github.com/klauspost/compress/s2/encode_better.go105
-rw-r--r--vendor/github.com/klauspost/compress/s2/encode_go.go9
-rw-r--r--vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go23
-rw-r--r--vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s919
-rw-r--r--vendor/github.com/klauspost/cpuid/v2/README.md229
-rw-r--r--vendor/github.com/klauspost/cpuid/v2/cpuid.go146
-rw-r--r--vendor/github.com/klauspost/cpuid/v2/featureid_string.go363
-rw-r--r--vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go2
-rw-r--r--vendor/github.com/minio/minio-go/v7/api-put-object.go28
-rw-r--r--vendor/github.com/minio/minio-go/v7/api.go2
-rw-r--r--vendor/modules.txt12
38 files changed, 1698 insertions, 1025 deletions
diff --git a/go.mod b/go.mod
index 395f240b3..7e1532486 100644
--- a/go.mod
+++ b/go.mod
@@ -35,7 +35,7 @@ require (
github.com/jackc/pgx/v4 v4.17.2
github.com/microcosm-cc/bluemonday v1.0.22
github.com/miekg/dns v1.1.50
- github.com/minio/minio-go/v7 v7.0.48
+ github.com/minio/minio-go/v7 v7.0.49
github.com/mitchellh/mapstructure v1.5.0
github.com/oklog/ulid v1.3.1
github.com/spf13/cobra v1.6.1
@@ -89,7 +89,7 @@ require (
github.com/dsoprea/go-photoshop-info-format v0.0.0-20200610045659-121dd752914d // indirect
github.com/dsoprea/go-png-image-structure/v2 v2.0.0-20210512210324-29b889a6093d // indirect
github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e // indirect
- github.com/dustin/go-humanize v1.0.0 // indirect
+ github.com/dustin/go-humanize v1.0.1 // indirect
github.com/fsnotify/fsnotify v1.6.0 // indirect
github.com/gin-contrib/sse v0.1.0 // indirect
github.com/go-errors/errors v1.4.1 // indirect
@@ -118,8 +118,8 @@ require (
github.com/jinzhu/inflection v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 // indirect
- github.com/klauspost/compress v1.15.9 // indirect
- github.com/klauspost/cpuid/v2 v2.1.1 // indirect
+ github.com/klauspost/compress v1.15.15 // indirect
+ github.com/klauspost/cpuid/v2 v2.2.3 // indirect
github.com/leodido/go-urn v1.2.1 // indirect
github.com/magiconair/properties v1.8.7 // indirect
github.com/mattn/go-isatty v0.0.17 // indirect
diff --git a/go.sum b/go.sum
index 353075397..1d70f510d 100644
--- a/go.sum
+++ b/go.sum
@@ -161,8 +161,9 @@ github.com/dsoprea/go-png-image-structure/v2 v2.0.0-20210512210324-29b889a6093d/
github.com/dsoprea/go-utility v0.0.0-20200711062821-fab8125e9bdf/go.mod h1:95+K3z2L0mqsVYd6yveIv1lmtT3tcQQ3dVakPySffW8=
github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e h1:IxIbA7VbCNrwumIYjDoMOdf4KOSkMC6NJE4s8oRbE7E=
github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e/go.mod h1:uAzdkPTub5Y9yQwXe8W4m2XuP0tK4a9Q/dantD0+uaU=
-github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
+github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
+github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
@@ -398,13 +399,13 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.10.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.10.10/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
-github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY=
-github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
+github.com/klauspost/compress v1.15.15 h1:EF27CXIuDsYJ6mmvtBRlEuB2UVOqHG1tAXgZ7yIO+lw=
+github.com/klauspost/compress v1.15.15/go.mod h1:ZcK2JAFqKOpnBlxcLsJzYfrS9X1akm9fHZNnD9+Vo/4=
github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
-github.com/klauspost/cpuid/v2 v2.1.1 h1:t0wUqjowdm8ezddV5k0tLWVklVuvLJpoHeb4WBdydm0=
-github.com/klauspost/cpuid/v2 v2.1.1/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
+github.com/klauspost/cpuid/v2 v2.2.3 h1:sxCkb+qR91z4vsqw4vGGZlDgPz3G7gjaLyK3V8y70BU=
+github.com/klauspost/cpuid/v2 v2.2.3/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg=
@@ -443,8 +444,8 @@ github.com/miekg/dns v1.1.50 h1:DQUfb9uc6smULcREF09Uc+/Gd46YWqJd5DbpPE9xkcA=
github.com/miekg/dns v1.1.50/go.mod h1:e3IlAVfNqAllflbibAZEWOXOQ+Ynzk/dDozDxY7XnME=
github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=
github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM=
-github.com/minio/minio-go/v7 v7.0.48 h1:VQtYB/2xHW2SlxqhjRlDpvSiSOfGlyFlXZF1EHARPHM=
-github.com/minio/minio-go/v7 v7.0.48/go.mod h1:nCrRzjoSUQh8hgKKtu3Y708OLvRLtuASMg2/nvmbarw=
+github.com/minio/minio-go/v7 v7.0.49 h1:dE5DfOtnXMXCjr/HWI6zN9vCrY6Sv666qhhiwUMvGV4=
+github.com/minio/minio-go/v7 v7.0.49/go.mod h1:UI34MvQEiob3Cf/gGExGMmzugkM/tNgbFypNDy5LMVc=
github.com/minio/sha256-simd v1.0.0 h1:v1ta+49hkWZyvaKwrQB8elexRqm6Y0aMLjCNsrYxo6g=
github.com/minio/sha256-simd v1.0.0/go.mod h1:OuYzVNI5vcoYIAmbIvHPl3N3jUzVedXbKy5RFepssQM=
github.com/mitchellh/hashstructure/v2 v2.0.2 h1:vGKWl0YJqUNxE8d+h8f6NJLcCJrgbhC4NcD46KavDd4=
diff --git a/vendor/github.com/dustin/go-humanize/.travis.yml b/vendor/github.com/dustin/go-humanize/.travis.yml
index ba95cdd15..ac12e485a 100644
--- a/vendor/github.com/dustin/go-humanize/.travis.yml
+++ b/vendor/github.com/dustin/go-humanize/.travis.yml
@@ -1,12 +1,12 @@
sudo: false
language: go
+go_import_path: github.com/dustin/go-humanize
go:
- - 1.3.x
- - 1.5.x
- - 1.6.x
- - 1.7.x
- - 1.8.x
- - 1.9.x
+ - 1.13.x
+ - 1.14.x
+ - 1.15.x
+ - 1.16.x
+ - stable
- master
matrix:
allow_failures:
@@ -15,7 +15,7 @@ matrix:
install:
- # Do nothing. This is needed to prevent default install action "go get -t -v ./..." from happening here (we want it to happen inside script step).
script:
- - go get -t -v ./...
- diff -u <(echo -n) <(gofmt -d -s .)
- - go tool vet .
+ - go vet .
+ - go install -v -race ./...
- go test -v -race ./...
diff --git a/vendor/github.com/dustin/go-humanize/README.markdown b/vendor/github.com/dustin/go-humanize/README.markdown
index 91b4ae564..7d0b16b34 100644
--- a/vendor/github.com/dustin/go-humanize/README.markdown
+++ b/vendor/github.com/dustin/go-humanize/README.markdown
@@ -5,7 +5,7 @@ Just a few functions for helping humanize times and sizes.
`go get` it as `github.com/dustin/go-humanize`, import it as
`"github.com/dustin/go-humanize"`, use it as `humanize`.
-See [godoc](https://godoc.org/github.com/dustin/go-humanize) for
+See [godoc](https://pkg.go.dev/github.com/dustin/go-humanize) for
complete documentation.
## Sizes
diff --git a/vendor/github.com/dustin/go-humanize/bigbytes.go b/vendor/github.com/dustin/go-humanize/bigbytes.go
index 1a2bf6172..3b015fd59 100644
--- a/vendor/github.com/dustin/go-humanize/bigbytes.go
+++ b/vendor/github.com/dustin/go-humanize/bigbytes.go
@@ -28,6 +28,10 @@ var (
BigZiByte = (&big.Int{}).Mul(BigEiByte, bigIECExp)
// BigYiByte is 1,024 z bytes in bit.Ints
BigYiByte = (&big.Int{}).Mul(BigZiByte, bigIECExp)
+ // BigRiByte is 1,024 y bytes in big.Ints
+ BigRiByte = (&big.Int{}).Mul(BigYiByte, bigIECExp)
+ // BigQiByte is 1,024 r bytes in big.Ints
+ BigQiByte = (&big.Int{}).Mul(BigRiByte, bigIECExp)
)
var (
@@ -51,6 +55,10 @@ var (
BigZByte = (&big.Int{}).Mul(BigEByte, bigSIExp)
// BigYByte is 1,000 SI z bytes in big.Ints
BigYByte = (&big.Int{}).Mul(BigZByte, bigSIExp)
+ // BigRByte is 1,000 SI y bytes in big.Ints
+ BigRByte = (&big.Int{}).Mul(BigYByte, bigSIExp)
+ // BigQByte is 1,000 SI r bytes in big.Ints
+ BigQByte = (&big.Int{}).Mul(BigRByte, bigSIExp)
)
var bigBytesSizeTable = map[string]*big.Int{
@@ -71,6 +79,10 @@ var bigBytesSizeTable = map[string]*big.Int{
"zb": BigZByte,
"yib": BigYiByte,
"yb": BigYByte,
+ "rib": BigRiByte,
+ "rb": BigRByte,
+ "qib": BigQiByte,
+ "qb": BigQByte,
// Without suffix
"": BigByte,
"ki": BigKiByte,
@@ -89,6 +101,10 @@ var bigBytesSizeTable = map[string]*big.Int{
"zi": BigZiByte,
"y": BigYByte,
"yi": BigYiByte,
+ "r": BigRByte,
+ "ri": BigRiByte,
+ "q": BigQByte,
+ "qi": BigQiByte,
}
var ten = big.NewInt(10)
@@ -115,7 +131,7 @@ func humanateBigBytes(s, base *big.Int, sizes []string) string {
//
// BigBytes(82854982) -> 83 MB
func BigBytes(s *big.Int) string {
- sizes := []string{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"}
+ sizes := []string{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", "RB", "QB"}
return humanateBigBytes(s, bigSIExp, sizes)
}
@@ -125,7 +141,7 @@ func BigBytes(s *big.Int) string {
//
// BigIBytes(82854982) -> 79 MiB
func BigIBytes(s *big.Int) string {
- sizes := []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"}
+ sizes := []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB", "RiB", "QiB"}
return humanateBigBytes(s, bigIECExp, sizes)
}
diff --git a/vendor/github.com/dustin/go-humanize/commaf.go b/vendor/github.com/dustin/go-humanize/commaf.go
index 620690dec..2bc83a03c 100644
--- a/vendor/github.com/dustin/go-humanize/commaf.go
+++ b/vendor/github.com/dustin/go-humanize/commaf.go
@@ -1,3 +1,4 @@
+//go:build go1.6
// +build go1.6
package humanize
diff --git a/vendor/github.com/dustin/go-humanize/ftoa.go b/vendor/github.com/dustin/go-humanize/ftoa.go
index 1c62b640d..bce923f37 100644
--- a/vendor/github.com/dustin/go-humanize/ftoa.go
+++ b/vendor/github.com/dustin/go-humanize/ftoa.go
@@ -6,6 +6,9 @@ import (
)
func stripTrailingZeros(s string) string {
+ if !strings.ContainsRune(s, '.') {
+ return s
+ }
offset := len(s) - 1
for offset > 0 {
if s[offset] == '.' {
diff --git a/vendor/github.com/dustin/go-humanize/number.go b/vendor/github.com/dustin/go-humanize/number.go
index dec618659..6470d0d47 100644
--- a/vendor/github.com/dustin/go-humanize/number.go
+++ b/vendor/github.com/dustin/go-humanize/number.go
@@ -73,7 +73,7 @@ func FormatFloat(format string, n float64) string {
if n > math.MaxFloat64 {
return "Infinity"
}
- if n < -math.MaxFloat64 {
+ if n < (0.0 - math.MaxFloat64) {
return "-Infinity"
}
diff --git a/vendor/github.com/dustin/go-humanize/si.go b/vendor/github.com/dustin/go-humanize/si.go
index ae659e0e4..8b8501984 100644
--- a/vendor/github.com/dustin/go-humanize/si.go
+++ b/vendor/github.com/dustin/go-humanize/si.go
@@ -8,6 +8,8 @@ import (
)
var siPrefixTable = map[float64]string{
+ -30: "q", // quecto
+ -27: "r", // ronto
-24: "y", // yocto
-21: "z", // zepto
-18: "a", // atto
@@ -25,6 +27,8 @@ var siPrefixTable = map[float64]string{
18: "E", // exa
21: "Z", // zetta
24: "Y", // yotta
+ 27: "R", // ronna
+ 30: "Q", // quetta
}
var revSIPrefixTable = revfmap(siPrefixTable)
diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go
index f8435998e..82882961a 100644
--- a/vendor/github.com/klauspost/compress/flate/deflate.go
+++ b/vendor/github.com/klauspost/compress/flate/deflate.go
@@ -131,7 +131,8 @@ func (d *compressor) fillDeflate(b []byte) int {
s := d.state
if s.index >= 2*windowSize-(minMatchLength+maxMatchLength) {
// shift the window by windowSize
- copy(d.window[:], d.window[windowSize:2*windowSize])
+ //copy(d.window[:], d.window[windowSize:2*windowSize])
+ *(*[windowSize]byte)(d.window) = *(*[windowSize]byte)(d.window[windowSize:])
s.index -= windowSize
d.windowEnd -= windowSize
if d.blockStart >= windowSize {
@@ -293,7 +294,6 @@ func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, of
}
offset = 0
- cGain := 0
if d.chain < 100 {
for i := prevHead; tries > 0; tries-- {
if wEnd == win[i+length] {
@@ -321,10 +321,14 @@ func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, of
return
}
+ // Minimum gain to accept a match.
+ cGain := 4
+
// Some like it higher (CSV), some like it lower (JSON)
- const baseCost = 6
+ const baseCost = 3
// Base is 4 bytes at with an additional cost.
// Matches must be better than this.
+
for i := prevHead; tries > 0; tries-- {
if wEnd == win[i+length] {
n := matchLen(win[i:i+minMatchLook], wPos)
@@ -332,7 +336,7 @@ func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, of
// Calculate gain. Estimate
newGain := d.h.bitLengthRaw(wPos[:n]) - int(offsetExtraBits[offsetCode(uint32(pos-i))]) - baseCost - int(lengthExtraBits[lengthCodes[(n-3)&255]])
- //fmt.Println(n, "gain:", newGain, "prev:", cGain, "raw:", d.h.bitLengthRaw(wPos[:n]))
+ //fmt.Println("gain:", newGain, "prev:", cGain, "raw:", d.h.bitLengthRaw(wPos[:n]), "this-len:", n, "prev-len:", length)
if newGain > cGain {
length = n
offset = pos - i
@@ -373,6 +377,12 @@ func hash4(b []byte) uint32 {
return hash4u(binary.LittleEndian.Uint32(b), hashBits)
}
+// hash4u returns the hash of u to fit in a hash table with h bits.
+// Preferably h should be a constant and should always be <32.
+func hash4u(u uint32, h uint8) uint32 {
+ return (u * prime4bytes) >> (32 - h)
+}
+
// bulkHash4 will compute hashes using the same
// algorithm as hash4
func bulkHash4(b []byte, dst []uint32) {
@@ -483,27 +493,103 @@ func (d *compressor) deflateLazy() {
}
if prevLength >= minMatchLength && s.length <= prevLength {
- // Check for better match at end...
+ // No better match, but check for better match at end...
//
- // checkOff must be >=2 since we otherwise risk checking s.index
- // Offset of 2 seems to yield best results.
+ // Skip forward a number of bytes.
+ // Offset of 2 seems to yield best results. 3 is sometimes better.
const checkOff = 2
- prevIndex := s.index - 1
- if prevIndex+prevLength+checkOff < s.maxInsertIndex {
- end := lookahead
- if lookahead > maxMatchLength {
- end = maxMatchLength
- }
- end += prevIndex
- idx := prevIndex + prevLength - (4 - checkOff)
- h := hash4(d.window[idx:])
- ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength + (4 - checkOff)
- if ch2 > minIndex {
- length := matchLen(d.window[prevIndex:end], d.window[ch2:])
- // It seems like a pure length metric is best.
- if length > prevLength {
- prevLength = length
- prevOffset = prevIndex - ch2
+
+ // Check all, except full length
+ if prevLength < maxMatchLength-checkOff {
+ prevIndex := s.index - 1
+ if prevIndex+prevLength < s.maxInsertIndex {
+ end := lookahead
+ if lookahead > maxMatchLength+checkOff {
+ end = maxMatchLength + checkOff
+ }
+ end += prevIndex
+
+ // Hash at match end.
+ h := hash4(d.window[prevIndex+prevLength:])
+ ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength
+ if prevIndex-ch2 != prevOffset && ch2 > minIndex+checkOff {
+ length := matchLen(d.window[prevIndex+checkOff:end], d.window[ch2+checkOff:])
+ // It seems like a pure length metric is best.
+ if length > prevLength {
+ prevLength = length
+ prevOffset = prevIndex - ch2
+
+ // Extend back...
+ for i := checkOff - 1; i >= 0; i-- {
+ if prevLength >= maxMatchLength || d.window[prevIndex+i] != d.window[ch2+i] {
+ // Emit tokens we "owe"
+ for j := 0; j <= i; j++ {
+ d.tokens.AddLiteral(d.window[prevIndex+j])
+ if d.tokens.n == maxFlateBlockTokens {
+ // The block includes the current character
+ if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
+ return
+ }
+ d.tokens.Reset()
+ }
+ s.index++
+ if s.index < s.maxInsertIndex {
+ h := hash4(d.window[s.index:])
+ ch := s.hashHead[h]
+ s.chainHead = int(ch)
+ s.hashPrev[s.index&windowMask] = ch
+ s.hashHead[h] = uint32(s.index + s.hashOffset)
+ }
+ }
+ break
+ } else {
+ prevLength++
+ }
+ }
+ } else if false {
+ // Check one further ahead.
+ // Only rarely better, disabled for now.
+ prevIndex++
+ h := hash4(d.window[prevIndex+prevLength:])
+ ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength
+ if prevIndex-ch2 != prevOffset && ch2 > minIndex+checkOff {
+ length := matchLen(d.window[prevIndex+checkOff:end], d.window[ch2+checkOff:])
+ // It seems like a pure length metric is best.
+ if length > prevLength+checkOff {
+ prevLength = length
+ prevOffset = prevIndex - ch2
+ prevIndex--
+
+ // Extend back...
+ for i := checkOff; i >= 0; i-- {
+ if prevLength >= maxMatchLength || d.window[prevIndex+i] != d.window[ch2+i-1] {
+ // Emit tokens we "owe"
+ for j := 0; j <= i; j++ {
+ d.tokens.AddLiteral(d.window[prevIndex+j])
+ if d.tokens.n == maxFlateBlockTokens {
+ // The block includes the current character
+ if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil {
+ return
+ }
+ d.tokens.Reset()
+ }
+ s.index++
+ if s.index < s.maxInsertIndex {
+ h := hash4(d.window[s.index:])
+ ch := s.hashHead[h]
+ s.chainHead = int(ch)
+ s.hashPrev[s.index&windowMask] = ch
+ s.hashHead[h] = uint32(s.index + s.hashOffset)
+ }
+ }
+ break
+ } else {
+ prevLength++
+ }
+ }
+ }
+ }
+ }
}
}
}
diff --git a/vendor/github.com/klauspost/compress/flate/dict_decoder.go b/vendor/github.com/klauspost/compress/flate/dict_decoder.go
index 71c75a065..bb36351a5 100644
--- a/vendor/github.com/klauspost/compress/flate/dict_decoder.go
+++ b/vendor/github.com/klauspost/compress/flate/dict_decoder.go
@@ -7,19 +7,19 @@ package flate
// dictDecoder implements the LZ77 sliding dictionary as used in decompression.
// LZ77 decompresses data through sequences of two forms of commands:
//
-// * Literal insertions: Runs of one or more symbols are inserted into the data
-// stream as is. This is accomplished through the writeByte method for a
-// single symbol, or combinations of writeSlice/writeMark for multiple symbols.
-// Any valid stream must start with a literal insertion if no preset dictionary
-// is used.
+// - Literal insertions: Runs of one or more symbols are inserted into the data
+// stream as is. This is accomplished through the writeByte method for a
+// single symbol, or combinations of writeSlice/writeMark for multiple symbols.
+// Any valid stream must start with a literal insertion if no preset dictionary
+// is used.
//
-// * Backward copies: Runs of one or more symbols are copied from previously
-// emitted data. Backward copies come as the tuple (dist, length) where dist
-// determines how far back in the stream to copy from and length determines how
-// many bytes to copy. Note that it is valid for the length to be greater than
-// the distance. Since LZ77 uses forward copies, that situation is used to
-// perform a form of run-length encoding on repeated runs of symbols.
-// The writeCopy and tryWriteCopy are used to implement this command.
+// - Backward copies: Runs of one or more symbols are copied from previously
+// emitted data. Backward copies come as the tuple (dist, length) where dist
+// determines how far back in the stream to copy from and length determines how
+// many bytes to copy. Note that it is valid for the length to be greater than
+// the distance. Since LZ77 uses forward copies, that situation is used to
+// perform a form of run-length encoding on repeated runs of symbols.
+// The writeCopy and tryWriteCopy are used to implement this command.
//
// For performance reasons, this implementation performs little to no sanity
// checks about the arguments. As such, the invariants documented for each
diff --git a/vendor/github.com/klauspost/compress/flate/fast_encoder.go b/vendor/github.com/klauspost/compress/flate/fast_encoder.go
index f781aaa62..24caf5f70 100644
--- a/vendor/github.com/klauspost/compress/flate/fast_encoder.go
+++ b/vendor/github.com/klauspost/compress/flate/fast_encoder.go
@@ -58,17 +58,6 @@ const (
prime8bytes = 0xcf1bbcdcb7a56463
)
-func load32(b []byte, i int) uint32 {
- // Help the compiler eliminate bounds checks on the read so it can be done in a single read.
- b = b[i:]
- b = b[:4]
- return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
-}
-
-func load64(b []byte, i int) uint64 {
- return binary.LittleEndian.Uint64(b[i:])
-}
-
func load3232(b []byte, i int32) uint32 {
return binary.LittleEndian.Uint32(b[i:])
}
@@ -77,10 +66,6 @@ func load6432(b []byte, i int32) uint64 {
return binary.LittleEndian.Uint64(b[i:])
}
-func hash(u uint32) uint32 {
- return (u * 0x1e35a7bd) >> tableShift
-}
-
type tableEntry struct {
offset int32
}
@@ -104,7 +89,8 @@ func (e *fastGen) addBlock(src []byte) int32 {
}
// Move down
offset := int32(len(e.hist)) - maxMatchOffset
- copy(e.hist[0:maxMatchOffset], e.hist[offset:])
+ // copy(e.hist[0:maxMatchOffset], e.hist[offset:])
+ *(*[maxMatchOffset]byte)(e.hist) = *(*[maxMatchOffset]byte)(e.hist[offset:])
e.cur += offset
e.hist = e.hist[:maxMatchOffset]
}
@@ -114,39 +100,36 @@ func (e *fastGen) addBlock(src []byte) int32 {
return s
}
-// hash4 returns the hash of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <32.
-func hash4u(u uint32, h uint8) uint32 {
- return (u * prime4bytes) >> (32 - h)
-}
-
type tableEntryPrev struct {
Cur tableEntry
Prev tableEntry
}
-// hash4x64 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <32.
-func hash4x64(u uint64, h uint8) uint32 {
- return (uint32(u) * prime4bytes) >> ((32 - h) & reg8SizeMask32)
-}
-
// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash7(u uint64, h uint8) uint32 {
return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & reg8SizeMask64))
}
-// hash8 returns the hash of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <64.
-func hash8(u uint64, h uint8) uint32 {
- return uint32((u * prime8bytes) >> ((64 - h) & reg8SizeMask64))
-}
-
-// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <64.
-func hash6(u uint64, h uint8) uint32 {
- return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & reg8SizeMask64))
+// hashLen returns a hash of the lowest mls bytes of u with length output bits.
+// mls must be >=3 and <=8. Any other value will return hash for 4 bytes.
+// length should always be < 32.
+// Preferably length and mls should be constants for inlining.
+func hashLen(u uint64, length, mls uint8) uint32 {
+ switch mls {
+ case 3:
+ return (uint32(u<<8) * prime3bytes) >> (32 - length)
+ case 5:
+ return uint32(((u << (64 - 40)) * prime5bytes) >> (64 - length))
+ case 6:
+ return uint32(((u << (64 - 48)) * prime6bytes) >> (64 - length))
+ case 7:
+ return uint32(((u << (64 - 56)) * prime7bytes) >> (64 - length))
+ case 8:
+ return uint32((u * prime8bytes) >> (64 - length))
+ default:
+ return (uint32(u) * prime4bytes) >> (32 - length)
+ }
}
// matchlen will return the match length between offsets and t in src.
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
index 40ef45c2f..89a5dd89f 100644
--- a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
+++ b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
@@ -265,9 +265,9 @@ func (w *huffmanBitWriter) writeBytes(bytes []byte) {
// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
// information. Code badCode is an end marker
//
-// numLiterals The number of literals in literalEncoding
-// numOffsets The number of offsets in offsetEncoding
-// litenc, offenc The literal and offset encoder to use
+// numLiterals The number of literals in literalEncoding
+// numOffsets The number of offsets in offsetEncoding
+// litenc, offenc The literal and offset encoder to use
func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) {
for i := range w.codegenFreq {
w.codegenFreq[i] = 0
@@ -460,9 +460,9 @@ func (w *huffmanBitWriter) writeOutBits() {
// Write the header of a dynamic Huffman block to the output stream.
//
-// numLiterals The number of literals specified in codegen
-// numOffsets The number of offsets specified in codegen
-// numCodegens The number of codegens used in codegen
+// numLiterals The number of literals specified in codegen
+// numOffsets The number of offsets specified in codegen
+// numCodegens The number of codegens used in codegen
func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) {
if w.err != nil {
return
@@ -790,9 +790,11 @@ func (w *huffmanBitWriter) fillTokens() {
// and offsetEncoding.
// The number of literal and offset tokens is returned.
func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, numOffsets int) {
- copy(w.literalFreq[:], t.litHist[:])
- copy(w.literalFreq[256:], t.extraHist[:])
- copy(w.offsetFreq[:], t.offHist[:offsetCodeCount])
+ //copy(w.literalFreq[:], t.litHist[:])
+ *(*[256]uint16)(w.literalFreq[:]) = t.litHist
+ //copy(w.literalFreq[256:], t.extraHist[:])
+ *(*[32]uint16)(w.literalFreq[256:]) = t.extraHist
+ w.offsetFreq = t.offHist
if t.n == 0 {
return
diff --git a/vendor/github.com/klauspost/compress/flate/huffman_code.go b/vendor/github.com/klauspost/compress/flate/huffman_code.go
index 5ac144f28..be7b58b47 100644
--- a/vendor/github.com/klauspost/compress/flate/huffman_code.go
+++ b/vendor/github.com/klauspost/compress/flate/huffman_code.go
@@ -168,13 +168,18 @@ func (h *huffmanEncoder) canReuseBits(freq []uint16) int {
// The cases of 0, 1, and 2 literals are handled by special case code.
//
// list An array of the literals with non-zero frequencies
-// and their associated frequencies. The array is in order of increasing
-// frequency, and has as its last element a special element with frequency
-// MaxInt32
+//
+// and their associated frequencies. The array is in order of increasing
+// frequency, and has as its last element a special element with frequency
+// MaxInt32
+//
// maxBits The maximum number of bits that should be used to encode any literal.
-// Must be less than 16.
+//
+// Must be less than 16.
+//
// return An integer array in which array[i] indicates the number of literals
-// that should be encoded in i bits.
+//
+// that should be encoded in i bits.
func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 {
if maxBits >= maxBitsLimit {
panic("flate: maxBits too large")
diff --git a/vendor/github.com/klauspost/compress/flate/level1.go b/vendor/github.com/klauspost/compress/flate/level1.go
index 0f14f8d63..703b9a89a 100644
--- a/vendor/github.com/klauspost/compress/flate/level1.go
+++ b/vendor/github.com/klauspost/compress/flate/level1.go
@@ -19,6 +19,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
+ hashBytes = 5
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
@@ -68,7 +69,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
- cv := load3232(src, s)
+ cv := load6432(src, s)
for {
const skipLog = 5
@@ -77,7 +78,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
nextS := s
var candidate tableEntry
for {
- nextHash := hash(cv)
+ nextHash := hashLen(cv, tableBits, hashBytes)
candidate = e.table[nextHash]
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
@@ -86,16 +87,16 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
now := load6432(src, nextS)
e.table[nextHash] = tableEntry{offset: s + e.cur}
- nextHash = hash(uint32(now))
+ nextHash = hashLen(now, tableBits, hashBytes)
offset := s - (candidate.offset - e.cur)
- if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
+ if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}
// Do one right away...
- cv = uint32(now)
+ cv = now
s = nextS
nextS++
candidate = e.table[nextHash]
@@ -103,11 +104,11 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
e.table[nextHash] = tableEntry{offset: s + e.cur}
offset = s - (candidate.offset - e.cur)
- if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
+ if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}
- cv = uint32(now)
+ cv = now
s = nextS
}
@@ -198,9 +199,9 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
}
if s >= sLimit {
// Index first pair after match end.
- if int(s+l+4) < len(src) {
- cv := load3232(src, s)
- e.table[hash(cv)] = tableEntry{offset: s + e.cur}
+ if int(s+l+8) < len(src) {
+ cv := load6432(src, s)
+ e.table[hashLen(cv, tableBits, hashBytes)] = tableEntry{offset: s + e.cur}
}
goto emitRemainder
}
@@ -213,16 +214,16 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
// three load32 calls.
x := load6432(src, s-2)
o := e.cur + s - 2
- prevHash := hash(uint32(x))
+ prevHash := hashLen(x, tableBits, hashBytes)
e.table[prevHash] = tableEntry{offset: o}
x >>= 16
- currHash := hash(uint32(x))
+ currHash := hashLen(x, tableBits, hashBytes)
candidate = e.table[currHash]
e.table[currHash] = tableEntry{offset: o + 2}
offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || uint32(x) != load3232(src, candidate.offset-e.cur) {
- cv = uint32(x >> 8)
+ cv = x >> 8
s++
break
}
diff --git a/vendor/github.com/klauspost/compress/flate/level2.go b/vendor/github.com/klauspost/compress/flate/level2.go
index 8603fbd55..876dfbe30 100644
--- a/vendor/github.com/klauspost/compress/flate/level2.go
+++ b/vendor/github.com/klauspost/compress/flate/level2.go
@@ -16,6 +16,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
+ hashBytes = 5
)
if debugDeflate && e.cur < 0 {
@@ -66,7 +67,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
- cv := load3232(src, s)
+ cv := load6432(src, s)
for {
// When should we start skipping if we haven't found matches in a long while.
const skipLog = 5
@@ -75,7 +76,7 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
nextS := s
var candidate tableEntry
for {
- nextHash := hash4u(cv, bTableBits)
+ nextHash := hashLen(cv, bTableBits, hashBytes)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
if nextS > sLimit {
@@ -84,16 +85,16 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
candidate = e.table[nextHash]
now := load6432(src, nextS)
e.table[nextHash] = tableEntry{offset: s + e.cur}
- nextHash = hash4u(uint32(now), bTableBits)
+ nextHash = hashLen(now, bTableBits, hashBytes)
offset := s - (candidate.offset - e.cur)
- if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
+ if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
e.table[nextHash] = tableEntry{offset: nextS + e.cur}
break
}
// Do one right away...
- cv = uint32(now)
+ cv = now
s = nextS
nextS++
candidate = e.table[nextHash]
@@ -101,10 +102,10 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
e.table[nextHash] = tableEntry{offset: s + e.cur}
offset = s - (candidate.offset - e.cur)
- if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) {
+ if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
break
}
- cv = uint32(now)
+ cv = now
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
@@ -154,9 +155,9 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
if s >= sLimit {
// Index first pair after match end.
- if int(s+l+4) < len(src) {
- cv := load3232(src, s)
- e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur}
+ if int(s+l+8) < len(src) {
+ cv := load6432(src, s)
+ e.table[hashLen(cv, bTableBits, hashBytes)] = tableEntry{offset: s + e.cur}
}
goto emitRemainder
}
@@ -164,15 +165,15 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
// Store every second hash in-between, but offset by 1.
for i := s - l + 2; i < s-5; i += 7 {
x := load6432(src, i)
- nextHash := hash4u(uint32(x), bTableBits)
+ nextHash := hashLen(x, bTableBits, hashBytes)
e.table[nextHash] = tableEntry{offset: e.cur + i}
// Skip one
x >>= 16
- nextHash = hash4u(uint32(x), bTableBits)
+ nextHash = hashLen(x, bTableBits, hashBytes)
e.table[nextHash] = tableEntry{offset: e.cur + i + 2}
// Skip one
x >>= 16
- nextHash = hash4u(uint32(x), bTableBits)
+ nextHash = hashLen(x, bTableBits, hashBytes)
e.table[nextHash] = tableEntry{offset: e.cur + i + 4}
}
@@ -184,17 +185,17 @@ func (e *fastEncL2) Encode(dst *tokens, src []byte) {
// three load32 calls.
x := load6432(src, s-2)
o := e.cur + s - 2
- prevHash := hash4u(uint32(x), bTableBits)
- prevHash2 := hash4u(uint32(x>>8), bTableBits)
+ prevHash := hashLen(x, bTableBits, hashBytes)
+ prevHash2 := hashLen(x>>8, bTableBits, hashBytes)
e.table[prevHash] = tableEntry{offset: o}
e.table[prevHash2] = tableEntry{offset: o + 1}
- currHash := hash4u(uint32(x>>16), bTableBits)
+ currHash := hashLen(x>>16, bTableBits, hashBytes)
candidate = e.table[currHash]
e.table[currHash] = tableEntry{offset: o + 2}
offset := s - (candidate.offset - e.cur)
if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) {
- cv = uint32(x >> 24)
+ cv = x >> 24
s++
break
}
diff --git a/vendor/github.com/klauspost/compress/flate/level3.go b/vendor/github.com/klauspost/compress/flate/level3.go
index 039639f89..7aa2b72a1 100644
--- a/vendor/github.com/klauspost/compress/flate/level3.go
+++ b/vendor/github.com/klauspost/compress/flate/level3.go
@@ -11,10 +11,11 @@ type fastEncL3 struct {
// Encode uses a similar algorithm to level 2, will check up to two candidates.
func (e *fastEncL3) Encode(dst *tokens, src []byte) {
const (
- inputMargin = 8 - 1
+ inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
tableBits = 16
tableSize = 1 << tableBits
+ hashBytes = 5
)
if debugDeflate && e.cur < 0 {
@@ -69,20 +70,20 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
- cv := load3232(src, s)
+ cv := load6432(src, s)
for {
- const skipLog = 6
+ const skipLog = 7
nextS := s
var candidate tableEntry
for {
- nextHash := hash4u(cv, tableBits)
+ nextHash := hashLen(cv, tableBits, hashBytes)
s = nextS
nextS = s + 1 + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
candidates := e.table[nextHash]
- now := load3232(src, nextS)
+ now := load6432(src, nextS)
// Safe offset distance until s + 4...
minOffset := e.cur + s - (maxMatchOffset - 4)
@@ -96,8 +97,8 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
continue
}
- if cv == load3232(src, candidate.offset-e.cur) {
- if candidates.Prev.offset < minOffset || cv != load3232(src, candidates.Prev.offset-e.cur) {
+ if uint32(cv) == load3232(src, candidate.offset-e.cur) {
+ if candidates.Prev.offset < minOffset || uint32(cv) != load3232(src, candidates.Prev.offset-e.cur) {
break
}
// Both match and are valid, pick longest.
@@ -112,7 +113,7 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
// We only check if value mismatches.
// Offset will always be invalid in other cases.
candidate = candidates.Prev
- if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) {
+ if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
break
}
}
@@ -164,9 +165,9 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
if s >= sLimit {
t += l
// Index first pair after match end.
- if int(t+4) < len(src) && t > 0 {
- cv := load3232(src, t)
- nextHash := hash4u(cv, tableBits)
+ if int(t+8) < len(src) && t > 0 {
+ cv = load6432(src, t)
+ nextHash := hashLen(cv, tableBits, hashBytes)
e.table[nextHash] = tableEntryPrev{
Prev: e.table[nextHash].Cur,
Cur: tableEntry{offset: e.cur + t},
@@ -176,8 +177,8 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
}
// Store every 5th hash in-between.
- for i := s - l + 2; i < s-5; i += 5 {
- nextHash := hash4u(load3232(src, i), tableBits)
+ for i := s - l + 2; i < s-5; i += 6 {
+ nextHash := hashLen(load6432(src, i), tableBits, hashBytes)
e.table[nextHash] = tableEntryPrev{
Prev: e.table[nextHash].Cur,
Cur: tableEntry{offset: e.cur + i}}
@@ -185,23 +186,23 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-2 to s.
x := load6432(src, s-2)
- prevHash := hash4u(uint32(x), tableBits)
+ prevHash := hashLen(x, tableBits, hashBytes)
e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 2},
}
x >>= 8
- prevHash = hash4u(uint32(x), tableBits)
+ prevHash = hashLen(x, tableBits, hashBytes)
e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 1},
}
x >>= 8
- currHash := hash4u(uint32(x), tableBits)
+ currHash := hashLen(x, tableBits, hashBytes)
candidates := e.table[currHash]
- cv = uint32(x)
+ cv = x
e.table[currHash] = tableEntryPrev{
Prev: candidates.Cur,
Cur: tableEntry{offset: s + e.cur},
@@ -212,17 +213,17 @@ func (e *fastEncL3) Encode(dst *tokens, src []byte) {
minOffset := e.cur + s - (maxMatchOffset - 4)
if candidate.offset > minOffset {
- if cv == load3232(src, candidate.offset-e.cur) {
+ if uint32(cv) == load3232(src, candidate.offset-e.cur) {
// Found a match...
continue
}
candidate = candidates.Prev
- if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) {
+ if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) {
// Match at prev...
continue
}
}
- cv = uint32(x >> 8)
+ cv = x >> 8
s++
break
}
diff --git a/vendor/github.com/klauspost/compress/flate/level4.go b/vendor/github.com/klauspost/compress/flate/level4.go
index 1cbffa1ae..23c08b325 100644
--- a/vendor/github.com/klauspost/compress/flate/level4.go
+++ b/vendor/github.com/klauspost/compress/flate/level4.go
@@ -12,6 +12,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
+ hashShortBytes = 4
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
@@ -80,7 +81,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
nextS := s
var t int32
for {
- nextHashS := hash4x64(cv, tableBits)
+ nextHashS := hashLen(cv, tableBits, hashShortBytes)
nextHashL := hash7(cv, tableBits)
s = nextS
@@ -168,7 +169,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
// Index first pair after match end.
if int(s+8) < len(src) {
cv := load6432(src, s)
- e.table[hash4x64(cv, tableBits)] = tableEntry{offset: s + e.cur}
+ e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: s + e.cur}
e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur}
}
goto emitRemainder
@@ -183,7 +184,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
t2 := tableEntry{offset: t.offset + 1}
e.bTable[hash7(cv, tableBits)] = t
e.bTable[hash7(cv>>8, tableBits)] = t2
- e.table[hash4u(uint32(cv>>8), tableBits)] = t2
+ e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
i += 3
for ; i < s-1; i += 3 {
@@ -192,7 +193,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
t2 := tableEntry{offset: t.offset + 1}
e.bTable[hash7(cv, tableBits)] = t
e.bTable[hash7(cv>>8, tableBits)] = t2
- e.table[hash4u(uint32(cv>>8), tableBits)] = t2
+ e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
}
}
}
@@ -201,7 +202,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
// compression we first update the hash table at s-1 and at s.
x := load6432(src, s-1)
o := e.cur + s - 1
- prevHashS := hash4x64(x, tableBits)
+ prevHashS := hashLen(x, tableBits, hashShortBytes)
prevHashL := hash7(x, tableBits)
e.table[prevHashS] = tableEntry{offset: o}
e.bTable[prevHashL] = tableEntry{offset: o}
diff --git a/vendor/github.com/klauspost/compress/flate/level5.go b/vendor/github.com/klauspost/compress/flate/level5.go
index 4b97576bd..83ef50ba4 100644
--- a/vendor/github.com/klauspost/compress/flate/level5.go
+++ b/vendor/github.com/klauspost/compress/flate/level5.go
@@ -12,6 +12,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
+ hashShortBytes = 4
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
@@ -88,7 +89,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
var l int32
var t int32
for {
- nextHashS := hash4x64(cv, tableBits)
+ nextHashS := hashLen(cv, tableBits, hashShortBytes)
nextHashL := hash7(cv, tableBits)
s = nextS
@@ -105,7 +106,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
eLong := &e.bTable[nextHashL]
eLong.Cur, eLong.Prev = entry, eLong.Cur
- nextHashS = hash4x64(next, tableBits)
+ nextHashS = hashLen(next, tableBits, hashShortBytes)
nextHashL = hash7(next, tableBits)
t = lCandidate.Cur.offset - e.cur
@@ -191,14 +192,21 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
// Try to locate a better match by checking the end of best match...
if sAt := s + l; l < 30 && sAt < sLimit {
+ // Allow some bytes at the beginning to mismatch.
+ // Sweet spot is 2/3 bytes depending on input.
+ // 3 is only a little better when it is but sometimes a lot worse.
+ // The skipped bytes are tested in Extend backwards,
+ // and still picked up as part of the match if they do.
+ const skipBeginning = 2
eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset
- // Test current
- t2 := eLong - e.cur - l
- off := s - t2
+ t2 := eLong - e.cur - l + skipBeginning
+ s2 := s + skipBeginning
+ off := s2 - t2
if t2 >= 0 && off < maxMatchOffset && off > 0 {
- if l2 := e.matchlenLong(s, t2, src); l2 > l {
+ if l2 := e.matchlenLong(s2, t2, src); l2 > l {
t = t2
l = l2
+ s = s2
}
}
}
@@ -250,7 +258,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
if i < s-1 {
cv := load6432(src, i)
t := tableEntry{offset: i + e.cur}
- e.table[hash4x64(cv, tableBits)] = t
+ e.table[hashLen(cv, tableBits, hashShortBytes)] = t
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
@@ -263,7 +271,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
// We only have enough bits for a short entry at i+2
cv >>= 8
t = tableEntry{offset: t.offset + 1}
- e.table[hash4x64(cv, tableBits)] = t
+ e.table[hashLen(cv, tableBits, hashShortBytes)] = t
// Skip one - otherwise we risk hitting 's'
i += 4
@@ -273,7 +281,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
t2 := tableEntry{offset: t.offset + 1}
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = t, eLong.Cur
- e.table[hash4u(uint32(cv>>8), tableBits)] = t2
+ e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
}
}
}
@@ -282,7 +290,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
// compression we first update the hash table at s-1 and at s.
x := load6432(src, s-1)
o := e.cur + s - 1
- prevHashS := hash4x64(x, tableBits)
+ prevHashS := hashLen(x, tableBits, hashShortBytes)
prevHashL := hash7(x, tableBits)
e.table[prevHashS] = tableEntry{offset: o}
eLong := &e.bTable[prevHashL]
diff --git a/vendor/github.com/klauspost/compress/flate/level6.go b/vendor/github.com/klauspost/compress/flate/level6.go
index 62888edf3..f1e9d98fa 100644
--- a/vendor/github.com/klauspost/compress/flate/level6.go
+++ b/vendor/github.com/klauspost/compress/flate/level6.go
@@ -12,6 +12,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
const (
inputMargin = 12 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
+ hashShortBytes = 4
)
if debugDeflate && e.cur < 0 {
panic(fmt.Sprint("e.cur < 0: ", e.cur))
@@ -90,7 +91,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
var l int32
var t int32
for {
- nextHashS := hash4x64(cv, tableBits)
+ nextHashS := hashLen(cv, tableBits, hashShortBytes)
nextHashL := hash7(cv, tableBits)
s = nextS
nextS = s + doEvery + (s-nextEmit)>>skipLog
@@ -107,7 +108,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
eLong.Cur, eLong.Prev = entry, eLong.Cur
// Calculate hashes of 'next'
- nextHashS = hash4x64(next, tableBits)
+ nextHashS = hashLen(next, tableBits, hashShortBytes)
nextHashL = hash7(next, tableBits)
t = lCandidate.Cur.offset - e.cur
@@ -213,24 +214,33 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
// Try to locate a better match by checking the end-of-match...
if sAt := s + l; sAt < sLimit {
+ // Allow some bytes at the beginning to mismatch.
+ // Sweet spot is 2/3 bytes depending on input.
+ // 3 is only a little better when it is but sometimes a lot worse.
+ // The skipped bytes are tested in Extend backwards,
+ // and still picked up as part of the match if they do.
+ const skipBeginning = 2
eLong := &e.bTable[hash7(load6432(src, sAt), tableBits)]
// Test current
- t2 := eLong.Cur.offset - e.cur - l
- off := s - t2
+ t2 := eLong.Cur.offset - e.cur - l + skipBeginning
+ s2 := s + skipBeginning
+ off := s2 - t2
if off < maxMatchOffset {
if off > 0 && t2 >= 0 {
- if l2 := e.matchlenLong(s, t2, src); l2 > l {
+ if l2 := e.matchlenLong(s2, t2, src); l2 > l {
t = t2
l = l2
+ s = s2
}
}
// Test next:
- t2 = eLong.Prev.offset - e.cur - l
- off := s - t2
+ t2 = eLong.Prev.offset - e.cur - l + skipBeginning
+ off := s2 - t2
if off > 0 && off < maxMatchOffset && t2 >= 0 {
- if l2 := e.matchlenLong(s, t2, src); l2 > l {
+ if l2 := e.matchlenLong(s2, t2, src); l2 > l {
t = t2
l = l2
+ s = s2
}
}
}
@@ -277,7 +287,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
// Index after match end.
for i := nextS + 1; i < int32(len(src))-8; i += 2 {
cv := load6432(src, i)
- e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur}
+ e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: i + e.cur}
eLong := &e.bTable[hash7(cv, tableBits)]
eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur}, eLong.Cur
}
@@ -292,7 +302,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
t2 := tableEntry{offset: t.offset + 1}
eLong := &e.bTable[hash7(cv, tableBits)]
eLong2 := &e.bTable[hash7(cv>>8, tableBits)]
- e.table[hash4x64(cv, tableBits)] = t
+ e.table[hashLen(cv, tableBits, hashShortBytes)] = t
eLong.Cur, eLong.Prev = t, eLong.Cur
eLong2.Cur, eLong2.Prev = t2, eLong2.Cur
}
diff --git a/vendor/github.com/klauspost/compress/flate/stateless.go b/vendor/github.com/klauspost/compress/flate/stateless.go
index 93a1d1503..f3d4139ef 100644
--- a/vendor/github.com/klauspost/compress/flate/stateless.go
+++ b/vendor/github.com/klauspost/compress/flate/stateless.go
@@ -86,11 +86,19 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
dict = dict[len(dict)-maxStatelessDict:]
}
+ // For subsequent loops, keep shallow dict reference to avoid alloc+copy.
+ var inDict []byte
+
for len(in) > 0 {
todo := in
- if len(todo) > maxStatelessBlock-len(dict) {
+ if len(inDict) > 0 {
+ if len(todo) > maxStatelessBlock-maxStatelessDict {
+ todo = todo[:maxStatelessBlock-maxStatelessDict]
+ }
+ } else if len(todo) > maxStatelessBlock-len(dict) {
todo = todo[:maxStatelessBlock-len(dict)]
}
+ inOrg := in
in = in[len(todo):]
uncompressed := todo
if len(dict) > 0 {
@@ -102,7 +110,11 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
todo = combined
}
// Compress
- statelessEnc(&dst, todo, int16(len(dict)))
+ if len(inDict) == 0 {
+ statelessEnc(&dst, todo, int16(len(dict)))
+ } else {
+ statelessEnc(&dst, inDict[:maxStatelessDict+len(todo)], maxStatelessDict)
+ }
isEof := eof && len(in) == 0
if dst.n == 0 {
@@ -119,7 +131,8 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
}
if len(in) > 0 {
// Retain a dict if we have more
- dict = todo[len(todo)-maxStatelessDict:]
+ inDict = inOrg[len(uncompressed)-maxStatelessDict:]
+ dict = nil
dst.Reset()
}
if bw.err != nil {
diff --git a/vendor/github.com/klauspost/compress/s2/README.md b/vendor/github.com/klauspost/compress/s2/README.md
index 73c0c462d..1d80c42a5 100644
--- a/vendor/github.com/klauspost/compress/s2/README.md
+++ b/vendor/github.com/klauspost/compress/s2/README.md
@@ -325,35 +325,35 @@ The content compressed in this mode is fully compatible with the standard decode
Snappy vs S2 **compression** speed on 16 core (32 thread) computer, using all threads and a single thread (1 CPU):
-| File | S2 speed | S2 Throughput | S2 % smaller | S2 "better" | "better" throughput | "better" % smaller |
-|-----------------------------------------------------------------------------------------------------|----------|---------------|--------------|-------------|---------------------|--------------------|
-| [rawstudio-mint14.tar](https://files.klauspost.com/compress/rawstudio-mint14.7z) | 12.70x | 10556 MB/s | 7.35% | 4.15x | 3455 MB/s | 12.79% |
-| (1 CPU) | 1.14x | 948 MB/s | - | 0.42x | 349 MB/s | - |
-| [github-june-2days-2019.json](https://files.klauspost.com/compress/github-june-2days-2019.json.zst) | 17.13x | 14484 MB/s | 31.60% | 10.09x | 8533 MB/s | 37.71% |
-| (1 CPU) | 1.33x | 1127 MB/s | - | 0.70x | 589 MB/s | - |
-| [github-ranks-backup.bin](https://files.klauspost.com/compress/github-ranks-backup.bin.zst) | 15.14x | 12000 MB/s | -5.79% | 6.59x | 5223 MB/s | 5.80% |
-| (1 CPU) | 1.11x | 877 MB/s | - | 0.47x | 370 MB/s | - |
-| [consensus.db.10gb](https://files.klauspost.com/compress/consensus.db.10gb.zst) | 14.62x | 12116 MB/s | 15.90% | 5.35x | 4430 MB/s | 16.08% |
-| (1 CPU) | 1.38x | 1146 MB/s | - | 0.38x | 312 MB/s | - |
-| [adresser.json](https://files.klauspost.com/compress/adresser.json.zst) | 8.83x | 17579 MB/s | 43.86% | 6.54x | 13011 MB/s | 47.23% |
-| (1 CPU) | 1.14x | 2259 MB/s | - | 0.74x | 1475 MB/s | - |
-| [gob-stream](https://files.klauspost.com/compress/gob-stream.7z) | 16.72x | 14019 MB/s | 24.02% | 10.11x | 8477 MB/s | 30.48% |
-| (1 CPU) | 1.24x | 1043 MB/s | - | 0.70x | 586 MB/s | - |
-| [10gb.tar](http://mattmahoney.net/dc/10gb.html) | 13.33x | 9254 MB/s | 1.84% | 6.75x | 4686 MB/s | 6.72% |
-| (1 CPU) | 0.97x | 672 MB/s | - | 0.53x | 366 MB/s | - |
-| sharnd.out.2gb | 2.11x | 12639 MB/s | 0.01% | 1.98x | 11833 MB/s | 0.01% |
-| (1 CPU) | 0.93x | 5594 MB/s | - | 1.34x | 8030 MB/s | - |
-| [enwik9](http://mattmahoney.net/dc/textdata.html) | 19.34x | 8220 MB/s | 3.98% | 7.87x | 3345 MB/s | 15.82% |
-| (1 CPU) | 1.06x | 452 MB/s | - | 0.50x | 213 MB/s | - |
-| [silesia.tar](http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip) | 10.48x | 6124 MB/s | 5.67% | 3.76x | 2197 MB/s | 12.60% |
-| (1 CPU) | 0.97x | 568 MB/s | - | 0.46x | 271 MB/s | - |
-| [enwik10](https://encode.su/threads/3315-enwik10-benchmark-results) | 21.07x | 9020 MB/s | 6.36% | 6.91x | 2959 MB/s | 16.95% |
-| (1 CPU) | 1.07x | 460 MB/s | - | 0.51x | 220 MB/s | - |
+| File | S2 Speed | S2 Throughput | S2 % smaller | S2 "better" | "better" throughput | "better" % smaller |
+|---------------------------------------------------------------------------------------------------------|----------|---------------|--------------|-------------|---------------------|--------------------|
+| [rawstudio-mint14.tar](https://files.klauspost.com/compress/rawstudio-mint14.7z) | 16.33x | 10556 MB/s | 8.0% | 6.04x | 5252 MB/s | 14.7% |
+| (1 CPU) | 1.08x | 940 MB/s | - | 0.46x | 400 MB/s | - |
+| [github-june-2days-2019.json](https://files.klauspost.com/compress/github-june-2days-2019.json.zst) | 16.51x | 15224 MB/s | 31.70% | 9.47x | 8734 MB/s | 37.71% |
+| (1 CPU) | 1.26x | 1157 MB/s | - | 0.60x | 556 MB/s | - |
+| [github-ranks-backup.bin](https://files.klauspost.com/compress/github-ranks-backup.bin.zst) | 15.14x | 12598 MB/s | -5.76% | 6.23x | 5675 MB/s | 3.62% |
+| (1 CPU) | 1.02x | 932 MB/s | - | 0.47x | 432 MB/s | - |
+| [consensus.db.10gb](https://files.klauspost.com/compress/consensus.db.10gb.zst) | 11.21x | 12116 MB/s | 15.95% | 3.24x | 3500 MB/s | 18.00% |
+| (1 CPU) | 1.05x | 1135 MB/s | - | 0.27x | 292 MB/s | - |
+| [apache.log](https://files.klauspost.com/compress/apache.log.zst) | 8.55x | 16673 MB/s | 20.54% | 5.85x | 11420 MB/s | 24.97% |
+| (1 CPU) | 1.91x | 1771 MB/s | - | 0.53x | 1041 MB/s | - |
+| [gob-stream](https://files.klauspost.com/compress/gob-stream.7z) | 15.76x | 14357 MB/s | 24.01% | 8.67x | 7891 MB/s | 33.68% |
+| (1 CPU) | 1.17x | 1064 MB/s | - | 0.65x | 595 MB/s | - |
+| [10gb.tar](http://mattmahoney.net/dc/10gb.html) | 13.33x | 9835 MB/s | 2.34% | 6.85x | 4863 MB/s | 9.96% |
+| (1 CPU) | 0.97x | 689 MB/s | - | 0.55x | 387 MB/s | - |
+| sharnd.out.2gb | 9.11x | 13213 MB/s | 0.01% | 1.49x | 9184 MB/s | 0.01% |
+| (1 CPU) | 0.88x | 5418 MB/s | - | 0.77x | 5417 MB/s | - |
+| [sofia-air-quality-dataset csv](https://files.klauspost.com/compress/sofia-air-quality-dataset.tar.zst) | 22.00x | 11477 MB/s | 18.73% | 11.15x | 5817 MB/s | 27.88% |
+| (1 CPU) | 1.23x | 642 MB/s | - | 0.71x | 642 MB/s | - |
+| [silesia.tar](http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip) | 11.23x | 6520 MB/s | 5.9% | 5.35x | 3109 MB/s | 15.88% |
+| (1 CPU) | 1.05x | 607 MB/s | - | 0.52x | 304 MB/s | - |
+| [enwik9](https://files.klauspost.com/compress/enwik9.zst) | 19.28x | 8440 MB/s | 4.04% | 9.31x | 4076 MB/s | 18.04% |
+| (1 CPU) | 1.12x | 488 MB/s | - | 0.57x | 250 MB/s | - |
### Legend
-* `S2 speed`: Speed of S2 compared to Snappy, using 16 cores and 1 core.
-* `S2 throughput`: Throughput of S2 in MB/s.
+* `S2 Speed`: Speed of S2 compared to Snappy, using 16 cores and 1 core.
+* `S2 Throughput`: Throughput of S2 in MB/s.
* `S2 % smaller`: How many percent smaller the S2 output is compared to the Snappy output.
* `S2 "better"`: Speed when enabling "better" compression mode in S2 compared to Snappy.
* `"better" throughput`: Throughput of S2 in MB/s when "better" compression mode is enabled.
@@ -361,7 +361,7 @@ Snappy vs S2 **compression** speed on 16 core (32 thread) computer, using all th
There is a good speedup across the board when using a single thread and a significant speedup when using multiple threads.
-Machine generated data gets by far the biggest compression boost, with size being being reduced by up to 45% of Snappy size.
+Machine generated data gets by far the biggest compression boost, with size being reduced by up to 35% of Snappy size.
The "better" compression mode sees a good improvement in all cases, but usually at a performance cost.
@@ -404,15 +404,15 @@ The "better" compression mode will actively look for shorter matches, which is w
Without assembly decompression is also very fast; single goroutine decompression speed. No assembly:
| File | S2 Speed | S2 Throughput |
-|--------------------------------|--------------|---------------|
-| consensus.db.10gb.s2 | 1.84x | 2289.8 MB/s |
-| 10gb.tar.s2 | 1.30x | 867.07 MB/s |
-| rawstudio-mint14.tar.s2 | 1.66x | 1329.65 MB/s |
-| github-june-2days-2019.json.s2 | 2.36x | 1831.59 MB/s |
-| github-ranks-backup.bin.s2 | 1.73x | 1390.7 MB/s |
-| enwik9.s2 | 1.67x | 681.53 MB/s |
-| adresser.json.s2 | 3.41x | 4230.53 MB/s |
-| silesia.tar.s2 | 1.52x | 811.58 |
+|--------------------------------|---------------|---------------|
+| consensus.db.10gb.s2 | 1.84x | 2289.8 MB/s |
+| 10gb.tar.s2 | 1.30x | 867.07 MB/s |
+| rawstudio-mint14.tar.s2 | 1.66x | 1329.65 MB/s |
+| github-june-2days-2019.json.s2 | 2.36x | 1831.59 MB/s |
+| github-ranks-backup.bin.s2 | 1.73x | 1390.7 MB/s |
+| enwik9.s2 | 1.67x | 681.53 MB/s |
+| adresser.json.s2 | 3.41x | 4230.53 MB/s |
+| silesia.tar.s2                 | 1.52x         | 811.58 MB/s   |
Even though S2 typically compresses better than Snappy, decompression speed is always better.
@@ -450,14 +450,14 @@ The most reliable is a wide dataset.
For this we use [`webdevdata.org-2015-01-07-subset`](https://files.klauspost.com/compress/webdevdata.org-2015-01-07-4GB-subset.7z),
53927 files, total input size: 4,014,735,833 bytes. Single goroutine used.
-| * | Input | Output | Reduction | MB/s |
-|-------------------|------------|------------|-----------|--------|
-| S2 | 4014735833 | 1059723369 | 73.60% | **934.34** |
-| S2 Better | 4014735833 | 969670507 | 75.85% | 532.70 |
-| S2 Best | 4014735833 | 906625668 | **77.85%** | 46.84 |
-| Snappy | 4014735833 | 1128706759 | 71.89% | 762.59 |
-| S2, Snappy Output | 4014735833 | 1093821420 | 72.75% | 908.60 |
-| LZ4 | 4014735833 | 1079259294 | 73.12% | 526.94 |
+| * | Input | Output | Reduction | MB/s |
+|-------------------|------------|------------|------------|------------|
+| S2 | 4014735833 | 1059723369 | 73.60% | **936.73** |
+| S2 Better | 4014735833 | 961580539 | 76.05% | 451.10 |
+| S2 Best | 4014735833 | 899182886 | **77.60%** | 46.84 |
+| Snappy | 4014735833 | 1128706759 | 71.89% | 790.15 |
+| S2, Snappy Output | 4014735833 | 1093823291 | 72.75% | 936.60 |
+| LZ4 | 4014735833 | 1063768713 | 73.50% | 452.02 |
S2 delivers both the best single threaded throughput with regular mode and the best compression rate with "best".
"Better" mode provides the same compression speed as LZ4 with better compression ratio.
@@ -489,42 +489,23 @@ AMD64 assembly is use for both S2 and Snappy.
| Absolute Perf | Snappy size | S2 Size | Snappy Speed | S2 Speed | Snappy dec | S2 dec |
|-----------------------|-------------|---------|--------------|-------------|-------------|-------------|
-| html | 22843 | 21111 | 16246 MB/s | 17438 MB/s | 40972 MB/s | 49263 MB/s |
-| urls.10K | 335492 | 287326 | 7943 MB/s | 9693 MB/s | 22523 MB/s | 26484 MB/s |
-| fireworks.jpeg | 123034 | 123100 | 349544 MB/s | 273889 MB/s | 718321 MB/s | 827552 MB/s |
-| fireworks.jpeg (200B) | 146 | 155 | 8869 MB/s | 17773 MB/s | 33691 MB/s | 52421 MB/s |
-| paper-100k.pdf | 85304 | 84459 | 167546 MB/s | 101263 MB/s | 326905 MB/s | 291944 MB/s |
-| html_x_4 | 92234 | 21113 | 15194 MB/s | 50670 MB/s | 30843 MB/s | 32217 MB/s |
-| alice29.txt | 88034 | 85975 | 5936 MB/s | 6139 MB/s | 12882 MB/s | 20044 MB/s |
-| asyoulik.txt | 77503 | 79650 | 5517 MB/s | 6366 MB/s | 12735 MB/s | 22806 MB/s |
-| lcet10.txt | 234661 | 220670 | 6235 MB/s | 6067 MB/s | 14519 MB/s | 18697 MB/s |
-| plrabn12.txt | 319267 | 317985 | 5159 MB/s | 5726 MB/s | 11923 MB/s | 19901 MB/s |
-| geo.protodata | 23335 | 18690 | 21220 MB/s | 26529 MB/s | 56271 MB/s | 62540 MB/s |
-| kppkn.gtb | 69526 | 65312 | 9732 MB/s | 8559 MB/s | 18491 MB/s | 18969 MB/s |
-| alice29.txt (128B) | 80 | 82 | 6691 MB/s | 15489 MB/s | 31883 MB/s | 38874 MB/s |
-| alice29.txt (1000B) | 774 | 774 | 12204 MB/s | 13000 MB/s | 48056 MB/s | 52341 MB/s |
-| alice29.txt (10000B) | 6648 | 6933 | 10044 MB/s | 12806 MB/s | 32378 MB/s | 46322 MB/s |
-| alice29.txt (20000B) | 12686 | 13574 | 7733 MB/s | 11210 MB/s | 30566 MB/s | 58969 MB/s |
-
-
-| Relative Perf | Snappy size | S2 size improved | S2 Speed | S2 Dec Speed |
-|-----------------------|-------------|------------------|----------|--------------|
-| html | 22.31% | 7.58% | 1.07x | 1.20x |
-| urls.10K | 47.78% | 14.36% | 1.22x | 1.18x |
-| fireworks.jpeg | 99.95% | -0.05% | 0.78x | 1.15x |
-| fireworks.jpeg (200B) | 73.00% | -6.16% | 2.00x | 1.56x |
-| paper-100k.pdf | 83.30% | 0.99% | 0.60x | 0.89x |
-| html_x_4 | 22.52% | 77.11% | 3.33x | 1.04x |
-| alice29.txt | 57.88% | 2.34% | 1.03x | 1.56x |
-| asyoulik.txt | 61.91% | -2.77% | 1.15x | 1.79x |
-| lcet10.txt | 54.99% | 5.96% | 0.97x | 1.29x |
-| plrabn12.txt | 66.26% | 0.40% | 1.11x | 1.67x |
-| geo.protodata | 19.68% | 19.91% | 1.25x | 1.11x |
-| kppkn.gtb | 37.72% | 6.06% | 0.88x | 1.03x |
-| alice29.txt (128B) | 62.50% | -2.50% | 2.31x | 1.22x |
-| alice29.txt (1000B) | 77.40% | 0.00% | 1.07x | 1.09x |
-| alice29.txt (10000B) | 66.48% | -4.29% | 1.27x | 1.43x |
-| alice29.txt (20000B) | 63.43% | -7.00% | 1.45x | 1.93x |
+| html | 22843 | 20868 | 16246 MB/s | 18617 MB/s | 40972 MB/s | 49263 MB/s |
+| urls.10K | 335492 | 286541 | 7943 MB/s | 10201 MB/s | 22523 MB/s | 26484 MB/s |
+| fireworks.jpeg | 123034 | 123100 | 349544 MB/s | 303228 MB/s | 718321 MB/s | 827552 MB/s |
+| fireworks.jpeg (200B) | 146 | 155 | 8869 MB/s | 20180 MB/s | 33691 MB/s | 52421 MB/s |
+| paper-100k.pdf | 85304 | 84202 | 167546 MB/s | 112988 MB/s | 326905 MB/s | 291944 MB/s |
+| html_x_4 | 92234 | 20870 | 15194 MB/s | 54457 MB/s | 30843 MB/s | 32217 MB/s |
+| alice29.txt | 88034 | 85934 | 5936 MB/s | 6540 MB/s | 12882 MB/s | 20044 MB/s |
+| asyoulik.txt | 77503 | 79575 | 5517 MB/s | 6657 MB/s | 12735 MB/s | 22806 MB/s |
+| lcet10.txt | 234661 | 220383 | 6235 MB/s | 6303 MB/s | 14519 MB/s | 18697 MB/s |
+| plrabn12.txt | 319267 | 318196 | 5159 MB/s | 6074 MB/s | 11923 MB/s | 19901 MB/s |
+| geo.protodata | 23335 | 18606 | 21220 MB/s | 25432 MB/s | 56271 MB/s | 62540 MB/s |
+| kppkn.gtb | 69526 | 65019 | 9732 MB/s | 8905 MB/s | 18491 MB/s | 18969 MB/s |
+| alice29.txt (128B) | 80 | 82 | 6691 MB/s | 17179 MB/s | 31883 MB/s | 38874 MB/s |
+| alice29.txt (1000B) | 774 | 774 | 12204 MB/s | 13273 MB/s | 48056 MB/s | 52341 MB/s |
+| alice29.txt (10000B) | 6648 | 6933 | 10044 MB/s | 12824 MB/s | 32378 MB/s | 46322 MB/s |
+| alice29.txt (20000B) | 12686 | 13516 | 7733 MB/s | 12160 MB/s | 30566 MB/s | 58969 MB/s |
+
Speed is generally at or above Snappy. Small blocks get a significant speedup, although at the expense of size.
@@ -543,42 +524,23 @@ So individual benchmarks should only be seen as a guideline and the overall pict
| Absolute Perf | Snappy size | Better Size | Snappy Speed | Better Speed | Snappy dec | Better dec |
|-----------------------|-------------|-------------|--------------|--------------|-------------|-------------|
-| html | 22843 | 19833 | 16246 MB/s | 7731 MB/s | 40972 MB/s | 40292 MB/s |
-| urls.10K | 335492 | 253529 | 7943 MB/s | 3980 MB/s | 22523 MB/s | 20981 MB/s |
-| fireworks.jpeg | 123034 | 123100 | 349544 MB/s | 9760 MB/s | 718321 MB/s | 823698 MB/s |
-| fireworks.jpeg (200B) | 146 | 142 | 8869 MB/s | 594 MB/s | 33691 MB/s | 30101 MB/s |
-| paper-100k.pdf | 85304 | 82915 | 167546 MB/s | 7470 MB/s | 326905 MB/s | 198869 MB/s |
-| html_x_4 | 92234 | 19841 | 15194 MB/s | 23403 MB/s | 30843 MB/s | 30937 MB/s |
-| alice29.txt | 88034 | 73218 | 5936 MB/s | 2945 MB/s | 12882 MB/s | 16611 MB/s |
-| asyoulik.txt | 77503 | 66844 | 5517 MB/s | 2739 MB/s | 12735 MB/s | 14975 MB/s |
-| lcet10.txt | 234661 | 190589 | 6235 MB/s | 3099 MB/s | 14519 MB/s | 16634 MB/s |
-| plrabn12.txt | 319267 | 270828 | 5159 MB/s | 2600 MB/s | 11923 MB/s | 13382 MB/s |
-| geo.protodata | 23335 | 18278 | 21220 MB/s | 11208 MB/s | 56271 MB/s | 57961 MB/s |
-| kppkn.gtb | 69526 | 61851 | 9732 MB/s | 4556 MB/s | 18491 MB/s | 16524 MB/s |
-| alice29.txt (128B) | 80 | 81 | 6691 MB/s | 529 MB/s | 31883 MB/s | 34225 MB/s |
-| alice29.txt (1000B) | 774 | 748 | 12204 MB/s | 1943 MB/s | 48056 MB/s | 42068 MB/s |
-| alice29.txt (10000B) | 6648 | 6234 | 10044 MB/s | 2949 MB/s | 32378 MB/s | 28813 MB/s |
-| alice29.txt (20000B) | 12686 | 11584 | 7733 MB/s | 2822 MB/s | 30566 MB/s | 27315 MB/s |
-
-
-| Relative Perf | Snappy size | Better size | Better Speed | Better dec |
-|-----------------------|-------------|-------------|--------------|------------|
-| html | 22.31% | 13.18% | 0.48x | 0.98x |
-| urls.10K | 47.78% | 24.43% | 0.50x | 0.93x |
-| fireworks.jpeg | 99.95% | -0.05% | 0.03x | 1.15x |
-| fireworks.jpeg (200B) | 73.00% | 2.74% | 0.07x | 0.89x |
-| paper-100k.pdf | 83.30% | 2.80% | 0.07x | 0.61x |
-| html_x_4 | 22.52% | 78.49% | 0.04x | 1.00x |
-| alice29.txt | 57.88% | 16.83% | 1.54x | 1.29x |
-| asyoulik.txt | 61.91% | 13.75% | 0.50x | 1.18x |
-| lcet10.txt | 54.99% | 18.78% | 0.50x | 1.15x |
-| plrabn12.txt | 66.26% | 15.17% | 0.50x | 1.12x |
-| geo.protodata | 19.68% | 21.67% | 0.50x | 1.03x |
-| kppkn.gtb | 37.72% | 11.04% | 0.53x | 0.89x |
-| alice29.txt (128B) | 62.50% | -1.25% | 0.47x | 1.07x |
-| alice29.txt (1000B) | 77.40% | 3.36% | 0.08x | 0.88x |
-| alice29.txt (10000B) | 66.48% | 6.23% | 0.16x | 0.89x |
-| alice29.txt (20000B) | 63.43% | 8.69% | 0.29x | 0.89x |
+| html | 22843 | 18972 | 16246 MB/s | 8621 MB/s | 40972 MB/s | 40292 MB/s |
+| urls.10K | 335492 | 248079 | 7943 MB/s | 5104 MB/s | 22523 MB/s | 20981 MB/s |
+| fireworks.jpeg | 123034 | 123100 | 349544 MB/s | 84429 MB/s | 718321 MB/s | 823698 MB/s |
+| fireworks.jpeg (200B) | 146 | 149 | 8869 MB/s | 7125 MB/s | 33691 MB/s | 30101 MB/s |
+| paper-100k.pdf | 85304 | 82887 | 167546 MB/s | 11087 MB/s | 326905 MB/s | 198869 MB/s |
+| html_x_4 | 92234 | 18982 | 15194 MB/s | 29316 MB/s | 30843 MB/s | 30937 MB/s |
+| alice29.txt | 88034 | 71611 | 5936 MB/s | 3709 MB/s | 12882 MB/s | 16611 MB/s |
+| asyoulik.txt | 77503 | 65941 | 5517 MB/s | 3380 MB/s | 12735 MB/s | 14975 MB/s |
+| lcet10.txt | 234661 | 184939 | 6235 MB/s | 3537 MB/s | 14519 MB/s | 16634 MB/s |
+| plrabn12.txt | 319267 | 264990 | 5159 MB/s | 2960 MB/s | 11923 MB/s | 13382 MB/s |
+| geo.protodata | 23335 | 17689 | 21220 MB/s | 10859 MB/s | 56271 MB/s | 57961 MB/s |
+| kppkn.gtb | 69526 | 55398 | 9732 MB/s | 5206 MB/s | 18491 MB/s | 16524 MB/s |
+| alice29.txt (128B) | 80 | 78 | 6691 MB/s | 7422 MB/s | 31883 MB/s | 34225 MB/s |
+| alice29.txt (1000B) | 774 | 746 | 12204 MB/s | 5734 MB/s | 48056 MB/s | 42068 MB/s |
+| alice29.txt (10000B) | 6648 | 6218 | 10044 MB/s | 6055 MB/s | 32378 MB/s | 28813 MB/s |
+| alice29.txt (20000B) | 12686 | 11492 | 7733 MB/s | 3143 MB/s | 30566 MB/s | 27315 MB/s |
+
Except for the mostly incompressible JPEG image, compression is better and usually in the
double digits in terms of percentage reduction over Snappy.
@@ -605,29 +567,29 @@ Some examples compared on 16 core CPU, amd64 assembly used:
```
* enwik10
-Default... 10000000000 -> 4761467548 [47.61%]; 1.098s, 8685.6MB/s
-Better... 10000000000 -> 4219438251 [42.19%]; 1.925s, 4954.2MB/s
-Best... 10000000000 -> 3627364337 [36.27%]; 43.051s, 221.5MB/s
+Default... 10000000000 -> 4759950115 [47.60%]; 1.03s, 9263.0MB/s
+Better... 10000000000 -> 4084706676 [40.85%]; 2.16s, 4415.4MB/s
+Best... 10000000000 -> 3615520079 [36.16%]; 42.259s, 225.7MB/s
* github-june-2days-2019.json
-Default... 6273951764 -> 1043196283 [16.63%]; 431ms, 13882.3MB/s
-Better... 6273951764 -> 949146808 [15.13%]; 547ms, 10938.4MB/s
-Best... 6273951764 -> 832855506 [13.27%]; 9.455s, 632.8MB/s
+Default... 6273951764 -> 1041700255 [16.60%]; 431ms, 13882.3MB/s
+Better... 6273951764 -> 945841238 [15.08%]; 547ms, 10938.4MB/s
+Best... 6273951764 -> 826392576 [13.17%]; 9.455s, 632.8MB/s
* nyc-taxi-data-10M.csv
-Default... 3325605752 -> 1095998837 [32.96%]; 324ms, 9788.7MB/s
-Better... 3325605752 -> 954776589 [28.71%]; 491ms, 6459.4MB/s
-Best... 3325605752 -> 779098746 [23.43%]; 8.29s, 382.6MB/s
+Default... 3325605752 -> 1093516949 [32.88%]; 324ms, 9788.7MB/s
+Better... 3325605752 -> 885394158 [26.62%]; 491ms, 6459.4MB/s
+Best... 3325605752 -> 773681257 [23.26%]; 8.29s, 412.0MB/s
* 10gb.tar
-Default... 10065157632 -> 5916578242 [58.78%]; 1.028s, 9337.4MB/s
-Better... 10065157632 -> 5649207485 [56.13%]; 1.597s, 6010.6MB/s
-Best... 10065157632 -> 5208719802 [51.75%]; 32.78s, 292.8MB/
+Default... 10065157632 -> 5915541066 [58.77%]; 1.028s, 9337.4MB/s
+Better... 10065157632 -> 5453844650 [54.19%]; 1.597s, 4862.7MB/s
+Best... 10065157632 -> 5192495021 [51.59%]; 32.78s, 308.2MB/s
* consensus.db.10gb
-Default... 10737418240 -> 4562648848 [42.49%]; 882ms, 11610.0MB/s
-Better... 10737418240 -> 4542428129 [42.30%]; 1.533s, 6679.7MB/s
-Best... 10737418240 -> 4244773384 [39.53%]; 42.96s, 238.4MB/s
+Default... 10737418240 -> 4549762344 [42.37%]; 882ms, 12118.4MB/s
+Better... 10737418240 -> 4438535064 [41.34%]; 1.533s, 3500.9MB/s
+Best... 10737418240 -> 4210602774 [39.21%]; 42.96s, 254.4MB/s
```
Decompression speed should be around the same as using the 'better' compression mode.
@@ -648,10 +610,10 @@ If you would like more control, you can use the s2 package as described below:
Snappy compatible blocks can be generated with the S2 encoder.
Compression and speed are typically a bit better, and `MaxEncodedLen` is also smaller for lower memory usage. Replace
-| Snappy | S2 replacement |
-|----------------------------|-------------------------|
-| snappy.Encode(...) | s2.EncodeSnappy(...) |
-| snappy.MaxEncodedLen(...) | s2.MaxEncodedLen(...) |
+| Snappy | S2 replacement |
+|---------------------------|-----------------------|
+| snappy.Encode(...) | s2.EncodeSnappy(...) |
+| snappy.MaxEncodedLen(...) | s2.MaxEncodedLen(...) |
`s2.EncodeSnappy` can be replaced with `s2.EncodeSnappyBetter` or `s2.EncodeSnappyBest` to get more efficiently compressed snappy compatible output.
@@ -660,12 +622,12 @@ Compression and speed is typically a bit better `MaxEncodedLen` is also smaller
Comparison of [`webdevdata.org-2015-01-07-subset`](https://files.klauspost.com/compress/webdevdata.org-2015-01-07-4GB-subset.7z),
53927 files, total input size: 4,014,735,833 bytes. amd64, single goroutine used:
-| Encoder | Size | MB/s | Reduction |
-|-----------------------|------------|------------|------------
-| snappy.Encode | 1128706759 | 725.59 | 71.89% |
-| s2.EncodeSnappy | 1093823291 | **899.16** | 72.75% |
-| s2.EncodeSnappyBetter | 1001158548 | 578.49 | 75.06% |
-| s2.EncodeSnappyBest | 944507998 | 66.00 | **76.47%**|
+| Encoder | Size | MB/s | Reduction |
+|-----------------------|------------|------------|------------|
+| snappy.Encode | 1128706759 | 725.59 | 71.89% |
+| s2.EncodeSnappy | 1093823291 | **899.16** | 72.75% |
+| s2.EncodeSnappyBetter | 1001158548 | 578.49 | 75.06% |
+| s2.EncodeSnappyBest | 944507998 | 66.00 | **76.47%** |
## Streams
@@ -835,6 +797,13 @@ This is done using the regular "Skip" function:
This will ensure that we are at exactly the offset we want, and reading from `dec` will start at the requested offset.
+# Compact storage
+
+For compact storage [RemoveIndexHeaders](https://pkg.go.dev/github.com/klauspost/compress/s2#RemoveIndexHeaders) can be used to remove any redundant info from
+a serialized index. If you remove the header it must be restored before [Loading](https://pkg.go.dev/github.com/klauspost/compress/s2#Index.Load).
+
+This is expected to save 20 bytes. These can be restored using [RestoreIndexHeaders](https://pkg.go.dev/github.com/klauspost/compress/s2#RestoreIndexHeaders). This removes a layer of security, but is the most compact representation. Returns nil if the headers contain errors.
+
## Index Format:
Each block is structured as a snappy skippable block, with the chunk ID 0x99.
@@ -844,20 +813,20 @@ The block can be read from the front, but contains information so it can be read
Numbers are stored as fixed size little endian values or [zigzag encoded](https://developers.google.com/protocol-buffers/docs/encoding#signed_integers) [base 128 varints](https://developers.google.com/protocol-buffers/docs/encoding),
with un-encoded value length of 64 bits, unless other limits are specified.
-| Content | Format |
-|---------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------|
-| ID, `[1]byte` | Always 0x99. |
-| Data Length, `[3]byte` | 3 byte little-endian length of the chunk in bytes, following this. |
-| Header `[6]byte` | Header, must be `[115, 50, 105, 100, 120, 0]` or in text: "s2idx\x00". |
-| UncompressedSize, Varint | Total Uncompressed size. |
-| CompressedSize, Varint | Total Compressed size if known. Should be -1 if unknown. |
-| EstBlockSize, Varint | Block Size, used for guessing uncompressed offsets. Must be >= 0. |
-| Entries, Varint | Number of Entries in index, must be < 65536 and >=0. |
-| HasUncompressedOffsets `byte` | 0 if no uncompressed offsets are present, 1 if present. Other values are invalid. |
-| UncompressedOffsets, [Entries]VarInt | Uncompressed offsets. See below how to decode. |
-| CompressedOffsets, [Entries]VarInt | Compressed offsets. See below how to decode. |
-| Block Size, `[4]byte` | Little Endian total encoded size (including header and trailer). Can be used for searching backwards to start of block. |
-| Trailer `[6]byte` | Trailer, must be `[0, 120, 100, 105, 50, 115]` or in text: "\x00xdi2s". Can be used for identifying block from end of stream. |
+| Content | Format |
+|--------------------------------------|-------------------------------------------------------------------------------------------------------------------------------|
+| ID, `[1]byte` | Always 0x99. |
+| Data Length, `[3]byte` | 3 byte little-endian length of the chunk in bytes, following this. |
+| Header `[6]byte` | Header, must be `[115, 50, 105, 100, 120, 0]` or in text: "s2idx\x00". |
+| UncompressedSize, Varint | Total Uncompressed size. |
+| CompressedSize, Varint | Total Compressed size if known. Should be -1 if unknown. |
+| EstBlockSize, Varint | Block Size, used for guessing uncompressed offsets. Must be >= 0. |
+| Entries, Varint | Number of Entries in index, must be < 65536 and >=0. |
+| HasUncompressedOffsets `byte` | 0 if no uncompressed offsets are present, 1 if present. Other values are invalid. |
+| UncompressedOffsets, [Entries]VarInt | Uncompressed offsets. See below how to decode. |
+| CompressedOffsets, [Entries]VarInt | Compressed offsets. See below how to decode. |
+| Block Size, `[4]byte` | Little Endian total encoded size (including header and trailer). Can be used for searching backwards to start of block. |
+| Trailer `[6]byte` | Trailer, must be `[0, 120, 100, 105, 50, 115]` or in text: "\x00xdi2s". Can be used for identifying block from end of stream. |
For regular streams the uncompressed offsets are fully predictable,
so `HasUncompressedOffsets` allows to specify that compressed blocks all have
@@ -929,6 +898,7 @@ To decode from any given uncompressed offset `(wantOffset)`:
See [using indexes](https://github.com/klauspost/compress/tree/master/s2#using-indexes) for functions that perform the operations with a simpler interface.
+
# Format Extensions
* Frame [Stream identifier](https://github.com/google/snappy/blob/master/framing_format.txt#L68) changed from `sNaPpY` to `S2sTwO`.
@@ -951,10 +921,11 @@ The length is specified by reading the 3-bit length specified in the tag and dec
| 7 | 65540 + read 3 bytes |
This allows any repeat offset + length to be represented by 2 to 5 bytes.
+It also allows matches longer than 64 bytes to be emitted with one copy + one repeat instead of several 64 byte copies.
Lengths are stored as little endian values.
-The first copy of a block cannot be a repeat offset and the offset is not carried across blocks in streams.
+The first copy of a block cannot be a repeat offset and the offset is reset on every block in streams.
Default streaming block size is 1MB.
diff --git a/vendor/github.com/klauspost/compress/s2/decode.go b/vendor/github.com/klauspost/compress/s2/decode.go
index 27c0f3c2c..00c5cc72c 100644
--- a/vendor/github.com/klauspost/compress/s2/decode.go
+++ b/vendor/github.com/klauspost/compress/s2/decode.go
@@ -952,7 +952,11 @@ func (r *Reader) ReadSeeker(random bool, index []byte) (*ReadSeeker, error) {
// Seek allows seeking in compressed data.
func (r *ReadSeeker) Seek(offset int64, whence int) (int64, error) {
if r.err != nil {
- return 0, r.err
+ if !errors.Is(r.err, io.EOF) {
+ return 0, r.err
+ }
+ // Reset on EOF
+ r.err = nil
}
if offset == 0 && whence == io.SeekCurrent {
return r.blockStart + int64(r.i), nil
diff --git a/vendor/github.com/klauspost/compress/s2/decode_other.go b/vendor/github.com/klauspost/compress/s2/decode_other.go
index 1074ebd21..11300c3a8 100644
--- a/vendor/github.com/klauspost/compress/s2/decode_other.go
+++ b/vendor/github.com/klauspost/compress/s2/decode_other.go
@@ -28,6 +28,9 @@ func s2Decode(dst, src []byte) int {
// As long as we can read at least 5 bytes...
for s < len(src)-5 {
+ // Removing bounds checks is SLOWER when doing
+ // in := src[s:s+5]
+ // Checked on Go 1.18
switch src[s] & 0x03 {
case tagLiteral:
x := uint32(src[s] >> 2)
@@ -38,14 +41,19 @@ func s2Decode(dst, src []byte) int {
s += 2
x = uint32(src[s-1])
case x == 61:
+ in := src[s : s+3]
+ x = uint32(in[1]) | uint32(in[2])<<8
s += 3
- x = uint32(src[s-2]) | uint32(src[s-1])<<8
case x == 62:
+ in := src[s : s+4]
+ // Load as 32 bit and shift down.
+ x = uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24
+ x >>= 8
s += 4
- x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
case x == 63:
+ in := src[s : s+5]
+ x = uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24
s += 5
- x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
}
length = int(x) + 1
if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
@@ -62,8 +70,8 @@ func s2Decode(dst, src []byte) int {
case tagCopy1:
s += 2
- length = int(src[s-2]) >> 2 & 0x7
toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
+ length = int(src[s-2]) >> 2 & 0x7
if toffset == 0 {
if debug {
fmt.Print("(repeat) ")
@@ -71,14 +79,16 @@ func s2Decode(dst, src []byte) int {
// keep last offset
switch length {
case 5:
+ length = int(src[s]) + 4
s += 1
- length = int(uint32(src[s-1])) + 4
case 6:
+ in := src[s : s+2]
+ length = int(uint32(in[0])|(uint32(in[1])<<8)) + (1 << 8)
s += 2
- length = int(uint32(src[s-2])|(uint32(src[s-1])<<8)) + (1 << 8)
case 7:
+ in := src[s : s+3]
+ length = int((uint32(in[2])<<16)|(uint32(in[1])<<8)|uint32(in[0])) + (1 << 16)
s += 3
- length = int(uint32(src[s-3])|(uint32(src[s-2])<<8)|(uint32(src[s-1])<<16)) + (1 << 16)
default: // 0-> 4
}
} else {
@@ -86,14 +96,16 @@ func s2Decode(dst, src []byte) int {
}
length += 4
case tagCopy2:
+ in := src[s : s+3]
+ offset = int(uint32(in[1]) | uint32(in[2])<<8)
+ length = 1 + int(in[0])>>2
s += 3
- length = 1 + int(src[s-3])>>2
- offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
case tagCopy4:
+ in := src[s : s+5]
+ offset = int(uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24)
+ length = 1 + int(in[0])>>2
s += 5
- length = 1 + int(src[s-5])>>2
- offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
}
if offset <= 0 || d < offset || length > len(dst)-d {
diff --git a/vendor/github.com/klauspost/compress/s2/encode_all.go b/vendor/github.com/klauspost/compress/s2/encode_all.go
index 8b16c38a6..54c71d3b5 100644
--- a/vendor/github.com/klauspost/compress/s2/encode_all.go
+++ b/vendor/github.com/klauspost/compress/s2/encode_all.go
@@ -58,8 +58,9 @@ func encodeGo(dst, src []byte) []byte {
// been written.
//
// It also assumes that:
+//
// len(dst) >= MaxEncodedLen(len(src)) &&
-// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
+// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockGo(dst, src []byte) (d int) {
// Initialize the hash table.
const (
diff --git a/vendor/github.com/klauspost/compress/s2/encode_amd64.go b/vendor/github.com/klauspost/compress/s2/encode_amd64.go
index e612225f4..6b93daa5a 100644
--- a/vendor/github.com/klauspost/compress/s2/encode_amd64.go
+++ b/vendor/github.com/klauspost/compress/s2/encode_amd64.go
@@ -8,8 +8,9 @@ package s2
// been written.
//
// It also assumes that:
+//
// len(dst) >= MaxEncodedLen(len(src)) &&
-// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
+// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlock(dst, src []byte) (d int) {
const (
// Use 12 bit table when less than...
@@ -43,8 +44,9 @@ func encodeBlock(dst, src []byte) (d int) {
// been written.
//
// It also assumes that:
+//
// len(dst) >= MaxEncodedLen(len(src)) &&
-// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
+// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetter(dst, src []byte) (d int) {
const (
// Use 12 bit table when less than...
@@ -78,8 +80,9 @@ func encodeBlockBetter(dst, src []byte) (d int) {
// been written.
//
// It also assumes that:
+//
// len(dst) >= MaxEncodedLen(len(src)) &&
-// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
+// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockSnappy(dst, src []byte) (d int) {
const (
// Use 12 bit table when less than...
@@ -112,8 +115,9 @@ func encodeBlockSnappy(dst, src []byte) (d int) {
// been written.
//
// It also assumes that:
+//
// len(dst) >= MaxEncodedLen(len(src)) &&
-// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
+// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetterSnappy(dst, src []byte) (d int) {
const (
// Use 12 bit table when less than...
diff --git a/vendor/github.com/klauspost/compress/s2/encode_best.go b/vendor/github.com/klauspost/compress/s2/encode_best.go
index 4bc80bc6a..1b7ea394f 100644
--- a/vendor/github.com/klauspost/compress/s2/encode_best.go
+++ b/vendor/github.com/klauspost/compress/s2/encode_best.go
@@ -15,8 +15,9 @@ import (
// been written.
//
// It also assumes that:
+//
// len(dst) >= MaxEncodedLen(len(src)) &&
-// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
+// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBest(dst, src []byte) (d int) {
// Initialize the hash tables.
const (
@@ -176,14 +177,21 @@ func encodeBlockBest(dst, src []byte) (d int) {
best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))
}
// Search for a match at best match end, see if that is better.
- if sAt := best.s + best.length; sAt < sLimit {
- sBack := best.s
- backL := best.length
+ // Allow some bytes at the beginning to mismatch.
+ // Sweet spot is around 1-2 bytes, but depends on input.
+ // The skipped bytes are tested in Extend backwards,
+ // and are still picked up as part of the match if they match.
+ const skipBeginning = 2
+ const skipEnd = 1
+ if sAt := best.s + best.length - skipEnd; sAt < sLimit {
+
+ sBack := best.s + skipBeginning - skipEnd
+ backL := best.length - skipBeginning
// Load initial values
cv = load64(src, sBack)
- // Search for mismatch
+
+ // Grab candidates...
next := lTable[hash8(load64(src, sAt), lTableBits)]
- //next := sTable[hash4(load64(src, sAt), sTableBits)]
if checkAt := getCur(next) - backL; checkAt > 0 {
best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
@@ -191,6 +199,16 @@ func encodeBlockBest(dst, src []byte) (d int) {
if checkAt := getPrev(next) - backL; checkAt > 0 {
best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
}
+ // Disabled: Extremely small gain
+ if false {
+ next = sTable[hash4(load64(src, sAt), sTableBits)]
+ if checkAt := getCur(next) - backL; checkAt > 0 {
+ best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
+ }
+ if checkAt := getPrev(next) - backL; checkAt > 0 {
+ best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
+ }
+ }
}
}
}
@@ -288,8 +306,9 @@ emitRemainder:
// been written.
//
// It also assumes that:
+//
// len(dst) >= MaxEncodedLen(len(src)) &&
-// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
+// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBestSnappy(dst, src []byte) (d int) {
// Initialize the hash tables.
const (
@@ -546,6 +565,7 @@ emitRemainder:
// emitCopySize returns the size to encode the offset+length
//
// It assumes that:
+//
// 1 <= offset && offset <= math.MaxUint32
// 4 <= length && length <= 1 << 24
func emitCopySize(offset, length int) int {
@@ -584,6 +604,7 @@ func emitCopySize(offset, length int) int {
// emitCopyNoRepeatSize returns the size to encode the offset+length
//
// It assumes that:
+//
// 1 <= offset && offset <= math.MaxUint32
// 4 <= length && length <= 1 << 24
func emitCopyNoRepeatSize(offset, length int) int {
diff --git a/vendor/github.com/klauspost/compress/s2/encode_better.go b/vendor/github.com/klauspost/compress/s2/encode_better.go
index 943215b8a..3b66ba42b 100644
--- a/vendor/github.com/klauspost/compress/s2/encode_better.go
+++ b/vendor/github.com/klauspost/compress/s2/encode_better.go
@@ -42,8 +42,9 @@ func hash8(u uint64, h uint8) uint32 {
// been written.
//
// It also assumes that:
+//
// len(dst) >= MaxEncodedLen(len(src)) &&
-// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
+// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetterGo(dst, src []byte) (d int) {
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
@@ -56,7 +57,7 @@ func encodeBlockBetterGo(dst, src []byte) (d int) {
// Initialize the hash tables.
const (
// Long hash matches.
- lTableBits = 16
+ lTableBits = 17
maxLTableSize = 1 << lTableBits
// Short hash matches.
@@ -97,9 +98,26 @@ func encodeBlockBetterGo(dst, src []byte) (d int) {
lTable[hashL] = uint32(s)
sTable[hashS] = uint32(s)
+ valLong := load64(src, candidateL)
+ valShort := load64(src, candidateS)
+
+ // If long matches at least 8 bytes, use that.
+ if cv == valLong {
+ break
+ }
+ if cv == valShort {
+ candidateL = candidateS
+ break
+ }
+
// Check repeat at offset checkRep.
const checkRep = 1
- if false && uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
+ // Minimum length of a repeat. Tested with various values.
+ // While 4-5 offers improvements in some, 6 reduces
+ // regressions significantly.
+ const wantRepeatBytes = 6
+ const repeatMask = ((1 << (wantRepeatBytes * 8)) - 1) << (8 * checkRep)
+ if false && repeat > 0 && cv&repeatMask == load64(src, s-repeat)&repeatMask {
base := s + checkRep
// Extend back
for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
@@ -109,8 +127,8 @@ func encodeBlockBetterGo(dst, src []byte) (d int) {
d += emitLiteral(dst[d:], src[nextEmit:base])
// Extend forward
- candidate := s - repeat + 4 + checkRep
- s += 4 + checkRep
+ candidate := s - repeat + wantRepeatBytes + checkRep
+ s += wantRepeatBytes + checkRep
for s < len(src) {
if len(src)-s < 8 {
if src[s] == src[candidate] {
@@ -127,28 +145,40 @@ func encodeBlockBetterGo(dst, src []byte) (d int) {
s += 8
candidate += 8
}
- if nextEmit > 0 {
- // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
- d += emitRepeat(dst[d:], repeat, s-base)
- } else {
- // First match, cannot be repeat.
- d += emitCopy(dst[d:], repeat, s-base)
- }
+ // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
+ d += emitRepeat(dst[d:], repeat, s-base)
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
+ // Index in-between
+ index0 := base + 1
+ index1 := s - 2
+
+ cv = load64(src, s)
+ for index0 < index1 {
+ cv0 := load64(src, index0)
+ cv1 := load64(src, index1)
+ lTable[hash7(cv0, lTableBits)] = uint32(index0)
+ sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
+
+ lTable[hash7(cv1, lTableBits)] = uint32(index1)
+ sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
+ index0 += 2
+ index1 -= 2
+ }
cv = load64(src, s)
continue
}
- if uint32(cv) == load32(src, candidateL) {
+ // Long likely matches 7, so take that.
+ if uint32(cv) == uint32(valLong) {
break
}
// Check our short candidate
- if uint32(cv) == load32(src, candidateS) {
+ if uint32(cv) == uint32(valShort) {
// Try a long candidate at s+1
hashL = hash7(cv>>8, lTableBits)
candidateL = int(lTable[hashL])
@@ -227,21 +257,29 @@ func encodeBlockBetterGo(dst, src []byte) (d int) {
// Do we have space for more, if not bail.
return 0
}
- // Index match start+1 (long) and start+2 (short)
+
+ // Index short & long
index0 := base + 1
- // Index match end-2 (long) and end-1 (short)
index1 := s - 2
cv0 := load64(src, index0)
cv1 := load64(src, index1)
- cv = load64(src, s)
lTable[hash7(cv0, lTableBits)] = uint32(index0)
- lTable[hash7(cv0>>8, lTableBits)] = uint32(index0 + 1)
- lTable[hash7(cv1, lTableBits)] = uint32(index1)
- lTable[hash7(cv1>>8, lTableBits)] = uint32(index1 + 1)
sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
- sTable[hash4(cv0>>16, sTableBits)] = uint32(index0 + 2)
+
+ lTable[hash7(cv1, lTableBits)] = uint32(index1)
sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
+ index0 += 1
+ index1 -= 1
+ cv = load64(src, s)
+
+ // index every second long in between.
+ for index0 < index1 {
+ lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
+ lTable[hash7(load64(src, index1), lTableBits)] = uint32(index1)
+ index0 += 2
+ index1 -= 2
+ }
}
emitRemainder:
@@ -260,8 +298,9 @@ emitRemainder:
// been written.
//
// It also assumes that:
+//
// len(dst) >= MaxEncodedLen(len(src)) &&
-// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
+// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetterSnappyGo(dst, src []byte) (d int) {
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
@@ -402,21 +441,29 @@ func encodeBlockBetterSnappyGo(dst, src []byte) (d int) {
// Do we have space for more, if not bail.
return 0
}
- // Index match start+1 (long) and start+2 (short)
+
+ // Index short & long
index0 := base + 1
- // Index match end-2 (long) and end-1 (short)
index1 := s - 2
cv0 := load64(src, index0)
cv1 := load64(src, index1)
- cv = load64(src, s)
lTable[hash7(cv0, lTableBits)] = uint32(index0)
- lTable[hash7(cv0>>8, lTableBits)] = uint32(index0 + 1)
- lTable[hash7(cv1, lTableBits)] = uint32(index1)
- lTable[hash7(cv1>>8, lTableBits)] = uint32(index1 + 1)
sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
- sTable[hash4(cv0>>16, sTableBits)] = uint32(index0 + 2)
+
+ lTable[hash7(cv1, lTableBits)] = uint32(index1)
sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
+ index0 += 1
+ index1 -= 1
+ cv = load64(src, s)
+
+ // index every second long in between.
+ for index0 < index1 {
+ lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
+ lTable[hash7(load64(src, index1), lTableBits)] = uint32(index1)
+ index0 += 2
+ index1 -= 2
+ }
}
emitRemainder:
diff --git a/vendor/github.com/klauspost/compress/s2/encode_go.go b/vendor/github.com/klauspost/compress/s2/encode_go.go
index 94784b82a..db08fc355 100644
--- a/vendor/github.com/klauspost/compress/s2/encode_go.go
+++ b/vendor/github.com/klauspost/compress/s2/encode_go.go
@@ -12,6 +12,7 @@ import (
// been written.
//
// It also assumes that:
+//
// len(dst) >= MaxEncodedLen(len(src))
func encodeBlock(dst, src []byte) (d int) {
if len(src) < minNonLiteralBlockSize {
@@ -25,6 +26,7 @@ func encodeBlock(dst, src []byte) (d int) {
// been written.
//
// It also assumes that:
+//
// len(dst) >= MaxEncodedLen(len(src))
func encodeBlockBetter(dst, src []byte) (d int) {
return encodeBlockBetterGo(dst, src)
@@ -35,6 +37,7 @@ func encodeBlockBetter(dst, src []byte) (d int) {
// been written.
//
// It also assumes that:
+//
// len(dst) >= MaxEncodedLen(len(src))
func encodeBlockBetterSnappy(dst, src []byte) (d int) {
return encodeBlockBetterSnappyGo(dst, src)
@@ -45,6 +48,7 @@ func encodeBlockBetterSnappy(dst, src []byte) (d int) {
// been written.
//
// It also assumes that:
+//
// len(dst) >= MaxEncodedLen(len(src))
func encodeBlockSnappy(dst, src []byte) (d int) {
if len(src) < minNonLiteralBlockSize {
@@ -56,6 +60,7 @@ func encodeBlockSnappy(dst, src []byte) (d int) {
// emitLiteral writes a literal chunk and returns the number of bytes written.
//
// It assumes that:
+//
// dst is long enough to hold the encoded bytes
// 0 <= len(lit) && len(lit) <= math.MaxUint32
func emitLiteral(dst, lit []byte) int {
@@ -146,6 +151,7 @@ func emitRepeat(dst []byte, offset, length int) int {
// emitCopy writes a copy chunk and returns the number of bytes written.
//
// It assumes that:
+//
// dst is long enough to hold the encoded bytes
// 1 <= offset && offset <= math.MaxUint32
// 4 <= length && length <= 1 << 24
@@ -214,6 +220,7 @@ func emitCopy(dst []byte, offset, length int) int {
// emitCopyNoRepeat writes a copy chunk and returns the number of bytes written.
//
// It assumes that:
+//
// dst is long enough to hold the encoded bytes
// 1 <= offset && offset <= math.MaxUint32
// 4 <= length && length <= 1 << 24
@@ -273,8 +280,8 @@ func emitCopyNoRepeat(dst []byte, offset, length int) int {
// matchLen returns how many bytes match in a and b
//
// It assumes that:
-// len(a) <= len(b)
//
+// len(a) <= len(b)
func matchLen(a []byte, b []byte) int {
b = b[:len(a)]
var checked int
diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go
index 88f27c099..7e00bac3e 100644
--- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go
+++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go
@@ -1,7 +1,6 @@
// Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT.
//go:build !appengine && !noasm && gc && !noasm
-// +build !appengine,!noasm,gc,!noasm
package s2
@@ -150,8 +149,9 @@ func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int
// emitLiteral writes a literal chunk and returns the number of bytes written.
//
// It assumes that:
-// dst is long enough to hold the encoded bytes with margin of 0 bytes
-// 0 <= len(lit) && len(lit) <= math.MaxUint32
+//
+// dst is long enough to hold the encoded bytes with margin of 0 bytes
+// 0 <= len(lit) && len(lit) <= math.MaxUint32
//
//go:noescape
func emitLiteral(dst []byte, lit []byte) int
@@ -165,9 +165,10 @@ func emitRepeat(dst []byte, offset int, length int) int
// emitCopy writes a copy chunk and returns the number of bytes written.
//
// It assumes that:
-// dst is long enough to hold the encoded bytes
-// 1 <= offset && offset <= math.MaxUint32
-// 4 <= length && length <= 1 << 24
+//
+// dst is long enough to hold the encoded bytes
+// 1 <= offset && offset <= math.MaxUint32
+// 4 <= length && length <= 1 << 24
//
//go:noescape
func emitCopy(dst []byte, offset int, length int) int
@@ -175,9 +176,10 @@ func emitCopy(dst []byte, offset int, length int) int
// emitCopyNoRepeat writes a copy chunk and returns the number of bytes written.
//
// It assumes that:
-// dst is long enough to hold the encoded bytes
-// 1 <= offset && offset <= math.MaxUint32
-// 4 <= length && length <= 1 << 24
+//
+// dst is long enough to hold the encoded bytes
+// 1 <= offset && offset <= math.MaxUint32
+// 4 <= length && length <= 1 << 24
//
//go:noescape
func emitCopyNoRepeat(dst []byte, offset int, length int) int
@@ -185,7 +187,8 @@ func emitCopyNoRepeat(dst []byte, offset int, length int) int
// matchLen returns how many bytes match in a and b
//
// It assumes that:
-// len(a) <= len(b)
+//
+// len(a) <= len(b)
//
//go:noescape
func matchLen(a []byte, b []byte) int
diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
index 36915d949..81a487d6d 100644
--- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
+++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
@@ -1,7 +1,6 @@
// Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT.
//go:build !appengine && !noasm && gc && !noasm
-// +build !appengine,!noasm,gc,!noasm
#include "textflag.h"
@@ -5743,9 +5742,9 @@ emit_literal_done_emit_remainder_encodeBlockAsm8B:
// func encodeBetterBlockAsm(dst []byte, src []byte) int
// Requires: BMI, SSE2
-TEXT ·encodeBetterBlockAsm(SB), $327704-56
+TEXT ·encodeBetterBlockAsm(SB), $589848-56
MOVQ dst_base+0(FP), AX
- MOVQ $0x00000a00, CX
+ MOVQ $0x00001200, CX
LEAQ 24(SP), DX
PXOR X0, X0
@@ -5797,27 +5796,37 @@ check_maxskip_cont_encodeBetterBlockAsm:
MOVQ DI, R11
SHLQ $0x08, R10
IMULQ R9, R10
- SHRQ $0x30, R10
+ SHRQ $0x2f, R10
SHLQ $0x20, R11
IMULQ SI, R11
SHRQ $0x32, R11
MOVL 24(SP)(R10*4), SI
- MOVL 262168(SP)(R11*4), R8
+ MOVL 524312(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
- MOVL CX, 262168(SP)(R11*4)
- CMPL (DX)(SI*1), DI
+ MOVL CX, 524312(SP)(R11*4)
+ MOVQ (DX)(SI*1), R10
+ MOVQ (DX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeBetterBlockAsm
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeBetterBlockAsm
- MOVL 20(SP), CX
- JMP search_loop_encodeBetterBlockAsm
+ CMPQ R11, DI
+ JNE no_short_found_encodeBetterBlockAsm
+ MOVL R8, SI
+ JMP candidate_match_encodeBetterBlockAsm
+
+no_short_found_encodeBetterBlockAsm:
+ CMPL R10, DI
+ JEQ candidate_match_encodeBetterBlockAsm
+ CMPL R11, DI
+ JEQ candidateS_match_encodeBetterBlockAsm
+ MOVL 20(SP), CX
+ JMP search_loop_encodeBetterBlockAsm
candidateS_match_encodeBetterBlockAsm:
SHRQ $0x08, DI
MOVQ DI, R10
SHLQ $0x08, R10
IMULQ R9, R10
- SHRQ $0x30, R10
+ SHRQ $0x2f, R10
MOVL 24(SP)(R10*4), SI
INCL CX
MOVL CX, 24(SP)(R10*4)
@@ -6590,52 +6599,49 @@ match_nolit_emitcopy_end_encodeBetterBlockAsm:
match_nolit_dst_ok_encodeBetterBlockAsm:
MOVQ $0x00cf1bbcdcbfa563, SI
MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
+ LEAQ 1(DI), DI
+ LEAQ -2(CX), R9
+ MOVQ (DX)(DI*1), R10
+ MOVQ 1(DX)(DI*1), R11
+ MOVQ (DX)(R9*1), R12
+ MOVQ 1(DX)(R9*1), R13
SHLQ $0x08, R10
IMULQ SI, R10
- SHRQ $0x30, R10
- SHLQ $0x08, R13
- IMULQ SI, R13
- SHRQ $0x30, R13
+ SHRQ $0x2f, R10
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x32, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x32, R12
+ SHLQ $0x08, R12
+ IMULQ SI, R12
+ SHRQ $0x2f, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x32, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 262168(SP)(R11*4)
- MOVL R15, 262168(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
+ MOVL R9, 24(SP)(R12*4)
+ MOVL R8, 524312(SP)(R11*4)
+ MOVL R14, 524312(SP)(R13*4)
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
+
+index_loop_encodeBetterBlockAsm:
+ CMPQ DI, R9
+ JAE search_loop_encodeBetterBlockAsm
+ MOVQ (DX)(DI*1), R8
+ MOVQ (DX)(R9*1), R10
+ SHLQ $0x08, R8
+ IMULQ SI, R8
+ SHRQ $0x2f, R8
SHLQ $0x08, R10
IMULQ SI, R10
- SHRQ $0x30, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x32, R11
- SHLQ $0x08, R13
- IMULQ SI, R13
- SHRQ $0x30, R13
+ SHRQ $0x2f, R10
+ MOVL DI, 24(SP)(R8*4)
MOVL R9, 24(SP)(R10*4)
- MOVL DI, 262168(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeBetterBlockAsm
+ ADDQ $0x02, DI
+ SUBQ $0x02, R9
+ JMP index_loop_encodeBetterBlockAsm
emit_remainder_encodeBetterBlockAsm:
MOVQ src_len+32(FP), CX
@@ -6815,9 +6821,9 @@ emit_literal_done_emit_remainder_encodeBetterBlockAsm:
// func encodeBetterBlockAsm4MB(dst []byte, src []byte) int
// Requires: BMI, SSE2
-TEXT ·encodeBetterBlockAsm4MB(SB), $327704-56
+TEXT ·encodeBetterBlockAsm4MB(SB), $589848-56
MOVQ dst_base+0(FP), AX
- MOVQ $0x00000a00, CX
+ MOVQ $0x00001200, CX
LEAQ 24(SP), DX
PXOR X0, X0
@@ -6869,27 +6875,37 @@ check_maxskip_cont_encodeBetterBlockAsm4MB:
MOVQ DI, R11
SHLQ $0x08, R10
IMULQ R9, R10
- SHRQ $0x30, R10
+ SHRQ $0x2f, R10
SHLQ $0x20, R11
IMULQ SI, R11
SHRQ $0x32, R11
MOVL 24(SP)(R10*4), SI
- MOVL 262168(SP)(R11*4), R8
+ MOVL 524312(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
- MOVL CX, 262168(SP)(R11*4)
- CMPL (DX)(SI*1), DI
+ MOVL CX, 524312(SP)(R11*4)
+ MOVQ (DX)(SI*1), R10
+ MOVQ (DX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeBetterBlockAsm4MB
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeBetterBlockAsm4MB
- MOVL 20(SP), CX
- JMP search_loop_encodeBetterBlockAsm4MB
+ CMPQ R11, DI
+ JNE no_short_found_encodeBetterBlockAsm4MB
+ MOVL R8, SI
+ JMP candidate_match_encodeBetterBlockAsm4MB
+
+no_short_found_encodeBetterBlockAsm4MB:
+ CMPL R10, DI
+ JEQ candidate_match_encodeBetterBlockAsm4MB
+ CMPL R11, DI
+ JEQ candidateS_match_encodeBetterBlockAsm4MB
+ MOVL 20(SP), CX
+ JMP search_loop_encodeBetterBlockAsm4MB
candidateS_match_encodeBetterBlockAsm4MB:
SHRQ $0x08, DI
MOVQ DI, R10
SHLQ $0x08, R10
IMULQ R9, R10
- SHRQ $0x30, R10
+ SHRQ $0x2f, R10
MOVL 24(SP)(R10*4), SI
INCL CX
MOVL CX, 24(SP)(R10*4)
@@ -7600,52 +7616,49 @@ match_nolit_emitcopy_end_encodeBetterBlockAsm4MB:
match_nolit_dst_ok_encodeBetterBlockAsm4MB:
MOVQ $0x00cf1bbcdcbfa563, SI
MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
+ LEAQ 1(DI), DI
+ LEAQ -2(CX), R9
+ MOVQ (DX)(DI*1), R10
+ MOVQ 1(DX)(DI*1), R11
+ MOVQ (DX)(R9*1), R12
+ MOVQ 1(DX)(R9*1), R13
SHLQ $0x08, R10
IMULQ SI, R10
- SHRQ $0x30, R10
- SHLQ $0x08, R13
- IMULQ SI, R13
- SHRQ $0x30, R13
+ SHRQ $0x2f, R10
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x32, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x32, R12
+ SHLQ $0x08, R12
+ IMULQ SI, R12
+ SHRQ $0x2f, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x32, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 262168(SP)(R11*4)
- MOVL R15, 262168(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
+ MOVL R9, 24(SP)(R12*4)
+ MOVL R8, 524312(SP)(R11*4)
+ MOVL R14, 524312(SP)(R13*4)
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
+
+index_loop_encodeBetterBlockAsm4MB:
+ CMPQ DI, R9
+ JAE search_loop_encodeBetterBlockAsm4MB
+ MOVQ (DX)(DI*1), R8
+ MOVQ (DX)(R9*1), R10
+ SHLQ $0x08, R8
+ IMULQ SI, R8
+ SHRQ $0x2f, R8
SHLQ $0x08, R10
IMULQ SI, R10
- SHRQ $0x30, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x32, R11
- SHLQ $0x08, R13
- IMULQ SI, R13
- SHRQ $0x30, R13
+ SHRQ $0x2f, R10
+ MOVL DI, 24(SP)(R8*4)
MOVL R9, 24(SP)(R10*4)
- MOVL DI, 262168(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeBetterBlockAsm4MB
+ ADDQ $0x02, DI
+ SUBQ $0x02, R9
+ JMP index_loop_encodeBetterBlockAsm4MB
emit_remainder_encodeBetterBlockAsm4MB:
MOVQ src_len+32(FP), CX
@@ -7871,12 +7884,22 @@ search_loop_encodeBetterBlockAsm12B:
MOVL 65560(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
MOVL CX, 65560(SP)(R11*4)
- CMPL (DX)(SI*1), DI
+ MOVQ (DX)(SI*1), R10
+ MOVQ (DX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeBetterBlockAsm12B
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeBetterBlockAsm12B
- MOVL 20(SP), CX
- JMP search_loop_encodeBetterBlockAsm12B
+ CMPQ R11, DI
+ JNE no_short_found_encodeBetterBlockAsm12B
+ MOVL R8, SI
+ JMP candidate_match_encodeBetterBlockAsm12B
+
+no_short_found_encodeBetterBlockAsm12B:
+ CMPL R10, DI
+ JEQ candidate_match_encodeBetterBlockAsm12B
+ CMPL R11, DI
+ JEQ candidateS_match_encodeBetterBlockAsm12B
+ MOVL 20(SP), CX
+ JMP search_loop_encodeBetterBlockAsm12B
candidateS_match_encodeBetterBlockAsm12B:
SHRQ $0x08, DI
@@ -8447,52 +8470,49 @@ match_nolit_emitcopy_end_encodeBetterBlockAsm12B:
match_nolit_dst_ok_encodeBetterBlockAsm12B:
MOVQ $0x0000cf1bbcdcbf9b, SI
MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
+ LEAQ 1(DI), DI
+ LEAQ -2(CX), R9
+ MOVQ (DX)(DI*1), R10
+ MOVQ 1(DX)(DI*1), R11
+ MOVQ (DX)(R9*1), R12
+ MOVQ 1(DX)(R9*1), R13
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x32, R10
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x32, R13
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x34, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x34, R12
+ SHLQ $0x10, R12
+ IMULQ SI, R12
+ SHRQ $0x32, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x34, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 65560(SP)(R11*4)
- MOVL R15, 65560(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
+ MOVL R9, 24(SP)(R12*4)
+ MOVL R8, 65560(SP)(R11*4)
+ MOVL R14, 65560(SP)(R13*4)
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
+
+index_loop_encodeBetterBlockAsm12B:
+ CMPQ DI, R9
+ JAE search_loop_encodeBetterBlockAsm12B
+ MOVQ (DX)(DI*1), R8
+ MOVQ (DX)(R9*1), R10
+ SHLQ $0x10, R8
+ IMULQ SI, R8
+ SHRQ $0x32, R8
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x32, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x34, R11
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x32, R13
+ MOVL DI, 24(SP)(R8*4)
MOVL R9, 24(SP)(R10*4)
- MOVL DI, 65560(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeBetterBlockAsm12B
+ ADDQ $0x02, DI
+ SUBQ $0x02, R9
+ JMP index_loop_encodeBetterBlockAsm12B
emit_remainder_encodeBetterBlockAsm12B:
MOVQ src_len+32(FP), CX
@@ -8707,12 +8727,22 @@ search_loop_encodeBetterBlockAsm10B:
MOVL 16408(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
MOVL CX, 16408(SP)(R11*4)
- CMPL (DX)(SI*1), DI
+ MOVQ (DX)(SI*1), R10
+ MOVQ (DX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeBetterBlockAsm10B
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeBetterBlockAsm10B
- MOVL 20(SP), CX
- JMP search_loop_encodeBetterBlockAsm10B
+ CMPQ R11, DI
+ JNE no_short_found_encodeBetterBlockAsm10B
+ MOVL R8, SI
+ JMP candidate_match_encodeBetterBlockAsm10B
+
+no_short_found_encodeBetterBlockAsm10B:
+ CMPL R10, DI
+ JEQ candidate_match_encodeBetterBlockAsm10B
+ CMPL R11, DI
+ JEQ candidateS_match_encodeBetterBlockAsm10B
+ MOVL 20(SP), CX
+ JMP search_loop_encodeBetterBlockAsm10B
candidateS_match_encodeBetterBlockAsm10B:
SHRQ $0x08, DI
@@ -9283,52 +9313,49 @@ match_nolit_emitcopy_end_encodeBetterBlockAsm10B:
match_nolit_dst_ok_encodeBetterBlockAsm10B:
MOVQ $0x0000cf1bbcdcbf9b, SI
MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
+ LEAQ 1(DI), DI
+ LEAQ -2(CX), R9
+ MOVQ (DX)(DI*1), R10
+ MOVQ 1(DX)(DI*1), R11
+ MOVQ (DX)(R9*1), R12
+ MOVQ 1(DX)(R9*1), R13
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x34, R10
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x34, R13
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x36, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x36, R12
+ SHLQ $0x10, R12
+ IMULQ SI, R12
+ SHRQ $0x34, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x36, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 16408(SP)(R11*4)
- MOVL R15, 16408(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
+ MOVL R9, 24(SP)(R12*4)
+ MOVL R8, 16408(SP)(R11*4)
+ MOVL R14, 16408(SP)(R13*4)
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
+
+index_loop_encodeBetterBlockAsm10B:
+ CMPQ DI, R9
+ JAE search_loop_encodeBetterBlockAsm10B
+ MOVQ (DX)(DI*1), R8
+ MOVQ (DX)(R9*1), R10
+ SHLQ $0x10, R8
+ IMULQ SI, R8
+ SHRQ $0x34, R8
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x34, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x36, R11
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x34, R13
+ MOVL DI, 24(SP)(R8*4)
MOVL R9, 24(SP)(R10*4)
- MOVL DI, 16408(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeBetterBlockAsm10B
+ ADDQ $0x02, DI
+ SUBQ $0x02, R9
+ JMP index_loop_encodeBetterBlockAsm10B
emit_remainder_encodeBetterBlockAsm10B:
MOVQ src_len+32(FP), CX
@@ -9543,12 +9570,22 @@ search_loop_encodeBetterBlockAsm8B:
MOVL 4120(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
MOVL CX, 4120(SP)(R11*4)
- CMPL (DX)(SI*1), DI
+ MOVQ (DX)(SI*1), R10
+ MOVQ (DX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeBetterBlockAsm8B
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeBetterBlockAsm8B
- MOVL 20(SP), CX
- JMP search_loop_encodeBetterBlockAsm8B
+ CMPQ R11, DI
+ JNE no_short_found_encodeBetterBlockAsm8B
+ MOVL R8, SI
+ JMP candidate_match_encodeBetterBlockAsm8B
+
+no_short_found_encodeBetterBlockAsm8B:
+ CMPL R10, DI
+ JEQ candidate_match_encodeBetterBlockAsm8B
+ CMPL R11, DI
+ JEQ candidateS_match_encodeBetterBlockAsm8B
+ MOVL 20(SP), CX
+ JMP search_loop_encodeBetterBlockAsm8B
candidateS_match_encodeBetterBlockAsm8B:
SHRQ $0x08, DI
@@ -10105,52 +10142,49 @@ match_nolit_emitcopy_end_encodeBetterBlockAsm8B:
match_nolit_dst_ok_encodeBetterBlockAsm8B:
MOVQ $0x0000cf1bbcdcbf9b, SI
MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
+ LEAQ 1(DI), DI
+ LEAQ -2(CX), R9
+ MOVQ (DX)(DI*1), R10
+ MOVQ 1(DX)(DI*1), R11
+ MOVQ (DX)(R9*1), R12
+ MOVQ 1(DX)(R9*1), R13
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x36, R10
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x36, R13
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x38, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x38, R12
+ SHLQ $0x10, R12
+ IMULQ SI, R12
+ SHRQ $0x36, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x38, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 4120(SP)(R11*4)
- MOVL R15, 4120(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
+ MOVL R9, 24(SP)(R12*4)
+ MOVL R8, 4120(SP)(R11*4)
+ MOVL R14, 4120(SP)(R13*4)
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
+
+index_loop_encodeBetterBlockAsm8B:
+ CMPQ DI, R9
+ JAE search_loop_encodeBetterBlockAsm8B
+ MOVQ (DX)(DI*1), R8
+ MOVQ (DX)(R9*1), R10
+ SHLQ $0x10, R8
+ IMULQ SI, R8
+ SHRQ $0x36, R8
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x36, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x38, R11
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x36, R13
+ MOVL DI, 24(SP)(R8*4)
MOVL R9, 24(SP)(R10*4)
- MOVL DI, 4120(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeBetterBlockAsm8B
+ ADDQ $0x02, DI
+ SUBQ $0x02, R9
+ JMP index_loop_encodeBetterBlockAsm8B
emit_remainder_encodeBetterBlockAsm8B:
MOVQ src_len+32(FP), CX
@@ -14287,9 +14321,9 @@ emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B:
// func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int
// Requires: BMI, SSE2
-TEXT ·encodeSnappyBetterBlockAsm(SB), $327704-56
+TEXT ·encodeSnappyBetterBlockAsm(SB), $589848-56
MOVQ dst_base+0(FP), AX
- MOVQ $0x00000a00, CX
+ MOVQ $0x00001200, CX
LEAQ 24(SP), DX
PXOR X0, X0
@@ -14341,27 +14375,37 @@ check_maxskip_cont_encodeSnappyBetterBlockAsm:
MOVQ DI, R11
SHLQ $0x08, R10
IMULQ R9, R10
- SHRQ $0x30, R10
+ SHRQ $0x2f, R10
SHLQ $0x20, R11
IMULQ SI, R11
SHRQ $0x32, R11
MOVL 24(SP)(R10*4), SI
- MOVL 262168(SP)(R11*4), R8
+ MOVL 524312(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
- MOVL CX, 262168(SP)(R11*4)
- CMPL (DX)(SI*1), DI
+ MOVL CX, 524312(SP)(R11*4)
+ MOVQ (DX)(SI*1), R10
+ MOVQ (DX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeSnappyBetterBlockAsm
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeSnappyBetterBlockAsm
- MOVL 20(SP), CX
- JMP search_loop_encodeSnappyBetterBlockAsm
+ CMPQ R11, DI
+ JNE no_short_found_encodeSnappyBetterBlockAsm
+ MOVL R8, SI
+ JMP candidate_match_encodeSnappyBetterBlockAsm
+
+no_short_found_encodeSnappyBetterBlockAsm:
+ CMPL R10, DI
+ JEQ candidate_match_encodeSnappyBetterBlockAsm
+ CMPL R11, DI
+ JEQ candidateS_match_encodeSnappyBetterBlockAsm
+ MOVL 20(SP), CX
+ JMP search_loop_encodeSnappyBetterBlockAsm
candidateS_match_encodeSnappyBetterBlockAsm:
SHRQ $0x08, DI
MOVQ DI, R10
SHLQ $0x08, R10
IMULQ R9, R10
- SHRQ $0x30, R10
+ SHRQ $0x2f, R10
MOVL 24(SP)(R10*4), SI
INCL CX
MOVL CX, 24(SP)(R10*4)
@@ -14685,52 +14729,49 @@ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm:
match_nolit_dst_ok_encodeSnappyBetterBlockAsm:
MOVQ $0x00cf1bbcdcbfa563, SI
MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
+ LEAQ 1(DI), DI
+ LEAQ -2(CX), R9
+ MOVQ (DX)(DI*1), R10
+ MOVQ 1(DX)(DI*1), R11
+ MOVQ (DX)(R9*1), R12
+ MOVQ 1(DX)(R9*1), R13
SHLQ $0x08, R10
IMULQ SI, R10
- SHRQ $0x30, R10
- SHLQ $0x08, R13
- IMULQ SI, R13
- SHRQ $0x30, R13
+ SHRQ $0x2f, R10
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x32, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x32, R12
+ SHLQ $0x08, R12
+ IMULQ SI, R12
+ SHRQ $0x2f, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x32, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 262168(SP)(R11*4)
- MOVL R15, 262168(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
+ MOVL R9, 24(SP)(R12*4)
+ MOVL R8, 524312(SP)(R11*4)
+ MOVL R14, 524312(SP)(R13*4)
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
+
+index_loop_encodeSnappyBetterBlockAsm:
+ CMPQ DI, R9
+ JAE search_loop_encodeSnappyBetterBlockAsm
+ MOVQ (DX)(DI*1), R8
+ MOVQ (DX)(R9*1), R10
+ SHLQ $0x08, R8
+ IMULQ SI, R8
+ SHRQ $0x2f, R8
SHLQ $0x08, R10
IMULQ SI, R10
- SHRQ $0x30, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x32, R11
- SHLQ $0x08, R13
- IMULQ SI, R13
- SHRQ $0x30, R13
+ SHRQ $0x2f, R10
+ MOVL DI, 24(SP)(R8*4)
MOVL R9, 24(SP)(R10*4)
- MOVL DI, 262168(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeSnappyBetterBlockAsm
+ ADDQ $0x02, DI
+ SUBQ $0x02, R9
+ JMP index_loop_encodeSnappyBetterBlockAsm
emit_remainder_encodeSnappyBetterBlockAsm:
MOVQ src_len+32(FP), CX
@@ -14964,12 +15005,22 @@ search_loop_encodeSnappyBetterBlockAsm64K:
MOVL 262168(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
MOVL CX, 262168(SP)(R11*4)
- CMPL (DX)(SI*1), DI
+ MOVQ (DX)(SI*1), R10
+ MOVQ (DX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeSnappyBetterBlockAsm64K
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeSnappyBetterBlockAsm64K
- MOVL 20(SP), CX
- JMP search_loop_encodeSnappyBetterBlockAsm64K
+ CMPQ R11, DI
+ JNE no_short_found_encodeSnappyBetterBlockAsm64K
+ MOVL R8, SI
+ JMP candidate_match_encodeSnappyBetterBlockAsm64K
+
+no_short_found_encodeSnappyBetterBlockAsm64K:
+ CMPL R10, DI
+ JEQ candidate_match_encodeSnappyBetterBlockAsm64K
+ CMPL R11, DI
+ JEQ candidateS_match_encodeSnappyBetterBlockAsm64K
+ MOVL 20(SP), CX
+ JMP search_loop_encodeSnappyBetterBlockAsm64K
candidateS_match_encodeSnappyBetterBlockAsm64K:
SHRQ $0x08, DI
@@ -15248,52 +15299,49 @@ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K:
match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K:
MOVQ $0x00cf1bbcdcbfa563, SI
MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
+ LEAQ 1(DI), DI
+ LEAQ -2(CX), R9
+ MOVQ (DX)(DI*1), R10
+ MOVQ 1(DX)(DI*1), R11
+ MOVQ (DX)(R9*1), R12
+ MOVQ 1(DX)(R9*1), R13
SHLQ $0x08, R10
IMULQ SI, R10
SHRQ $0x30, R10
- SHLQ $0x08, R13
- IMULQ SI, R13
- SHRQ $0x30, R13
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x32, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x32, R12
+ SHLQ $0x08, R12
+ IMULQ SI, R12
+ SHRQ $0x30, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x32, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 262168(SP)(R11*4)
- MOVL R15, 262168(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
+ MOVL R9, 24(SP)(R12*4)
+ MOVL R8, 262168(SP)(R11*4)
+ MOVL R14, 262168(SP)(R13*4)
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
+
+index_loop_encodeSnappyBetterBlockAsm64K:
+ CMPQ DI, R9
+ JAE search_loop_encodeSnappyBetterBlockAsm64K
+ MOVQ (DX)(DI*1), R8
+ MOVQ (DX)(R9*1), R10
+ SHLQ $0x08, R8
+ IMULQ SI, R8
+ SHRQ $0x30, R8
SHLQ $0x08, R10
IMULQ SI, R10
SHRQ $0x30, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x32, R11
- SHLQ $0x08, R13
- IMULQ SI, R13
- SHRQ $0x30, R13
+ MOVL DI, 24(SP)(R8*4)
MOVL R9, 24(SP)(R10*4)
- MOVL DI, 262168(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeSnappyBetterBlockAsm64K
+ ADDQ $0x02, DI
+ SUBQ $0x02, R9
+ JMP index_loop_encodeSnappyBetterBlockAsm64K
emit_remainder_encodeSnappyBetterBlockAsm64K:
MOVQ src_len+32(FP), CX
@@ -15508,12 +15556,22 @@ search_loop_encodeSnappyBetterBlockAsm12B:
MOVL 65560(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
MOVL CX, 65560(SP)(R11*4)
- CMPL (DX)(SI*1), DI
+ MOVQ (DX)(SI*1), R10
+ MOVQ (DX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeSnappyBetterBlockAsm12B
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeSnappyBetterBlockAsm12B
- MOVL 20(SP), CX
- JMP search_loop_encodeSnappyBetterBlockAsm12B
+ CMPQ R11, DI
+ JNE no_short_found_encodeSnappyBetterBlockAsm12B
+ MOVL R8, SI
+ JMP candidate_match_encodeSnappyBetterBlockAsm12B
+
+no_short_found_encodeSnappyBetterBlockAsm12B:
+ CMPL R10, DI
+ JEQ candidate_match_encodeSnappyBetterBlockAsm12B
+ CMPL R11, DI
+ JEQ candidateS_match_encodeSnappyBetterBlockAsm12B
+ MOVL 20(SP), CX
+ JMP search_loop_encodeSnappyBetterBlockAsm12B
candidateS_match_encodeSnappyBetterBlockAsm12B:
SHRQ $0x08, DI
@@ -15792,52 +15850,49 @@ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B:
match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B:
MOVQ $0x0000cf1bbcdcbf9b, SI
MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
+ LEAQ 1(DI), DI
+ LEAQ -2(CX), R9
+ MOVQ (DX)(DI*1), R10
+ MOVQ 1(DX)(DI*1), R11
+ MOVQ (DX)(R9*1), R12
+ MOVQ 1(DX)(R9*1), R13
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x32, R10
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x32, R13
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x34, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x34, R12
+ SHLQ $0x10, R12
+ IMULQ SI, R12
+ SHRQ $0x32, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x34, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 65560(SP)(R11*4)
- MOVL R15, 65560(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
+ MOVL R9, 24(SP)(R12*4)
+ MOVL R8, 65560(SP)(R11*4)
+ MOVL R14, 65560(SP)(R13*4)
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
+
+index_loop_encodeSnappyBetterBlockAsm12B:
+ CMPQ DI, R9
+ JAE search_loop_encodeSnappyBetterBlockAsm12B
+ MOVQ (DX)(DI*1), R8
+ MOVQ (DX)(R9*1), R10
+ SHLQ $0x10, R8
+ IMULQ SI, R8
+ SHRQ $0x32, R8
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x32, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x34, R11
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x32, R13
+ MOVL DI, 24(SP)(R8*4)
MOVL R9, 24(SP)(R10*4)
- MOVL DI, 65560(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeSnappyBetterBlockAsm12B
+ ADDQ $0x02, DI
+ SUBQ $0x02, R9
+ JMP index_loop_encodeSnappyBetterBlockAsm12B
emit_remainder_encodeSnappyBetterBlockAsm12B:
MOVQ src_len+32(FP), CX
@@ -16052,12 +16107,22 @@ search_loop_encodeSnappyBetterBlockAsm10B:
MOVL 16408(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
MOVL CX, 16408(SP)(R11*4)
- CMPL (DX)(SI*1), DI
+ MOVQ (DX)(SI*1), R10
+ MOVQ (DX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeSnappyBetterBlockAsm10B
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeSnappyBetterBlockAsm10B
- MOVL 20(SP), CX
- JMP search_loop_encodeSnappyBetterBlockAsm10B
+ CMPQ R11, DI
+ JNE no_short_found_encodeSnappyBetterBlockAsm10B
+ MOVL R8, SI
+ JMP candidate_match_encodeSnappyBetterBlockAsm10B
+
+no_short_found_encodeSnappyBetterBlockAsm10B:
+ CMPL R10, DI
+ JEQ candidate_match_encodeSnappyBetterBlockAsm10B
+ CMPL R11, DI
+ JEQ candidateS_match_encodeSnappyBetterBlockAsm10B
+ MOVL 20(SP), CX
+ JMP search_loop_encodeSnappyBetterBlockAsm10B
candidateS_match_encodeSnappyBetterBlockAsm10B:
SHRQ $0x08, DI
@@ -16336,52 +16401,49 @@ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B:
match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B:
MOVQ $0x0000cf1bbcdcbf9b, SI
MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
+ LEAQ 1(DI), DI
+ LEAQ -2(CX), R9
+ MOVQ (DX)(DI*1), R10
+ MOVQ 1(DX)(DI*1), R11
+ MOVQ (DX)(R9*1), R12
+ MOVQ 1(DX)(R9*1), R13
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x34, R10
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x34, R13
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x36, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x36, R12
+ SHLQ $0x10, R12
+ IMULQ SI, R12
+ SHRQ $0x34, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x36, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 16408(SP)(R11*4)
- MOVL R15, 16408(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
+ MOVL R9, 24(SP)(R12*4)
+ MOVL R8, 16408(SP)(R11*4)
+ MOVL R14, 16408(SP)(R13*4)
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
+
+index_loop_encodeSnappyBetterBlockAsm10B:
+ CMPQ DI, R9
+ JAE search_loop_encodeSnappyBetterBlockAsm10B
+ MOVQ (DX)(DI*1), R8
+ MOVQ (DX)(R9*1), R10
+ SHLQ $0x10, R8
+ IMULQ SI, R8
+ SHRQ $0x34, R8
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x34, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x36, R11
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x34, R13
+ MOVL DI, 24(SP)(R8*4)
MOVL R9, 24(SP)(R10*4)
- MOVL DI, 16408(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeSnappyBetterBlockAsm10B
+ ADDQ $0x02, DI
+ SUBQ $0x02, R9
+ JMP index_loop_encodeSnappyBetterBlockAsm10B
emit_remainder_encodeSnappyBetterBlockAsm10B:
MOVQ src_len+32(FP), CX
@@ -16596,12 +16658,22 @@ search_loop_encodeSnappyBetterBlockAsm8B:
MOVL 4120(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
MOVL CX, 4120(SP)(R11*4)
- CMPL (DX)(SI*1), DI
+ MOVQ (DX)(SI*1), R10
+ MOVQ (DX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeSnappyBetterBlockAsm8B
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeSnappyBetterBlockAsm8B
- MOVL 20(SP), CX
- JMP search_loop_encodeSnappyBetterBlockAsm8B
+ CMPQ R11, DI
+ JNE no_short_found_encodeSnappyBetterBlockAsm8B
+ MOVL R8, SI
+ JMP candidate_match_encodeSnappyBetterBlockAsm8B
+
+no_short_found_encodeSnappyBetterBlockAsm8B:
+ CMPL R10, DI
+ JEQ candidate_match_encodeSnappyBetterBlockAsm8B
+ CMPL R11, DI
+ JEQ candidateS_match_encodeSnappyBetterBlockAsm8B
+ MOVL 20(SP), CX
+ JMP search_loop_encodeSnappyBetterBlockAsm8B
candidateS_match_encodeSnappyBetterBlockAsm8B:
SHRQ $0x08, DI
@@ -16878,52 +16950,49 @@ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B:
match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B:
MOVQ $0x0000cf1bbcdcbf9b, SI
MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
+ LEAQ 1(DI), DI
+ LEAQ -2(CX), R9
+ MOVQ (DX)(DI*1), R10
+ MOVQ 1(DX)(DI*1), R11
+ MOVQ (DX)(R9*1), R12
+ MOVQ 1(DX)(R9*1), R13
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x36, R10
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x36, R13
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x38, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x38, R12
+ SHLQ $0x10, R12
+ IMULQ SI, R12
+ SHRQ $0x36, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x38, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 4120(SP)(R11*4)
- MOVL R15, 4120(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
+ MOVL R9, 24(SP)(R12*4)
+ MOVL R8, 4120(SP)(R11*4)
+ MOVL R14, 4120(SP)(R13*4)
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
+
+index_loop_encodeSnappyBetterBlockAsm8B:
+ CMPQ DI, R9
+ JAE search_loop_encodeSnappyBetterBlockAsm8B
+ MOVQ (DX)(DI*1), R8
+ MOVQ (DX)(R9*1), R10
+ SHLQ $0x10, R8
+ IMULQ SI, R8
+ SHRQ $0x36, R8
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x36, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x38, R11
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x36, R13
+ MOVL DI, 24(SP)(R8*4)
MOVL R9, 24(SP)(R10*4)
- MOVL DI, 4120(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeSnappyBetterBlockAsm8B
+ ADDQ $0x02, DI
+ SUBQ $0x02, R9
+ JMP index_loop_encodeSnappyBetterBlockAsm8B
emit_remainder_encodeSnappyBetterBlockAsm8B:
MOVQ src_len+32(FP), CX
diff --git a/vendor/github.com/klauspost/cpuid/v2/README.md b/vendor/github.com/klauspost/cpuid/v2/README.md
index ea7df3dd8..857a93e59 100644
--- a/vendor/github.com/klauspost/cpuid/v2/README.md
+++ b/vendor/github.com/klauspost/cpuid/v2/README.md
@@ -16,10 +16,17 @@ Package home: https://github.com/klauspost/cpuid
## installing
-`go get -u github.com/klauspost/cpuid/v2` using modules.
-
+`go get -u github.com/klauspost/cpuid/v2` using modules.
Drop `v2` for others.
+### Homebrew
+
+For macOS/Linux users, you can install via [brew](https://brew.sh/)
+
+```sh
+$ brew install cpuid
+```
+
## example
```Go
@@ -77,10 +84,14 @@ We have Streaming SIMD 2 Extensions
The `cpuid.CPU` provides access to CPU features. Use `cpuid.CPU.Supports()` to check for CPU features.
A faster `cpuid.CPU.Has()` is provided which will usually be inlined by the gc compiler.
+To test a larger number of features, they can be combined using `f := CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2)`, etc.
+This can be used with `cpuid.CPU.HasAll(f)` to quickly test if all features are supported.
+
Note that for some cpu/os combinations some features will not be detected.
`amd64` has rather good support and should work reliably on all platforms.
-Note that hypervisors may not pass through all CPU features.
+Note that hypervisors may not pass all CPU features through to the guest OS,
+so even if your host supports a feature it may not be visible on guests.
## arm64 feature detection
@@ -253,6 +264,218 @@ Exit Code 0
Exit Code 1
```
+
+## Available flags
+
+### x86 & amd64
+
+| Feature Flag | Description |
+|--------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| ADX | Intel ADX (Multi-Precision Add-Carry Instruction Extensions) |
+| AESNI | Advanced Encryption Standard New Instructions |
+| AMD3DNOW | AMD 3DNOW |
+| AMD3DNOWEXT | AMD 3DNowExt |
+| AMXBF16 | Tile computational operations on BFLOAT16 numbers |
+| AMXINT8 | Tile computational operations on 8-bit integers |
+| AMXFP16 | Tile computational operations on FP16 numbers |
+| AMXTILE | Tile architecture |
+| AVX | AVX functions |
+| AVX2 | AVX2 functions |
+| AVX512BF16 | AVX-512 BFLOAT16 Instructions |
+| AVX512BITALG | AVX-512 Bit Algorithms |
+| AVX512BW | AVX-512 Byte and Word Instructions |
+| AVX512CD | AVX-512 Conflict Detection Instructions |
+| AVX512DQ | AVX-512 Doubleword and Quadword Instructions |
+| AVX512ER | AVX-512 Exponential and Reciprocal Instructions |
+| AVX512F | AVX-512 Foundation |
+| AVX512FP16 | AVX-512 FP16 Instructions |
+| AVX512IFMA | AVX-512 Integer Fused Multiply-Add Instructions |
+| AVX512PF | AVX-512 Prefetch Instructions |
+| AVX512VBMI | AVX-512 Vector Bit Manipulation Instructions |
+| AVX512VBMI2 | AVX-512 Vector Bit Manipulation Instructions, Version 2 |
+| AVX512VL | AVX-512 Vector Length Extensions |
+| AVX512VNNI | AVX-512 Vector Neural Network Instructions |
+| AVX512VP2INTERSECT | AVX-512 Intersect for D/Q |
+| AVX512VPOPCNTDQ | AVX-512 Vector Population Count Doubleword and Quadword |
+| AVXIFMA | AVX-IFMA instructions |
+| AVXNECONVERT | AVX-NE-CONVERT instructions |
+| AVXSLOW | Indicates the CPU performs 2 128 bit operations instead of one |
+| AVXVNNI | AVX (VEX encoded) VNNI neural network instructions |
+| AVXVNNIINT8 | AVX-VNNI-INT8 instructions |
+| BMI1 | Bit Manipulation Instruction Set 1 |
+| BMI2 | Bit Manipulation Instruction Set 2 |
+| CETIBT | Intel CET Indirect Branch Tracking |
+| CETSS | Intel CET Shadow Stack |
+| CLDEMOTE | Cache Line Demote |
+| CLMUL | Carry-less Multiplication |
+| CLZERO | CLZERO instruction supported |
+| CMOV | i686 CMOV |
+| CMPCCXADD | CMPCCXADD instructions |
+| CMPSB_SCADBS_SHORT | Fast short CMPSB and SCASB |
+| CMPXCHG8 | CMPXCHG8 instruction |
+| CPBOOST | Core Performance Boost |
+| CPPC | AMD: Collaborative Processor Performance Control |
+| CX16 | CMPXCHG16B Instruction |
+| EFER_LMSLE_UNS | AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ |
+| ENQCMD | Enqueue Command |
+| ERMS | Enhanced REP MOVSB/STOSB |
+| F16C | Half-precision floating-point conversion |
+| FLUSH_L1D | Flush L1D cache |
+| FMA3 | Intel FMA 3. Does not imply AVX. |
+| FMA4 | Bulldozer FMA4 functions |
+| FP128 | AMD: When set, the internal FP/SIMD execution datapath is 128-bits wide |
+| FP256 | AMD: When set, the internal FP/SIMD execution datapath is 256-bits wide |
+| FSRM | Fast Short Rep Mov |
+| FXSR | FXSAVE, FXRESTOR instructions, CR4 bit 9 |
+| FXSROPT | FXSAVE/FXRSTOR optimizations |
+| GFNI | Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage. |
+| HLE | Hardware Lock Elision |
+| HRESET | If set CPU supports history reset and the IA32_HRESET_ENABLE MSR |
+| HTT | Hyperthreading (enabled) |
+| HWA | Hardware assert supported. Indicates support for MSRC001_10 |
+| HYBRID_CPU | This part has CPUs of more than one type. |
+| HYPERVISOR | This bit has been reserved by Intel & AMD for use by hypervisors |
+| IA32_ARCH_CAP | IA32_ARCH_CAPABILITIES MSR (Intel) |
+| IA32_CORE_CAP | IA32_CORE_CAPABILITIES MSR |
+| IBPB | Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) |
+| IBRS | AMD: Indirect Branch Restricted Speculation |
+| IBRS_PREFERRED | AMD: IBRS is preferred over software solution |
+| IBRS_PROVIDES_SMP | AMD: IBRS provides Same Mode Protection |
+| IBS | Instruction Based Sampling (AMD) |
+| IBSBRNTRGT | Instruction Based Sampling Feature (AMD) |
+| IBSFETCHSAM | Instruction Based Sampling Feature (AMD) |
+| IBSFFV | Instruction Based Sampling Feature (AMD) |
+| IBSOPCNT | Instruction Based Sampling Feature (AMD) |
+| IBSOPCNTEXT | Instruction Based Sampling Feature (AMD) |
+| IBSOPSAM | Instruction Based Sampling Feature (AMD) |
+| IBSRDWROPCNT | Instruction Based Sampling Feature (AMD) |
+| IBSRIPINVALIDCHK | Instruction Based Sampling Feature (AMD) |
+| IBS_FETCH_CTLX | AMD: IBS fetch control extended MSR supported |
+| IBS_OPDATA4 | AMD: IBS op data 4 MSR supported |
+| IBS_OPFUSE | AMD: Indicates support for IbsOpFuse |
+| IBS_PREVENTHOST | Disallowing IBS use by the host supported |
+| IBS_ZEN4 | Fetch and Op IBS support IBS extensions added with Zen4 |
+| INT_WBINVD | WBINVD/WBNOINVD are interruptible. |
+| INVLPGB | INVLPGB and TLBSYNC instructions supported |
+| LAHF | LAHF/SAHF in long mode |
+| LAM | If set, CPU supports Linear Address Masking |
+| LBRVIRT | LBR virtualization |
+| LZCNT | LZCNT instruction |
+| MCAOVERFLOW | MCA overflow recovery support. |
+| MCDT_NO | Processor does not exhibit MXCSR Configuration Dependent Timing behavior and does not need to mitigate it. |
+| MCOMMIT | MCOMMIT instruction supported |
+| MD_CLEAR | VERW clears CPU buffers |
+| MMX | standard MMX |
+| MMXEXT | SSE integer functions or AMD MMX ext |
+| MOVBE | MOVBE instruction (big-endian) |
+| MOVDIR64B | Move 64 Bytes as Direct Store |
+| MOVDIRI | Move Doubleword as Direct Store |
+| MOVSB_ZL | Fast Zero-Length MOVSB |
+| MPX | Intel MPX (Memory Protection Extensions) |
+| MOVU | MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD |
+| MSRIRC | Instruction Retired Counter MSR available |
+| MSR_PAGEFLUSH | Page Flush MSR available |
+| NRIPS | Indicates support for NRIP save on VMEXIT |
+| NX | NX (No-Execute) bit |
+| OSXSAVE | XSAVE enabled by OS |
+| PCONFIG | PCONFIG for Intel Multi-Key Total Memory Encryption |
+| POPCNT | POPCNT instruction |
+| PPIN | AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled |
+| PREFETCHI | PREFETCHIT0/1 instructions |
+| PSFD | AMD: Predictive Store Forward Disable |
+| RDPRU | RDPRU instruction supported |
+| RDRAND | RDRAND instruction is available |
+| RDSEED | RDSEED instruction is available |
+| RDTSCP | RDTSCP Instruction |
+| RTM | Restricted Transactional Memory |
+| RTM_ALWAYS_ABORT | Indicates that the loaded microcode is forcing RTM abort. |
+| SERIALIZE | Serialize Instruction Execution |
+| SEV | AMD Secure Encrypted Virtualization supported |
+| SEV_64BIT | AMD SEV guest execution only allowed from a 64-bit host |
+| SEV_ALTERNATIVE | AMD SEV Alternate Injection supported |
+| SEV_DEBUGSWAP | Full debug state swap supported for SEV-ES guests |
+| SEV_ES | AMD SEV Encrypted State supported |
+| SEV_RESTRICTED | AMD SEV Restricted Injection supported |
+| SEV_SNP | AMD SEV Secure Nested Paging supported |
+| SGX | Software Guard Extensions |
+| SGXLC | Software Guard Extensions Launch Control |
+| SHA | Intel SHA Extensions |
+| SME | AMD Secure Memory Encryption supported |
+| SME_COHERENT | AMD Hardware cache coherency across encryption domains enforced |
+| SPEC_CTRL_SSBD | Speculative Store Bypass Disable |
+| SRBDS_CTRL | SRBDS mitigation MSR available |
+| SSE | SSE functions |
+| SSE2 | P4 SSE functions |
+| SSE3 | Prescott SSE3 functions |
+| SSE4 | Penryn SSE4.1 functions |
+| SSE42 | Nehalem SSE4.2 functions |
+| SSE4A | AMD Barcelona microarchitecture SSE4a instructions |
+| SSSE3 | Conroe SSSE3 functions |
+| STIBP | Single Thread Indirect Branch Predictors |
+| STIBP_ALWAYSON | AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On |
+| STOSB_SHORT | Fast short STOSB |
+| SUCCOR | Software uncorrectable error containment and recovery capability. |
+| SVM | AMD Secure Virtual Machine |
+| SVMDA | Indicates support for the SVM decode assists. |
+| SVMFBASID | SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control |
+| SVML | AMD SVM lock. Indicates support for SVM-Lock. |
+| SVMNP | AMD SVM nested paging |
+| SVMPF | SVM pause intercept filter. Indicates support for the pause intercept filter |
+| SVMPFT | SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold |
+| SYSCALL | System-Call Extension (SCE): SYSCALL and SYSRET instructions. |
+| SYSEE | SYSENTER and SYSEXIT instructions |
+| TBM | AMD Trailing Bit Manipulation |
+| TLB_FLUSH_NESTED | AMD: Flushing includes all the nested translations for guest translations |
+| TME | Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE. |
+| TOPEXT | TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX. |
+| TSCRATEMSR | MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104 |
+| TSXLDTRK | Intel TSX Suspend Load Address Tracking |
+| VAES | Vector AES. AVX(512) versions requires additional checks. |
+| VMCBCLEAN | VMCB clean bits. Indicates support for VMCB clean bits. |
+| VMPL | AMD VM Permission Levels supported |
+| VMSA_REGPROT | AMD VMSA Register Protection supported |
+| VMX | Virtual Machine Extensions |
+| VPCLMULQDQ | Carry-Less Multiplication Quadword. Requires AVX for 3 register versions. |
+| VTE | AMD Virtual Transparent Encryption supported |
+| WAITPKG | TPAUSE, UMONITOR, UMWAIT |
+| WBNOINVD | Write Back and Do Not Invalidate Cache |
+| X87 | FPU |
+| XGETBV1 | Supports XGETBV with ECX = 1 |
+| XOP | Bulldozer XOP functions |
+| XSAVE | XSAVE, XRESTOR, XSETBV, XGETBV |
+| XSAVEC | Supports XSAVEC and the compacted form of XRSTOR. |
+| XSAVEOPT | XSAVEOPT available |
+| XSAVES | Supports XSAVES/XRSTORS and IA32_XSS |
+
+### ARM features
+
+| Feature Flag | Description |
+|--------------|------------------------------------------------------------------|
+| AESARM | AES instructions |
+| ARMCPUID | Some CPU ID registers readable at user-level |
+| ASIMD | Advanced SIMD |
+| ASIMDDP | SIMD Dot Product |
+| ASIMDHP | Advanced SIMD half-precision floating point |
+| ASIMDRDM | Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH) |
+| ATOMICS | Large System Extensions (LSE) |
+| CRC32 | CRC32/CRC32C instructions |
+| DCPOP | Data cache clean to Point of Persistence (DC CVAP) |
+| EVTSTRM | Generic timer |
+| FCMA | Floating point complex number addition and multiplication |
+| FP | Single-precision and double-precision floating point |
+| FPHP | Half-precision floating point |
+| GPA | Generic Pointer Authentication |
+| JSCVT | Javascript-style double->int convert (FJCVTZS) |
+| LRCPC | Weaker release consistency (LDAPR, etc) |
+| PMULL | Polynomial Multiply instructions (PMULL/PMULL2) |
+| SHA1 | SHA-1 instructions (SHA1C, etc) |
+| SHA2 | SHA-2 instructions (SHA256H, etc) |
+| SHA3 | SHA-3 instructions (EOR3, RAXI, XAR, BCAX) |
+| SHA512 | SHA512 instructions |
+| SM3 | SM3 instructions |
+| SM4 | SM4 instructions |
+| SVE | Scalable Vector Extension |
+
# license
This code is published under an MIT license. See LICENSE file for more information.
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid.go b/vendor/github.com/klauspost/cpuid/v2/cpuid.go
index 27f33250e..cf2ae9c51 100644
--- a/vendor/github.com/klauspost/cpuid/v2/cpuid.go
+++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go
@@ -73,6 +73,7 @@ const (
AMD3DNOW // AMD 3DNOW
AMD3DNOWEXT // AMD 3DNowExt
AMXBF16 // Tile computational operations on BFLOAT16 numbers
+ AMXFP16 // Tile computational operations on FP16 numbers
AMXINT8 // Tile computational operations on 8-bit integers
AMXTILE // Tile architecture
AVX // AVX functions
@@ -93,8 +94,11 @@ const (
AVX512VNNI // AVX-512 Vector Neural Network Instructions
AVX512VP2INTERSECT // AVX-512 Intersect for D/Q
AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword
+ AVXIFMA // AVX-IFMA instructions
+ AVXNECONVERT // AVX-NE-CONVERT instructions
AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one
AVXVNNI // AVX (VEX encoded) VNNI neural network instructions
+ AVXVNNIINT8 // AVX-VNNI-INT8 instructions
BMI1 // Bit Manipulation Instruction Set 1
BMI2 // Bit Manipulation Instruction Set 2
CETIBT // Intel CET Indirect Branch Tracking
@@ -103,15 +107,22 @@ const (
CLMUL // Carry-less Multiplication
CLZERO // CLZERO instruction supported
CMOV // i686 CMOV
+ CMPCCXADD // CMPCCXADD instructions
CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB
CMPXCHG8 // CMPXCHG8 instruction
CPBOOST // Core Performance Boost
+ CPPC // AMD: Collaborative Processor Performance Control
CX16 // CMPXCHG16B Instruction
+ EFER_LMSLE_UNS // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ
ENQCMD // Enqueue Command
ERMS // Enhanced REP MOVSB/STOSB
F16C // Half-precision floating-point conversion
+ FLUSH_L1D // Flush L1D cache
FMA3 // Intel FMA 3. Does not imply AVX.
FMA4 // Bulldozer FMA4 functions
+ FP128 // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide
+ FP256 // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide
+ FSRM // Fast Short Rep Mov
FXSR // FXSAVE, FXRESTOR instructions, CR4 bit 9
FXSROPT // FXSAVE/FXRSTOR optimizations
GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
@@ -119,8 +130,14 @@ const (
HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
HTT // Hyperthreading (enabled)
HWA // Hardware assert supported. Indicates support for MSRC001_10
+ HYBRID_CPU // This part has CPUs of more than one type.
HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors
+ IA32_ARCH_CAP // IA32_ARCH_CAPABILITIES MSR (Intel)
+ IA32_CORE_CAP // IA32_CORE_CAPABILITIES MSR
IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
+ IBRS // AMD: Indirect Branch Restricted Speculation
+ IBRS_PREFERRED // AMD: IBRS is preferred over software solution
+ IBRS_PROVIDES_SMP // AMD: IBRS provides Same Mode Protection
IBS // Instruction Based Sampling (AMD)
IBSBRNTRGT // Instruction Based Sampling Feature (AMD)
IBSFETCHSAM // Instruction Based Sampling Feature (AMD)
@@ -130,7 +147,11 @@ const (
IBSOPSAM // Instruction Based Sampling Feature (AMD)
IBSRDWROPCNT // Instruction Based Sampling Feature (AMD)
IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD)
+ IBS_FETCH_CTLX // AMD: IBS fetch control extended MSR supported
+ IBS_OPDATA4 // AMD: IBS op data 4 MSR supported
+ IBS_OPFUSE // AMD: Indicates support for IbsOpFuse
IBS_PREVENTHOST // Disallowing IBS use by the host supported
+ IBS_ZEN4 // AMD: Fetch and Op IBS support IBS extensions added with Zen4
INT_WBINVD // WBINVD/WBNOINVD are interruptible.
INVLPGB // NVLPGB and TLBSYNC instruction supported
LAHF // LAHF/SAHF in long mode
@@ -138,13 +159,16 @@ const (
LBRVIRT // LBR virtualization
LZCNT // LZCNT instruction
MCAOVERFLOW // MCA overflow recovery support.
+ MCDT_NO // Processor do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it.
MCOMMIT // MCOMMIT instruction supported
+ MD_CLEAR // VERW clears CPU buffers
MMX // standard MMX
MMXEXT // SSE integer functions or AMD MMX ext
MOVBE // MOVBE instruction (big-endian)
MOVDIR64B // Move 64 Bytes as Direct Store
MOVDIRI // Move Doubleword as Direct Store
MOVSB_ZL // Fast Zero-Length MOVSB
+ MOVU // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD
MPX // Intel MPX (Memory Protection Extensions)
MSRIRC // Instruction Retired Counter MSR available
MSR_PAGEFLUSH // Page Flush MSR available
@@ -153,6 +177,9 @@ const (
OSXSAVE // XSAVE enabled by OS
PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption
POPCNT // POPCNT instruction
+ PPIN // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled
+ PREFETCHI // PREFETCHIT0/1 instructions
+ PSFD // AMD: Predictive Store Forward Disable
RDPRU // RDPRU instruction supported
RDRAND // RDRAND instruction is available
RDSEED // RDSEED instruction is available
@@ -172,6 +199,8 @@ const (
SHA // Intel SHA Extensions
SME // AMD Secure Memory Encryption supported
SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced
+ SPEC_CTRL_SSBD // Speculative Store Bypass Disable
+ SRBDS_CTRL // SRBDS mitigation MSR available
SSE // SSE functions
SSE2 // P4 SSE functions
SSE3 // Prescott SSE3 functions
@@ -180,6 +209,7 @@ const (
SSE4A // AMD Barcelona microarchitecture SSE4a instructions
SSSE3 // Conroe SSSE3 functions
STIBP // Single Thread Indirect Branch Predictors
+ STIBP_ALWAYSON // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On
STOSB_SHORT // Fast short STOSB
SUCCOR // Software uncorrectable error containment and recovery capability.
SVM // AMD Secure Virtual Machine
@@ -192,8 +222,9 @@ const (
SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
SYSEE // SYSENTER and SYSEXIT instructions
TBM // AMD Trailing Bit Manipulation
- TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
+ TLB_FLUSH_NESTED // AMD: Flushing includes all the nested translations for guest translations
TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
+ TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
TSXLDTRK // Intel TSX Suspend Load Address Tracking
VAES // Vector AES. AVX(512) versions requires additional checks.
@@ -358,7 +389,7 @@ func (c CPUInfo) Supports(ids ...FeatureID) bool {
// Has allows for checking a single feature.
// Should be inlined by the compiler.
-func (c CPUInfo) Has(id FeatureID) bool {
+func (c *CPUInfo) Has(id FeatureID) bool {
return c.featureSet.inSet(id)
}
@@ -372,26 +403,47 @@ func (c CPUInfo) AnyOf(ids ...FeatureID) bool {
return false
}
+// Features contains several features combined for a fast check using
+// CPUInfo.HasAll
+type Features *flagSet
+
+// CombineFeatures allows to combine several features for a close to constant time lookup.
+func CombineFeatures(ids ...FeatureID) Features {
+ var v flagSet
+ for _, id := range ids {
+ v.set(id)
+ }
+ return &v
+}
+
+func (c *CPUInfo) HasAll(f Features) bool {
+ return c.featureSet.hasSetP(f)
+}
+
// https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
-var level1Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2)
-var level2Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)
-var level3Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
-var level4Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)
+var oneOfLevel = CombineFeatures(SYSEE, SYSCALL)
+var level1Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2)
+var level2Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)
+var level3Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
+var level4Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)
// X64Level returns the microarchitecture level detected on the CPU.
// If features are lacking or non x64 mode, 0 is returned.
// See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
func (c CPUInfo) X64Level() int {
- if c.featureSet.hasSet(level4Features) {
+ if !c.featureSet.hasOneOf(oneOfLevel) {
+ return 0
+ }
+ if c.featureSet.hasSetP(level4Features) {
return 4
}
- if c.featureSet.hasSet(level3Features) {
+ if c.featureSet.hasSetP(level3Features) {
return 3
}
- if c.featureSet.hasSet(level2Features) {
+ if c.featureSet.hasSetP(level2Features) {
return 2
}
- if c.featureSet.hasSet(level1Features) {
+ if c.featureSet.hasSetP(level1Features) {
return 1
}
return 0
@@ -555,7 +607,7 @@ const flagMask = flagBits - 1
// flagSet contains detected cpu features and characteristics in an array of flags
type flagSet [(lastID + flagMask) / flagBits]flags
-func (s flagSet) inSet(feat FeatureID) bool {
+func (s *flagSet) inSet(feat FeatureID) bool {
return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0
}
@@ -585,7 +637,17 @@ func (s *flagSet) or(other flagSet) {
}
// hasSet returns whether all features are present.
-func (s flagSet) hasSet(other flagSet) bool {
+func (s *flagSet) hasSet(other flagSet) bool {
+ for i, v := range other[:] {
+ if s[i]&v != v {
+ return false
+ }
+ }
+ return true
+}
+
+// hasSetP returns whether all features are present; unlike hasSet it takes the other set by pointer.
+func (s *flagSet) hasSetP(other *flagSet) bool {
for i, v := range other[:] {
if s[i]&v != v {
return false
@@ -594,8 +656,18 @@ func (s flagSet) hasSet(other flagSet) bool {
return true
}
+// hasOneOf returns whether one or more features are present.
+func (s *flagSet) hasOneOf(other *flagSet) bool {
+ for i, v := range other[:] {
+ if s[i]&v != 0 {
+ return true
+ }
+ }
+ return false
+}
+
// nEnabled will return the number of enabled flags.
-func (s flagSet) nEnabled() (n int) {
+func (s *flagSet) nEnabled() (n int) {
for _, v := range s[:] {
n += bits.OnesCount64(uint64(v))
}
@@ -1093,21 +1165,36 @@ func support() flagSet {
fs.setIf(ecx&(1<<30) != 0, SGXLC)
// CPUID.(EAX=7, ECX=0).EDX
+ fs.setIf(edx&(1<<4) != 0, FSRM)
+ fs.setIf(edx&(1<<9) != 0, SRBDS_CTRL)
+ fs.setIf(edx&(1<<10) != 0, MD_CLEAR)
fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT)
fs.setIf(edx&(1<<14) != 0, SERIALIZE)
+ fs.setIf(edx&(1<<15) != 0, HYBRID_CPU)
fs.setIf(edx&(1<<16) != 0, TSXLDTRK)
fs.setIf(edx&(1<<18) != 0, PCONFIG)
fs.setIf(edx&(1<<20) != 0, CETIBT)
fs.setIf(edx&(1<<26) != 0, IBPB)
fs.setIf(edx&(1<<27) != 0, STIBP)
+ fs.setIf(edx&(1<<28) != 0, FLUSH_L1D)
+ fs.setIf(edx&(1<<29) != 0, IA32_ARCH_CAP)
+ fs.setIf(edx&(1<<30) != 0, IA32_CORE_CAP)
+ fs.setIf(edx&(1<<31) != 0, SPEC_CTRL_SSBD)
+
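+ // CPUID.(EAX=7, ECX=1).EDX — NOTE(review): edx is not reloaded before these checks and cpuidex(7, 1) is only called below (discarding EDX); it looks like these bits are read from the ECX=0 sub-leaf — confirm.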
+ fs.setIf(edx&(1<<4) != 0, AVXVNNIINT8)
+ fs.setIf(edx&(1<<5) != 0, AVXNECONVERT)
+ fs.setIf(edx&(1<<14) != 0, PREFETCHI)
- // CPUID.(EAX=7, ECX=1)
+ // CPUID.(EAX=7, ECX=1).EAX
eax1, _, _, _ := cpuidex(7, 1)
fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI)
+ fs.setIf(eax1&(1<<7) != 0, CMPCCXADD)
fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL)
fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT)
fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT)
fs.setIf(eax1&(1<<22) != 0, HRESET)
+ fs.setIf(eax1&(1<<23) != 0, AVXIFMA)
fs.setIf(eax1&(1<<26) != 0, LAM)
// Only detect AVX-512 features if XGETBV is supported
@@ -1145,9 +1232,15 @@ func support() flagSet {
fs.setIf(edx&(1<<25) != 0, AMXINT8)
// eax1 = CPUID.(EAX=7, ECX=1).EAX
fs.setIf(eax1&(1<<5) != 0, AVX512BF16)
+ fs.setIf(eax1&(1<<21) != 0, AMXFP16)
}
}
+
+ // CPUID.(EAX=7, ECX=2)
+ _, _, _, edx = cpuidex(7, 2)
+ fs.setIf(edx&(1<<5) != 0, MCDT_NO)
}
+
// Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1)
// EAX
// Bit 00: XSAVEOPT is available.
@@ -1212,9 +1305,21 @@ func support() flagSet {
if maxExtendedFunction() >= 0x80000008 {
_, b, _, _ := cpuid(0x80000008)
+ fs.setIf(b&(1<<28) != 0, PSFD)
+ fs.setIf(b&(1<<27) != 0, CPPC)
+ fs.setIf(b&(1<<24) != 0, SPEC_CTRL_SSBD)
+ fs.setIf(b&(1<<23) != 0, PPIN)
+ fs.setIf(b&(1<<21) != 0, TLB_FLUSH_NESTED)
+ fs.setIf(b&(1<<20) != 0, EFER_LMSLE_UNS)
+ fs.setIf(b&(1<<19) != 0, IBRS_PROVIDES_SMP)
+ fs.setIf(b&(1<<18) != 0, IBRS_PREFERRED)
+ fs.setIf(b&(1<<17) != 0, STIBP_ALWAYSON)
+ fs.setIf(b&(1<<15) != 0, STIBP)
+ fs.setIf(b&(1<<14) != 0, IBRS)
+ fs.setIf((b&(1<<13)) != 0, INT_WBINVD)
+ fs.setIf(b&(1<<12) != 0, IBPB)
fs.setIf((b&(1<<9)) != 0, WBNOINVD)
fs.setIf((b&(1<<8)) != 0, MCOMMIT)
- fs.setIf((b&(1<<13)) != 0, INT_WBINVD)
fs.setIf((b&(1<<4)) != 0, RDPRU)
fs.setIf((b&(1<<3)) != 0, INVLPGB)
fs.setIf((b&(1<<1)) != 0, MSRIRC)
@@ -1235,6 +1340,13 @@ func support() flagSet {
fs.setIf((edx>>12)&1 == 1, SVMPFT)
}
+ if maxExtendedFunction() >= 0x8000001a {
+ eax, _, _, _ := cpuid(0x8000001a)
+ fs.setIf((eax>>0)&1 == 1, FP128)
+ fs.setIf((eax>>1)&1 == 1, MOVU)
+ fs.setIf((eax>>2)&1 == 1, FP256)
+ }
+
if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) {
eax, _, _, _ := cpuid(0x8000001b)
fs.setIf((eax>>0)&1 == 1, IBSFFV)
@@ -1245,6 +1357,10 @@ func support() flagSet {
fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT)
fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT)
fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK)
+ fs.setIf((eax>>8)&1 == 1, IBS_OPFUSE)
+ fs.setIf((eax>>9)&1 == 1, IBS_FETCH_CTLX)
+ fs.setIf((eax>>10)&1 == 1, IBS_OPDATA4) // Doc says "Fixed,0. IBS op data 4 MSR supported", but assuming they mean 1.
+ fs.setIf((eax>>11)&1 == 1, IBS_ZEN4)
}
if maxExtendedFunction() >= 0x8000001f && vend == AMD {
diff --git a/vendor/github.com/klauspost/cpuid/v2/featureid_string.go b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go
index d12e547c4..8b6cd2b72 100644
--- a/vendor/github.com/klauspost/cpuid/v2/featureid_string.go
+++ b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go
@@ -13,176 +13,207 @@ func _() {
_ = x[AMD3DNOW-3]
_ = x[AMD3DNOWEXT-4]
_ = x[AMXBF16-5]
- _ = x[AMXINT8-6]
- _ = x[AMXTILE-7]
- _ = x[AVX-8]
- _ = x[AVX2-9]
- _ = x[AVX512BF16-10]
- _ = x[AVX512BITALG-11]
- _ = x[AVX512BW-12]
- _ = x[AVX512CD-13]
- _ = x[AVX512DQ-14]
- _ = x[AVX512ER-15]
- _ = x[AVX512F-16]
- _ = x[AVX512FP16-17]
- _ = x[AVX512IFMA-18]
- _ = x[AVX512PF-19]
- _ = x[AVX512VBMI-20]
- _ = x[AVX512VBMI2-21]
- _ = x[AVX512VL-22]
- _ = x[AVX512VNNI-23]
- _ = x[AVX512VP2INTERSECT-24]
- _ = x[AVX512VPOPCNTDQ-25]
- _ = x[AVXSLOW-26]
- _ = x[AVXVNNI-27]
- _ = x[BMI1-28]
- _ = x[BMI2-29]
- _ = x[CETIBT-30]
- _ = x[CETSS-31]
- _ = x[CLDEMOTE-32]
- _ = x[CLMUL-33]
- _ = x[CLZERO-34]
- _ = x[CMOV-35]
- _ = x[CMPSB_SCADBS_SHORT-36]
- _ = x[CMPXCHG8-37]
- _ = x[CPBOOST-38]
- _ = x[CX16-39]
- _ = x[ENQCMD-40]
- _ = x[ERMS-41]
- _ = x[F16C-42]
- _ = x[FMA3-43]
- _ = x[FMA4-44]
- _ = x[FXSR-45]
- _ = x[FXSROPT-46]
- _ = x[GFNI-47]
- _ = x[HLE-48]
- _ = x[HRESET-49]
- _ = x[HTT-50]
- _ = x[HWA-51]
- _ = x[HYPERVISOR-52]
- _ = x[IBPB-53]
- _ = x[IBS-54]
- _ = x[IBSBRNTRGT-55]
- _ = x[IBSFETCHSAM-56]
- _ = x[IBSFFV-57]
- _ = x[IBSOPCNT-58]
- _ = x[IBSOPCNTEXT-59]
- _ = x[IBSOPSAM-60]
- _ = x[IBSRDWROPCNT-61]
- _ = x[IBSRIPINVALIDCHK-62]
- _ = x[IBS_PREVENTHOST-63]
- _ = x[INT_WBINVD-64]
- _ = x[INVLPGB-65]
- _ = x[LAHF-66]
- _ = x[LAM-67]
- _ = x[LBRVIRT-68]
- _ = x[LZCNT-69]
- _ = x[MCAOVERFLOW-70]
- _ = x[MCOMMIT-71]
- _ = x[MMX-72]
- _ = x[MMXEXT-73]
- _ = x[MOVBE-74]
- _ = x[MOVDIR64B-75]
- _ = x[MOVDIRI-76]
- _ = x[MOVSB_ZL-77]
- _ = x[MPX-78]
- _ = x[MSRIRC-79]
- _ = x[MSR_PAGEFLUSH-80]
- _ = x[NRIPS-81]
- _ = x[NX-82]
- _ = x[OSXSAVE-83]
- _ = x[PCONFIG-84]
- _ = x[POPCNT-85]
- _ = x[RDPRU-86]
- _ = x[RDRAND-87]
- _ = x[RDSEED-88]
- _ = x[RDTSCP-89]
- _ = x[RTM-90]
- _ = x[RTM_ALWAYS_ABORT-91]
- _ = x[SERIALIZE-92]
- _ = x[SEV-93]
- _ = x[SEV_64BIT-94]
- _ = x[SEV_ALTERNATIVE-95]
- _ = x[SEV_DEBUGSWAP-96]
- _ = x[SEV_ES-97]
- _ = x[SEV_RESTRICTED-98]
- _ = x[SEV_SNP-99]
- _ = x[SGX-100]
- _ = x[SGXLC-101]
- _ = x[SHA-102]
- _ = x[SME-103]
- _ = x[SME_COHERENT-104]
- _ = x[SSE-105]
- _ = x[SSE2-106]
- _ = x[SSE3-107]
- _ = x[SSE4-108]
- _ = x[SSE42-109]
- _ = x[SSE4A-110]
- _ = x[SSSE3-111]
- _ = x[STIBP-112]
- _ = x[STOSB_SHORT-113]
- _ = x[SUCCOR-114]
- _ = x[SVM-115]
- _ = x[SVMDA-116]
- _ = x[SVMFBASID-117]
- _ = x[SVML-118]
- _ = x[SVMNP-119]
- _ = x[SVMPF-120]
- _ = x[SVMPFT-121]
- _ = x[SYSCALL-122]
- _ = x[SYSEE-123]
- _ = x[TBM-124]
- _ = x[TOPEXT-125]
- _ = x[TME-126]
- _ = x[TSCRATEMSR-127]
- _ = x[TSXLDTRK-128]
- _ = x[VAES-129]
- _ = x[VMCBCLEAN-130]
- _ = x[VMPL-131]
- _ = x[VMSA_REGPROT-132]
- _ = x[VMX-133]
- _ = x[VPCLMULQDQ-134]
- _ = x[VTE-135]
- _ = x[WAITPKG-136]
- _ = x[WBNOINVD-137]
- _ = x[X87-138]
- _ = x[XGETBV1-139]
- _ = x[XOP-140]
- _ = x[XSAVE-141]
- _ = x[XSAVEC-142]
- _ = x[XSAVEOPT-143]
- _ = x[XSAVES-144]
- _ = x[AESARM-145]
- _ = x[ARMCPUID-146]
- _ = x[ASIMD-147]
- _ = x[ASIMDDP-148]
- _ = x[ASIMDHP-149]
- _ = x[ASIMDRDM-150]
- _ = x[ATOMICS-151]
- _ = x[CRC32-152]
- _ = x[DCPOP-153]
- _ = x[EVTSTRM-154]
- _ = x[FCMA-155]
- _ = x[FP-156]
- _ = x[FPHP-157]
- _ = x[GPA-158]
- _ = x[JSCVT-159]
- _ = x[LRCPC-160]
- _ = x[PMULL-161]
- _ = x[SHA1-162]
- _ = x[SHA2-163]
- _ = x[SHA3-164]
- _ = x[SHA512-165]
- _ = x[SM3-166]
- _ = x[SM4-167]
- _ = x[SVE-168]
- _ = x[lastID-169]
+ _ = x[AMXFP16-6]
+ _ = x[AMXINT8-7]
+ _ = x[AMXTILE-8]
+ _ = x[AVX-9]
+ _ = x[AVX2-10]
+ _ = x[AVX512BF16-11]
+ _ = x[AVX512BITALG-12]
+ _ = x[AVX512BW-13]
+ _ = x[AVX512CD-14]
+ _ = x[AVX512DQ-15]
+ _ = x[AVX512ER-16]
+ _ = x[AVX512F-17]
+ _ = x[AVX512FP16-18]
+ _ = x[AVX512IFMA-19]
+ _ = x[AVX512PF-20]
+ _ = x[AVX512VBMI-21]
+ _ = x[AVX512VBMI2-22]
+ _ = x[AVX512VL-23]
+ _ = x[AVX512VNNI-24]
+ _ = x[AVX512VP2INTERSECT-25]
+ _ = x[AVX512VPOPCNTDQ-26]
+ _ = x[AVXIFMA-27]
+ _ = x[AVXNECONVERT-28]
+ _ = x[AVXSLOW-29]
+ _ = x[AVXVNNI-30]
+ _ = x[AVXVNNIINT8-31]
+ _ = x[BMI1-32]
+ _ = x[BMI2-33]
+ _ = x[CETIBT-34]
+ _ = x[CETSS-35]
+ _ = x[CLDEMOTE-36]
+ _ = x[CLMUL-37]
+ _ = x[CLZERO-38]
+ _ = x[CMOV-39]
+ _ = x[CMPCCXADD-40]
+ _ = x[CMPSB_SCADBS_SHORT-41]
+ _ = x[CMPXCHG8-42]
+ _ = x[CPBOOST-43]
+ _ = x[CPPC-44]
+ _ = x[CX16-45]
+ _ = x[EFER_LMSLE_UNS-46]
+ _ = x[ENQCMD-47]
+ _ = x[ERMS-48]
+ _ = x[F16C-49]
+ _ = x[FLUSH_L1D-50]
+ _ = x[FMA3-51]
+ _ = x[FMA4-52]
+ _ = x[FP128-53]
+ _ = x[FP256-54]
+ _ = x[FSRM-55]
+ _ = x[FXSR-56]
+ _ = x[FXSROPT-57]
+ _ = x[GFNI-58]
+ _ = x[HLE-59]
+ _ = x[HRESET-60]
+ _ = x[HTT-61]
+ _ = x[HWA-62]
+ _ = x[HYBRID_CPU-63]
+ _ = x[HYPERVISOR-64]
+ _ = x[IA32_ARCH_CAP-65]
+ _ = x[IA32_CORE_CAP-66]
+ _ = x[IBPB-67]
+ _ = x[IBRS-68]
+ _ = x[IBRS_PREFERRED-69]
+ _ = x[IBRS_PROVIDES_SMP-70]
+ _ = x[IBS-71]
+ _ = x[IBSBRNTRGT-72]
+ _ = x[IBSFETCHSAM-73]
+ _ = x[IBSFFV-74]
+ _ = x[IBSOPCNT-75]
+ _ = x[IBSOPCNTEXT-76]
+ _ = x[IBSOPSAM-77]
+ _ = x[IBSRDWROPCNT-78]
+ _ = x[IBSRIPINVALIDCHK-79]
+ _ = x[IBS_FETCH_CTLX-80]
+ _ = x[IBS_OPDATA4-81]
+ _ = x[IBS_OPFUSE-82]
+ _ = x[IBS_PREVENTHOST-83]
+ _ = x[IBS_ZEN4-84]
+ _ = x[INT_WBINVD-85]
+ _ = x[INVLPGB-86]
+ _ = x[LAHF-87]
+ _ = x[LAM-88]
+ _ = x[LBRVIRT-89]
+ _ = x[LZCNT-90]
+ _ = x[MCAOVERFLOW-91]
+ _ = x[MCDT_NO-92]
+ _ = x[MCOMMIT-93]
+ _ = x[MD_CLEAR-94]
+ _ = x[MMX-95]
+ _ = x[MMXEXT-96]
+ _ = x[MOVBE-97]
+ _ = x[MOVDIR64B-98]
+ _ = x[MOVDIRI-99]
+ _ = x[MOVSB_ZL-100]
+ _ = x[MOVU-101]
+ _ = x[MPX-102]
+ _ = x[MSRIRC-103]
+ _ = x[MSR_PAGEFLUSH-104]
+ _ = x[NRIPS-105]
+ _ = x[NX-106]
+ _ = x[OSXSAVE-107]
+ _ = x[PCONFIG-108]
+ _ = x[POPCNT-109]
+ _ = x[PPIN-110]
+ _ = x[PREFETCHI-111]
+ _ = x[PSFD-112]
+ _ = x[RDPRU-113]
+ _ = x[RDRAND-114]
+ _ = x[RDSEED-115]
+ _ = x[RDTSCP-116]
+ _ = x[RTM-117]
+ _ = x[RTM_ALWAYS_ABORT-118]
+ _ = x[SERIALIZE-119]
+ _ = x[SEV-120]
+ _ = x[SEV_64BIT-121]
+ _ = x[SEV_ALTERNATIVE-122]
+ _ = x[SEV_DEBUGSWAP-123]
+ _ = x[SEV_ES-124]
+ _ = x[SEV_RESTRICTED-125]
+ _ = x[SEV_SNP-126]
+ _ = x[SGX-127]
+ _ = x[SGXLC-128]
+ _ = x[SHA-129]
+ _ = x[SME-130]
+ _ = x[SME_COHERENT-131]
+ _ = x[SPEC_CTRL_SSBD-132]
+ _ = x[SRBDS_CTRL-133]
+ _ = x[SSE-134]
+ _ = x[SSE2-135]
+ _ = x[SSE3-136]
+ _ = x[SSE4-137]
+ _ = x[SSE42-138]
+ _ = x[SSE4A-139]
+ _ = x[SSSE3-140]
+ _ = x[STIBP-141]
+ _ = x[STIBP_ALWAYSON-142]
+ _ = x[STOSB_SHORT-143]
+ _ = x[SUCCOR-144]
+ _ = x[SVM-145]
+ _ = x[SVMDA-146]
+ _ = x[SVMFBASID-147]
+ _ = x[SVML-148]
+ _ = x[SVMNP-149]
+ _ = x[SVMPF-150]
+ _ = x[SVMPFT-151]
+ _ = x[SYSCALL-152]
+ _ = x[SYSEE-153]
+ _ = x[TBM-154]
+ _ = x[TLB_FLUSH_NESTED-155]
+ _ = x[TME-156]
+ _ = x[TOPEXT-157]
+ _ = x[TSCRATEMSR-158]
+ _ = x[TSXLDTRK-159]
+ _ = x[VAES-160]
+ _ = x[VMCBCLEAN-161]
+ _ = x[VMPL-162]
+ _ = x[VMSA_REGPROT-163]
+ _ = x[VMX-164]
+ _ = x[VPCLMULQDQ-165]
+ _ = x[VTE-166]
+ _ = x[WAITPKG-167]
+ _ = x[WBNOINVD-168]
+ _ = x[X87-169]
+ _ = x[XGETBV1-170]
+ _ = x[XOP-171]
+ _ = x[XSAVE-172]
+ _ = x[XSAVEC-173]
+ _ = x[XSAVEOPT-174]
+ _ = x[XSAVES-175]
+ _ = x[AESARM-176]
+ _ = x[ARMCPUID-177]
+ _ = x[ASIMD-178]
+ _ = x[ASIMDDP-179]
+ _ = x[ASIMDHP-180]
+ _ = x[ASIMDRDM-181]
+ _ = x[ATOMICS-182]
+ _ = x[CRC32-183]
+ _ = x[DCPOP-184]
+ _ = x[EVTSTRM-185]
+ _ = x[FCMA-186]
+ _ = x[FP-187]
+ _ = x[FPHP-188]
+ _ = x[GPA-189]
+ _ = x[JSCVT-190]
+ _ = x[LRCPC-191]
+ _ = x[PMULL-192]
+ _ = x[SHA1-193]
+ _ = x[SHA2-194]
+ _ = x[SHA3-195]
+ _ = x[SHA512-196]
+ _ = x[SM3-197]
+ _ = x[SM4-198]
+ _ = x[SVE-199]
+ _ = x[lastID-200]
_ = x[firstID-0]
}
-const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXINT8AMXTILEAVXAVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXSLOWAVXVNNIBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCX16ENQCMDERMSF16CFMA3FMA4FXSRFXSROPTGFNIHLEHRESETHTTHWAHYPERVISORIBPBIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_PREVENTHOSTINT_WBINVDINVLPGBLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCOMMITMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMPXMSRIRCMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTRDPRURDRANDRDSEEDRDTSCPRTMRTM_ALWAYS_ABORTSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTOPEXTTMETSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID"
+const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAVXAVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8BMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4INT_WBINVDINVLPGBLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRTMRTM_ALWAYS_ABORTSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID"
-var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 58, 62, 72, 84, 92, 100, 108, 116, 123, 133, 143, 151, 161, 172, 180, 190, 208, 223, 230, 237, 241, 245, 251, 256, 264, 269, 275, 279, 297, 305, 312, 316, 322, 326, 330, 334, 338, 342, 349, 353, 356, 362, 365, 368, 378, 382, 385, 395, 406, 412, 420, 431, 439, 451, 467, 482, 492, 499, 503, 506, 513, 518, 529, 536, 539, 545, 550, 559, 566, 574, 577, 583, 596, 601, 603, 610, 617, 623, 628, 634, 640, 646, 649, 665, 674, 677, 686, 701, 714, 720, 734, 741, 744, 749, 752, 755, 767, 770, 774, 778, 782, 787, 792, 797, 802, 813, 819, 822, 827, 836, 840, 845, 850, 856, 863, 868, 871, 877, 880, 890, 898, 902, 911, 915, 927, 930, 940, 943, 950, 958, 961, 968, 971, 976, 982, 990, 996, 1002, 1010, 1015, 1022, 1029, 1037, 1044, 1049, 1054, 1061, 1065, 1067, 1071, 1074, 1079, 1084, 1089, 1093, 1097, 1101, 1107, 1110, 1113, 1116, 1122}
+var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 65, 69, 79, 91, 99, 107, 115, 123, 130, 140, 150, 158, 168, 179, 187, 197, 215, 230, 237, 249, 256, 263, 274, 278, 282, 288, 293, 301, 306, 312, 316, 325, 343, 351, 358, 362, 366, 380, 386, 390, 394, 403, 407, 411, 416, 421, 425, 429, 436, 440, 443, 449, 452, 455, 465, 475, 488, 501, 505, 509, 523, 540, 543, 553, 564, 570, 578, 589, 597, 609, 625, 639, 650, 660, 675, 683, 693, 700, 704, 707, 714, 719, 730, 737, 744, 752, 755, 761, 766, 775, 782, 790, 794, 797, 803, 816, 821, 823, 830, 837, 843, 847, 856, 860, 865, 871, 877, 883, 886, 902, 911, 914, 923, 938, 951, 957, 971, 978, 981, 986, 989, 992, 1004, 1018, 1028, 1031, 1035, 1039, 1043, 1048, 1053, 1058, 1063, 1077, 1088, 1094, 1097, 1102, 1111, 1115, 1120, 1125, 1131, 1138, 1143, 1146, 1162, 1165, 1171, 1181, 1189, 1193, 1202, 1206, 1218, 1221, 1231, 1234, 1241, 1249, 1252, 1259, 1262, 1267, 1273, 1281, 1287, 1293, 1301, 1306, 1313, 1320, 1328, 1335, 1340, 1345, 1352, 1356, 1358, 1362, 1365, 1370, 1375, 1380, 1384, 1388, 1392, 1398, 1401, 1404, 1407, 1413}
func (i FeatureID) String() string {
if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) {
diff --git a/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go
index d91d02109..84b1acd21 100644
--- a/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go
+++ b/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go
@@ -83,7 +83,7 @@ func tryToFillCPUInfoFomSysctl(c *CPUInfo) {
c.Model = sysctlGetInt(0, "machdep.cpu.model")
c.CacheLine = sysctlGetInt64(0, "hw.cachelinesize")
c.Cache.L1I = sysctlGetInt64(-1, "hw.l1icachesize")
- c.Cache.L1D = sysctlGetInt64(-1, "hw.l1icachesize")
+ c.Cache.L1D = sysctlGetInt64(-1, "hw.l1dcachesize")
c.Cache.L2 = sysctlGetInt64(-1, "hw.l2cachesize")
c.Cache.L3 = sysctlGetInt64(-1, "hw.l3cachesize")
diff --git a/vendor/github.com/minio/minio-go/v7/api-put-object.go b/vendor/github.com/minio/minio-go/v7/api-put-object.go
index 2376ee874..b29df17d4 100644
--- a/vendor/github.com/minio/minio-go/v7/api-put-object.go
+++ b/vendor/github.com/minio/minio-go/v7/api-put-object.go
@@ -93,6 +93,28 @@ type PutObjectOptions struct {
// This can be used for faster uploads on non-seekable or slow-to-seek input.
ConcurrentStreamParts bool
Internal AdvancedPutOptions
+
+ customHeaders http.Header
+}
+
+// SetMatchETag sets the If-Match header to the quoted etag, so the PUT is
+// rejected unless the existing object's ETag matches. This is a MinIO
+// specific extension to support optimistic locking semantics.
+func (opts *PutObjectOptions) SetMatchETag(etag string) {
+ if opts.customHeaders == nil {
+ opts.customHeaders = http.Header{}
+ }
+ opts.customHeaders.Set("If-Match", "\""+etag+"\"")
+}
+
+// SetMatchETagExcept sets the If-None-Match header to the quoted etag, so
+// the PUT is rejected if the existing object's ETag matches. This is a
+// MinIO specific extension to support optimistic locking semantics.
+func (opts *PutObjectOptions) SetMatchETagExcept(etag string) {
+ if opts.customHeaders == nil {
+ opts.customHeaders = http.Header{}
+ }
+ opts.customHeaders.Set("If-None-Match", "\""+etag+"\"")
}
// getNumThreads - gets the number of threads to be used in the multipart
@@ -187,6 +209,12 @@ func (opts PutObjectOptions) Header() (header http.Header) {
header.Set("x-amz-meta-"+k, v)
}
}
+
+ // set any other additional custom headers.
+ for k, v := range opts.customHeaders {
+ header[k] = v
+ }
+
return
}
diff --git a/vendor/github.com/minio/minio-go/v7/api.go b/vendor/github.com/minio/minio-go/v7/api.go
index c26fe6e72..7ec7a620b 100644
--- a/vendor/github.com/minio/minio-go/v7/api.go
+++ b/vendor/github.com/minio/minio-go/v7/api.go
@@ -118,7 +118,7 @@ type Options struct {
// Global constants.
const (
libraryName = "minio-go"
- libraryVersion = "v7.0.48"
+ libraryVersion = "v7.0.49"
)
// User Agent should always following the below style.
diff --git a/vendor/modules.txt b/vendor/modules.txt
index f06d16caf..1d41877b2 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -163,8 +163,8 @@ github.com/dsoprea/go-png-image-structure/v2
## explicit; go 1.12
github.com/dsoprea/go-utility/v2/filesystem
github.com/dsoprea/go-utility/v2/image
-# github.com/dustin/go-humanize v1.0.0
-## explicit
+# github.com/dustin/go-humanize v1.0.1
+## explicit; go 1.16
github.com/dustin/go-humanize
# github.com/fsnotify/fsnotify v1.6.0
## explicit; go 1.16
@@ -324,14 +324,14 @@ github.com/json-iterator/go
# github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51
## explicit
github.com/kballard/go-shellquote
-# github.com/klauspost/compress v1.15.9
-## explicit; go 1.16
+# github.com/klauspost/compress v1.15.15
+## explicit; go 1.17
github.com/klauspost/compress/flate
github.com/klauspost/compress/gzip
github.com/klauspost/compress/s2
github.com/klauspost/compress/snappy
github.com/klauspost/compress/zlib
-# github.com/klauspost/cpuid/v2 v2.1.1
+# github.com/klauspost/cpuid/v2 v2.2.3
## explicit; go 1.15
github.com/klauspost/cpuid/v2
# github.com/leodido/go-urn v1.2.1
@@ -353,7 +353,7 @@ github.com/miekg/dns
# github.com/minio/md5-simd v1.1.2
## explicit; go 1.14
github.com/minio/md5-simd
-# github.com/minio/minio-go/v7 v7.0.48
+# github.com/minio/minio-go/v7 v7.0.49
## explicit; go 1.17
github.com/minio/minio-go/v7
github.com/minio/minio-go/v7/pkg/credentials