summary refs log tree commit diff
path: root/vendor/github.com
diff options
context:
space:
mode:
author: kim <89579420+NyaaaWhatsUpDoc@users.noreply.github.com> 2023-10-31 11:12:22 +0000
committer: GitHub <noreply@github.com> 2023-10-31 11:12:22 +0000
commit: ce71a5a7902963538fc54583588850563f6746cc (patch)
tree: 3e869eba6d25d2db5fe81184ffee595e451b3147 /vendor/github.com
parent: [bugfix] Relax `Mention` parsing, allowing either href or name (#2320) (diff)
download: gotosocial-ce71a5a7902963538fc54583588850563f6746cc.tar.xz
[feature] add per-uri dereferencer locks (#2291)
Diffstat (limited to 'vendor/github.com')
-rw-r--r-- vendor/github.com/klauspost/compress/flate/deflate.go 29
-rw-r--r-- vendor/github.com/klauspost/compress/flate/fast_encoder.go 23
-rw-r--r-- vendor/github.com/klauspost/compress/flate/inflate.go 66
-rw-r--r-- vendor/github.com/klauspost/compress/flate/inflate_gen.go 34
-rw-r--r-- vendor/github.com/klauspost/compress/flate/level5.go 398
-rw-r--r-- vendor/github.com/klauspost/compress/flate/matchlen_amd64.go 16
-rw-r--r-- vendor/github.com/klauspost/compress/flate/matchlen_amd64.s 68
-rw-r--r-- vendor/github.com/klauspost/compress/flate/matchlen_generic.go 33
-rw-r--r-- vendor/github.com/klauspost/compress/gzip/gunzip.go 1
-rw-r--r-- vendor/github.com/klauspost/compress/gzip/gzip.go 21
-rw-r--r-- vendor/github.com/klauspost/compress/s2/dict.go 19
-rw-r--r-- vendor/github.com/klauspost/compress/s2/encode.go 2
-rw-r--r-- vendor/github.com/klauspost/compress/s2/encode_best.go 3
-rw-r--r-- vendor/github.com/klauspost/compress/s2/encode_go.go 2
-rw-r--r-- vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s 1610
-rw-r--r-- vendor/github.com/klauspost/compress/s2/index.go 20
16 files changed, 1858 insertions, 487 deletions
diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go
index 5faea0b2b..de912e187 100644
--- a/vendor/github.com/klauspost/compress/flate/deflate.go
+++ b/vendor/github.com/klauspost/compress/flate/deflate.go
@@ -7,6 +7,7 @@ package flate
import (
"encoding/binary"
+ "errors"
"fmt"
"io"
"math"
@@ -833,6 +834,12 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
d.initDeflate()
d.fill = (*compressor).fillDeflate
d.step = (*compressor).deflateLazy
+ case -level >= MinCustomWindowSize && -level <= MaxCustomWindowSize:
+ d.w.logNewTablePenalty = 7
+ d.fast = &fastEncL5Window{maxOffset: int32(-level), cur: maxStoreBlockSize}
+ d.window = make([]byte, maxStoreBlockSize)
+ d.fill = (*compressor).fillBlock
+ d.step = (*compressor).storeFast
default:
return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level)
}
@@ -929,6 +936,28 @@ func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) {
return zw, err
}
+// MinCustomWindowSize is the minimum window size that can be sent to NewWriterWindow.
+const MinCustomWindowSize = 32
+
+// MaxCustomWindowSize is the maximum custom window that can be sent to NewWriterWindow.
+const MaxCustomWindowSize = windowSize
+
+// NewWriterWindow returns a new Writer compressing data with a custom window size.
+// windowSize must be from MinCustomWindowSize to MaxCustomWindowSize.
+func NewWriterWindow(w io.Writer, windowSize int) (*Writer, error) {
+ if windowSize < MinCustomWindowSize {
+ return nil, errors.New("flate: requested window size less than MinWindowSize")
+ }
+ if windowSize > MaxCustomWindowSize {
+ return nil, errors.New("flate: requested window size bigger than MaxCustomWindowSize")
+ }
+ var dw Writer
+ if err := dw.d.init(w, -windowSize); err != nil {
+ return nil, err
+ }
+ return &dw, nil
+}
+
// A Writer takes data written to it and writes the compressed
// form of that data to an underlying writer (see NewWriter).
type Writer struct {
diff --git a/vendor/github.com/klauspost/compress/flate/fast_encoder.go b/vendor/github.com/klauspost/compress/flate/fast_encoder.go
index 24caf5f70..c8124b5c4 100644
--- a/vendor/github.com/klauspost/compress/flate/fast_encoder.go
+++ b/vendor/github.com/klauspost/compress/flate/fast_encoder.go
@@ -8,7 +8,6 @@ package flate
import (
"encoding/binary"
"fmt"
- "math/bits"
)
type fastEnc interface {
@@ -192,25 +191,3 @@ func (e *fastGen) Reset() {
}
e.hist = e.hist[:0]
}
-
-// matchLen returns the maximum length.
-// 'a' must be the shortest of the two.
-func matchLen(a, b []byte) int {
- var checked int
-
- for len(a) >= 8 {
- if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 {
- return checked + (bits.TrailingZeros64(diff) >> 3)
- }
- checked += 8
- a = a[8:]
- b = b[8:]
- }
- b = b[:len(a)]
- for i := range a {
- if a[i] != b[i] {
- return i + checked
- }
- }
- return len(a) + checked
-}
diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go
index 414c0bea9..2f410d64f 100644
--- a/vendor/github.com/klauspost/compress/flate/inflate.go
+++ b/vendor/github.com/klauspost/compress/flate/inflate.go
@@ -120,8 +120,9 @@ func (h *huffmanDecoder) init(lengths []int) bool {
const sanity = false
if h.chunks == nil {
- h.chunks = &[huffmanNumChunks]uint16{}
+ h.chunks = new([huffmanNumChunks]uint16)
}
+
if h.maxRead != 0 {
*h = huffmanDecoder{chunks: h.chunks, links: h.links}
}
@@ -175,6 +176,7 @@ func (h *huffmanDecoder) init(lengths []int) bool {
}
h.maxRead = min
+
chunks := h.chunks[:]
for i := range chunks {
chunks[i] = 0
@@ -202,8 +204,7 @@ func (h *huffmanDecoder) init(lengths []int) bool {
if cap(h.links[off]) < numLinks {
h.links[off] = make([]uint16, numLinks)
} else {
- links := h.links[off][:0]
- h.links[off] = links[:numLinks]
+ h.links[off] = h.links[off][:numLinks]
}
}
} else {
@@ -277,7 +278,7 @@ func (h *huffmanDecoder) init(lengths []int) bool {
return true
}
-// The actual read interface needed by NewReader.
+// Reader is the actual read interface needed by NewReader.
// If the passed in io.Reader does not also have ReadByte,
// the NewReader will introduce its own buffering.
type Reader interface {
@@ -285,6 +286,18 @@ type Reader interface {
io.ByteReader
}
+type step uint8
+
+const (
+ copyData step = iota + 1
+ nextBlock
+ huffmanBytesBuffer
+ huffmanBytesReader
+ huffmanBufioReader
+ huffmanStringsReader
+ huffmanGenericReader
+)
+
// Decompress state.
type decompressor struct {
// Input source.
@@ -303,7 +316,7 @@ type decompressor struct {
// Next step in the decompression,
// and decompression state.
- step func(*decompressor)
+ step step
stepState int
err error
toRead []byte
@@ -342,7 +355,7 @@ func (f *decompressor) nextBlock() {
// compressed, fixed Huffman tables
f.hl = &fixedHuffmanDecoder
f.hd = nil
- f.huffmanBlockDecoder()()
+ f.huffmanBlockDecoder()
if debugDecode {
fmt.Println("predefinied huffman block")
}
@@ -353,7 +366,7 @@ func (f *decompressor) nextBlock() {
}
f.hl = &f.h1
f.hd = &f.h2
- f.huffmanBlockDecoder()()
+ f.huffmanBlockDecoder()
if debugDecode {
fmt.Println("dynamic huffman block")
}
@@ -379,14 +392,16 @@ func (f *decompressor) Read(b []byte) (int, error) {
if f.err != nil {
return 0, f.err
}
- f.step(f)
+
+ f.doStep()
+
if f.err != nil && len(f.toRead) == 0 {
f.toRead = f.dict.readFlush() // Flush what's left in case of error
}
}
}
-// Support the io.WriteTo interface for io.Copy and friends.
+// WriteTo implements the io.WriteTo interface for io.Copy and friends.
func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
total := int64(0)
flushed := false
@@ -410,7 +425,7 @@ func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
return total, f.err
}
if f.err == nil {
- f.step(f)
+ f.doStep()
}
if len(f.toRead) == 0 && f.err != nil && !flushed {
f.toRead = f.dict.readFlush() // Flush what's left in case of error
@@ -631,7 +646,7 @@ func (f *decompressor) copyData() {
if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
- f.step = (*decompressor).copyData
+ f.step = copyData
return
}
f.finishBlock()
@@ -644,7 +659,28 @@ func (f *decompressor) finishBlock() {
}
f.err = io.EOF
}
- f.step = (*decompressor).nextBlock
+ f.step = nextBlock
+}
+
+func (f *decompressor) doStep() {
+ switch f.step {
+ case copyData:
+ f.copyData()
+ case nextBlock:
+ f.nextBlock()
+ case huffmanBytesBuffer:
+ f.huffmanBytesBuffer()
+ case huffmanBytesReader:
+ f.huffmanBytesReader()
+ case huffmanBufioReader:
+ f.huffmanBufioReader()
+ case huffmanStringsReader:
+ f.huffmanStringsReader()
+ case huffmanGenericReader:
+ f.huffmanGenericReader()
+ default:
+ panic("BUG: unexpected step state")
+ }
}
// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
@@ -747,7 +783,7 @@ func (f *decompressor) Reset(r io.Reader, dict []byte) error {
h1: f.h1,
h2: f.h2,
dict: f.dict,
- step: (*decompressor).nextBlock,
+ step: nextBlock,
}
f.dict.init(maxMatchOffset, dict)
return nil
@@ -768,7 +804,7 @@ func NewReader(r io.Reader) io.ReadCloser {
f.r = makeReader(r)
f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
- f.step = (*decompressor).nextBlock
+ f.step = nextBlock
f.dict.init(maxMatchOffset, nil)
return &f
}
@@ -787,7 +823,7 @@ func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
f.r = makeReader(r)
f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
- f.step = (*decompressor).nextBlock
+ f.step = nextBlock
f.dict.init(maxMatchOffset, dict)
return &f
}
diff --git a/vendor/github.com/klauspost/compress/flate/inflate_gen.go b/vendor/github.com/klauspost/compress/flate/inflate_gen.go
index 61342b6b8..2b2f993f7 100644
--- a/vendor/github.com/klauspost/compress/flate/inflate_gen.go
+++ b/vendor/github.com/klauspost/compress/flate/inflate_gen.go
@@ -85,7 +85,7 @@ readLiteral:
dict.writeByte(byte(v))
if dict.availWrite() == 0 {
f.toRead = dict.readFlush()
- f.step = (*decompressor).huffmanBytesBuffer
+ f.step = huffmanBytesBuffer
f.stepState = stateInit
f.b, f.nb = fb, fnb
return
@@ -251,7 +251,7 @@ copyHistory:
if dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = dict.readFlush()
- f.step = (*decompressor).huffmanBytesBuffer // We need to continue this work
+ f.step = huffmanBytesBuffer // We need to continue this work
f.stepState = stateDict
f.b, f.nb = fb, fnb
return
@@ -336,7 +336,7 @@ readLiteral:
dict.writeByte(byte(v))
if dict.availWrite() == 0 {
f.toRead = dict.readFlush()
- f.step = (*decompressor).huffmanBytesReader
+ f.step = huffmanBytesReader
f.stepState = stateInit
f.b, f.nb = fb, fnb
return
@@ -502,7 +502,7 @@ copyHistory:
if dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = dict.readFlush()
- f.step = (*decompressor).huffmanBytesReader // We need to continue this work
+ f.step = huffmanBytesReader // We need to continue this work
f.stepState = stateDict
f.b, f.nb = fb, fnb
return
@@ -587,7 +587,7 @@ readLiteral:
dict.writeByte(byte(v))
if dict.availWrite() == 0 {
f.toRead = dict.readFlush()
- f.step = (*decompressor).huffmanBufioReader
+ f.step = huffmanBufioReader
f.stepState = stateInit
f.b, f.nb = fb, fnb
return
@@ -753,7 +753,7 @@ copyHistory:
if dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = dict.readFlush()
- f.step = (*decompressor).huffmanBufioReader // We need to continue this work
+ f.step = huffmanBufioReader // We need to continue this work
f.stepState = stateDict
f.b, f.nb = fb, fnb
return
@@ -838,7 +838,7 @@ readLiteral:
dict.writeByte(byte(v))
if dict.availWrite() == 0 {
f.toRead = dict.readFlush()
- f.step = (*decompressor).huffmanStringsReader
+ f.step = huffmanStringsReader
f.stepState = stateInit
f.b, f.nb = fb, fnb
return
@@ -1004,7 +1004,7 @@ copyHistory:
if dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = dict.readFlush()
- f.step = (*decompressor).huffmanStringsReader // We need to continue this work
+ f.step = huffmanStringsReader // We need to continue this work
f.stepState = stateDict
f.b, f.nb = fb, fnb
return
@@ -1089,7 +1089,7 @@ readLiteral:
dict.writeByte(byte(v))
if dict.availWrite() == 0 {
f.toRead = dict.readFlush()
- f.step = (*decompressor).huffmanGenericReader
+ f.step = huffmanGenericReader
f.stepState = stateInit
f.b, f.nb = fb, fnb
return
@@ -1255,7 +1255,7 @@ copyHistory:
if dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = dict.readFlush()
- f.step = (*decompressor).huffmanGenericReader // We need to continue this work
+ f.step = huffmanGenericReader // We need to continue this work
f.stepState = stateDict
f.b, f.nb = fb, fnb
return
@@ -1265,19 +1265,19 @@ copyHistory:
// Not reached
}
-func (f *decompressor) huffmanBlockDecoder() func() {
+func (f *decompressor) huffmanBlockDecoder() {
switch f.r.(type) {
case *bytes.Buffer:
- return f.huffmanBytesBuffer
+ f.huffmanBytesBuffer()
case *bytes.Reader:
- return f.huffmanBytesReader
+ f.huffmanBytesReader()
case *bufio.Reader:
- return f.huffmanBufioReader
+ f.huffmanBufioReader()
case *strings.Reader:
- return f.huffmanStringsReader
+ f.huffmanStringsReader()
case Reader:
- return f.huffmanGenericReader
+ f.huffmanGenericReader()
default:
- return f.huffmanGenericReader
+ f.huffmanGenericReader()
}
}
diff --git a/vendor/github.com/klauspost/compress/flate/level5.go b/vendor/github.com/klauspost/compress/flate/level5.go
index 83ef50ba4..1f61ec182 100644
--- a/vendor/github.com/klauspost/compress/flate/level5.go
+++ b/vendor/github.com/klauspost/compress/flate/level5.go
@@ -308,3 +308,401 @@ emitRemainder:
emitLiteral(dst, src[nextEmit:])
}
}
+
+// fastEncL5Window is a level 5 encoder,
+// but with a custom window size.
+type fastEncL5Window struct {
+ hist []byte
+ cur int32
+ maxOffset int32
+ table [tableSize]tableEntry
+ bTable [tableSize]tableEntryPrev
+}
+
+func (e *fastEncL5Window) Encode(dst *tokens, src []byte) {
+ const (
+ inputMargin = 12 - 1
+ minNonLiteralBlockSize = 1 + 1 + inputMargin
+ hashShortBytes = 4
+ )
+ maxMatchOffset := e.maxOffset
+ if debugDeflate && e.cur < 0 {
+ panic(fmt.Sprint("e.cur < 0: ", e.cur))
+ }
+
+ // Protect against e.cur wraparound.
+ for e.cur >= bufferReset {
+ if len(e.hist) == 0 {
+ for i := range e.table[:] {
+ e.table[i] = tableEntry{}
+ }
+ for i := range e.bTable[:] {
+ e.bTable[i] = tableEntryPrev{}
+ }
+ e.cur = maxMatchOffset
+ break
+ }
+ // Shift down everything in the table that isn't already too far away.
+ minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
+ for i := range e.table[:] {
+ v := e.table[i].offset
+ if v <= minOff {
+ v = 0
+ } else {
+ v = v - e.cur + maxMatchOffset
+ }
+ e.table[i].offset = v
+ }
+ for i := range e.bTable[:] {
+ v := e.bTable[i]
+ if v.Cur.offset <= minOff {
+ v.Cur.offset = 0
+ v.Prev.offset = 0
+ } else {
+ v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
+ if v.Prev.offset <= minOff {
+ v.Prev.offset = 0
+ } else {
+ v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
+ }
+ }
+ e.bTable[i] = v
+ }
+ e.cur = maxMatchOffset
+ }
+
+ s := e.addBlock(src)
+
+ // This check isn't in the Snappy implementation, but there, the caller
+ // instead of the callee handles this case.
+ if len(src) < minNonLiteralBlockSize {
+ // We do not fill the token table.
+ // This will be picked up by caller.
+ dst.n = uint16(len(src))
+ return
+ }
+
+ // Override src
+ src = e.hist
+ nextEmit := s
+
+ // sLimit is when to stop looking for offset/length copies. The inputMargin
+ // lets us use a fast path for emitLiteral in the main loop, while we are
+ // looking for copies.
+ sLimit := int32(len(src) - inputMargin)
+
+ // nextEmit is where in src the next emitLiteral should start from.
+ cv := load6432(src, s)
+ for {
+ const skipLog = 6
+ const doEvery = 1
+
+ nextS := s
+ var l int32
+ var t int32
+ for {
+ nextHashS := hashLen(cv, tableBits, hashShortBytes)
+ nextHashL := hash7(cv, tableBits)
+
+ s = nextS
+ nextS = s + doEvery + (s-nextEmit)>>skipLog
+ if nextS > sLimit {
+ goto emitRemainder
+ }
+ // Fetch a short+long candidate
+ sCandidate := e.table[nextHashS]
+ lCandidate := e.bTable[nextHashL]
+ next := load6432(src, nextS)
+ entry := tableEntry{offset: s + e.cur}
+ e.table[nextHashS] = entry
+ eLong := &e.bTable[nextHashL]
+ eLong.Cur, eLong.Prev = entry, eLong.Cur
+
+ nextHashS = hashLen(next, tableBits, hashShortBytes)
+ nextHashL = hash7(next, tableBits)
+
+ t = lCandidate.Cur.offset - e.cur
+ if s-t < maxMatchOffset {
+ if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) {
+ // Store the next match
+ e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
+ eLong := &e.bTable[nextHashL]
+ eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
+
+ t2 := lCandidate.Prev.offset - e.cur
+ if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
+ l = e.matchlen(s+4, t+4, src) + 4
+ ml1 := e.matchlen(s+4, t2+4, src) + 4
+ if ml1 > l {
+ t = t2
+ l = ml1
+ break
+ }
+ }
+ break
+ }
+ t = lCandidate.Prev.offset - e.cur
+ if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
+ // Store the next match
+ e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
+ eLong := &e.bTable[nextHashL]
+ eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
+ break
+ }
+ }
+
+ t = sCandidate.offset - e.cur
+ if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
+ // Found a 4 match...
+ l = e.matchlen(s+4, t+4, src) + 4
+ lCandidate = e.bTable[nextHashL]
+ // Store the next match
+
+ e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
+ eLong := &e.bTable[nextHashL]
+ eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
+
+ // If the next long is a candidate, use that...
+ t2 := lCandidate.Cur.offset - e.cur
+ if nextS-t2 < maxMatchOffset {
+ if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) {
+ ml := e.matchlen(nextS+4, t2+4, src) + 4
+ if ml > l {
+ t = t2
+ s = nextS
+ l = ml
+ break
+ }
+ }
+ // If the previous long is a candidate, use that...
+ t2 = lCandidate.Prev.offset - e.cur
+ if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) {
+ ml := e.matchlen(nextS+4, t2+4, src) + 4
+ if ml > l {
+ t = t2
+ s = nextS
+ l = ml
+ break
+ }
+ }
+ }
+ break
+ }
+ cv = next
+ }
+
+ // A 4-byte match has been found. We'll later see if more than 4 bytes
+ // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+ // them as literal bytes.
+
+ if l == 0 {
+ // Extend the 4-byte match as long as possible.
+ l = e.matchlenLong(s+4, t+4, src) + 4
+ } else if l == maxMatchLength {
+ l += e.matchlenLong(s+l, t+l, src)
+ }
+
+ // Try to locate a better match by checking the end of best match...
+ if sAt := s + l; l < 30 && sAt < sLimit {
+ // Allow some bytes at the beginning to mismatch.
+ // The sweet spot is 2 or 3 bytes, depending on the input.
+ // 3 is only a little better when it helps, but sometimes a lot worse.
+ // The skipped bytes are tested in the "Extend backwards" step below,
+ // and are still picked up as part of the match if they match.
+ const skipBeginning = 2
+ eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset
+ t2 := eLong - e.cur - l + skipBeginning
+ s2 := s + skipBeginning
+ off := s2 - t2
+ if t2 >= 0 && off < maxMatchOffset && off > 0 {
+ if l2 := e.matchlenLong(s2, t2, src); l2 > l {
+ t = t2
+ l = l2
+ s = s2
+ }
+ }
+ }
+
+ // Extend backwards
+ for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
+ s--
+ t--
+ l++
+ }
+ if nextEmit < s {
+ if false {
+ emitLiteral(dst, src[nextEmit:s])
+ } else {
+ for _, v := range src[nextEmit:s] {
+ dst.tokens[dst.n] = token(v)
+ dst.litHist[v]++
+ dst.n++
+ }
+ }
+ }
+ if debugDeflate {
+ if t >= s {
+ panic(fmt.Sprintln("s-t", s, t))
+ }
+ if (s - t) > maxMatchOffset {
+ panic(fmt.Sprintln("mmo", s-t))
+ }
+ if l < baseMatchLength {
+ panic("bml")
+ }
+ }
+
+ dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
+ s += l
+ nextEmit = s
+ if nextS >= s {
+ s = nextS + 1
+ }
+
+ if s >= sLimit {
+ goto emitRemainder
+ }
+
+ // Store every 3rd hash in-between.
+ if true {
+ const hashEvery = 3
+ i := s - l + 1
+ if i < s-1 {
+ cv := load6432(src, i)
+ t := tableEntry{offset: i + e.cur}
+ e.table[hashLen(cv, tableBits, hashShortBytes)] = t
+ eLong := &e.bTable[hash7(cv, tableBits)]
+ eLong.Cur, eLong.Prev = t, eLong.Cur
+
+ // Do a long at i+1
+ cv >>= 8
+ t = tableEntry{offset: t.offset + 1}
+ eLong = &e.bTable[hash7(cv, tableBits)]
+ eLong.Cur, eLong.Prev = t, eLong.Cur
+
+ // We only have enough bits for a short entry at i+2
+ cv >>= 8
+ t = tableEntry{offset: t.offset + 1}
+ e.table[hashLen(cv, tableBits, hashShortBytes)] = t
+
+ // Skip one - otherwise we risk hitting 's'
+ i += 4
+ for ; i < s-1; i += hashEvery {
+ cv := load6432(src, i)
+ t := tableEntry{offset: i + e.cur}
+ t2 := tableEntry{offset: t.offset + 1}
+ eLong := &e.bTable[hash7(cv, tableBits)]
+ eLong.Cur, eLong.Prev = t, eLong.Cur
+ e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
+ }
+ }
+ }
+
+ // We could immediately start working at s now, but to improve
+ // compression we first update the hash table at s-1 and at s.
+ x := load6432(src, s-1)
+ o := e.cur + s - 1
+ prevHashS := hashLen(x, tableBits, hashShortBytes)
+ prevHashL := hash7(x, tableBits)
+ e.table[prevHashS] = tableEntry{offset: o}
+ eLong := &e.bTable[prevHashL]
+ eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur
+ cv = x >> 8
+ }
+
+emitRemainder:
+ if int(nextEmit) < len(src) {
+ // If nothing was added, don't encode literals.
+ if dst.n == 0 {
+ return
+ }
+
+ emitLiteral(dst, src[nextEmit:])
+ }
+}
+
+// Reset the encoding table.
+func (e *fastEncL5Window) Reset() {
+ // We keep the same allocs, since we are compressing the same block sizes.
+ if cap(e.hist) < allocHistory {
+ e.hist = make([]byte, 0, allocHistory)
+ }
+
+ // We offset current position so everything will be out of reach.
+ // If we are above the buffer reset it will be cleared anyway since len(hist) == 0.
+ if e.cur <= int32(bufferReset) {
+ e.cur += e.maxOffset + int32(len(e.hist))
+ }
+ e.hist = e.hist[:0]
+}
+
+func (e *fastEncL5Window) addBlock(src []byte) int32 {
+ // check if we have space already
+ maxMatchOffset := e.maxOffset
+
+ if len(e.hist)+len(src) > cap(e.hist) {
+ if cap(e.hist) == 0 {
+ e.hist = make([]byte, 0, allocHistory)
+ } else {
+ if cap(e.hist) < int(maxMatchOffset*2) {
+ panic("unexpected buffer size")
+ }
+ // Move down
+ offset := int32(len(e.hist)) - maxMatchOffset
+ copy(e.hist[0:maxMatchOffset], e.hist[offset:])
+ e.cur += offset
+ e.hist = e.hist[:maxMatchOffset]
+ }
+ }
+ s := int32(len(e.hist))
+ e.hist = append(e.hist, src...)
+ return s
+}
+
+// matchlen will return the match length between offsets s and t in src.
+// The maximum length returned is maxMatchLength - 4.
+// It is assumed that s > t, that t >= 0 and s < len(src).
+func (e *fastEncL5Window) matchlen(s, t int32, src []byte) int32 {
+ if debugDecode {
+ if t >= s {
+ panic(fmt.Sprint("t >=s:", t, s))
+ }
+ if int(s) >= len(src) {
+ panic(fmt.Sprint("s >= len(src):", s, len(src)))
+ }
+ if t < 0 {
+ panic(fmt.Sprint("t < 0:", t))
+ }
+ if s-t > e.maxOffset {
+ panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
+ }
+ }
+ s1 := int(s) + maxMatchLength - 4
+ if s1 > len(src) {
+ s1 = len(src)
+ }
+
+ // Extend the match to be as long as possible.
+ return int32(matchLen(src[s:s1], src[t:]))
+}
+
+// matchlenLong will return the match length between offsets s and t in src.
+// It is assumed that s > t, that t >= 0 and s < len(src).
+func (e *fastEncL5Window) matchlenLong(s, t int32, src []byte) int32 {
+ if debugDeflate {
+ if t >= s {
+ panic(fmt.Sprint("t >=s:", t, s))
+ }
+ if int(s) >= len(src) {
+ panic(fmt.Sprint("s >= len(src):", s, len(src)))
+ }
+ if t < 0 {
+ panic(fmt.Sprint("t < 0:", t))
+ }
+ if s-t > e.maxOffset {
+ panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
+ }
+ }
+ // Extend the match to be as long as possible.
+ return int32(matchLen(src[s:], src[t:]))
+}
diff --git a/vendor/github.com/klauspost/compress/flate/matchlen_amd64.go b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.go
new file mode 100644
index 000000000..4bd388584
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.go
@@ -0,0 +1,16 @@
+//go:build amd64 && !appengine && !noasm && gc
+// +build amd64,!appengine,!noasm,gc
+
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+
+package flate
+
+// matchLen returns how many bytes match in a and b
+//
+// It assumes that:
+//
+// len(a) <= len(b) and len(a) > 0
+//
+//go:noescape
+func matchLen(a []byte, b []byte) int
diff --git a/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s
new file mode 100644
index 000000000..9a7655c0f
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s
@@ -0,0 +1,68 @@
+// Copied from S2 implementation.
+
+//go:build !appengine && !noasm && gc && !noasm
+
+#include "textflag.h"
+
+// func matchLen(a []byte, b []byte) int
+// Requires: BMI
+TEXT ·matchLen(SB), NOSPLIT, $0-56
+ MOVQ a_base+0(FP), AX
+ MOVQ b_base+24(FP), CX
+ MOVQ a_len+8(FP), DX
+
+ // matchLen
+ XORL SI, SI
+ CMPL DX, $0x08
+ JB matchlen_match4_standalone
+
+matchlen_loopback_standalone:
+ MOVQ (AX)(SI*1), BX
+ XORQ (CX)(SI*1), BX
+ TESTQ BX, BX
+ JZ matchlen_loop_standalone
+
+#ifdef GOAMD64_v3
+ TZCNTQ BX, BX
+#else
+ BSFQ BX, BX
+#endif
+ SARQ $0x03, BX
+ LEAL (SI)(BX*1), SI
+ JMP gen_match_len_end
+
+matchlen_loop_standalone:
+ LEAL -8(DX), DX
+ LEAL 8(SI), SI
+ CMPL DX, $0x08
+ JAE matchlen_loopback_standalone
+
+matchlen_match4_standalone:
+ CMPL DX, $0x04
+ JB matchlen_match2_standalone
+ MOVL (AX)(SI*1), BX
+ CMPL (CX)(SI*1), BX
+ JNE matchlen_match2_standalone
+ LEAL -4(DX), DX
+ LEAL 4(SI), SI
+
+matchlen_match2_standalone:
+ CMPL DX, $0x02
+ JB matchlen_match1_standalone
+ MOVW (AX)(SI*1), BX
+ CMPW (CX)(SI*1), BX
+ JNE matchlen_match1_standalone
+ LEAL -2(DX), DX
+ LEAL 2(SI), SI
+
+matchlen_match1_standalone:
+ CMPL DX, $0x01
+ JB gen_match_len_end
+ MOVB (AX)(SI*1), BL
+ CMPB (CX)(SI*1), BL
+ JNE gen_match_len_end
+ INCL SI
+
+gen_match_len_end:
+ MOVQ SI, ret+48(FP)
+ RET
diff --git a/vendor/github.com/klauspost/compress/flate/matchlen_generic.go b/vendor/github.com/klauspost/compress/flate/matchlen_generic.go
new file mode 100644
index 000000000..ad5cd814b
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/flate/matchlen_generic.go
@@ -0,0 +1,33 @@
+//go:build !amd64 || appengine || !gc || noasm
+// +build !amd64 appengine !gc noasm
+
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+
+package flate
+
+import (
+ "encoding/binary"
+ "math/bits"
+)
+
+// matchLen returns the maximum common prefix length of a and b.
+// a must be the shorter of the two.
+func matchLen(a, b []byte) (n int) {
+ for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
+ diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
+ if diff != 0 {
+ return n + bits.TrailingZeros64(diff)>>3
+ }
+ n += 8
+ }
+
+ for i := range a {
+ if a[i] != b[i] {
+ break
+ }
+ n++
+ }
+ return n
+
+}
diff --git a/vendor/github.com/klauspost/compress/gzip/gunzip.go b/vendor/github.com/klauspost/compress/gzip/gunzip.go
index 6d630c390..dc2362a63 100644
--- a/vendor/github.com/klauspost/compress/gzip/gunzip.go
+++ b/vendor/github.com/klauspost/compress/gzip/gunzip.go
@@ -106,6 +106,7 @@ func (z *Reader) Reset(r io.Reader) error {
*z = Reader{
decompressor: z.decompressor,
multistream: true,
+ br: z.br,
}
if rr, ok := r.(flate.Reader); ok {
z.r = rr
diff --git a/vendor/github.com/klauspost/compress/gzip/gzip.go b/vendor/github.com/klauspost/compress/gzip/gzip.go
index 26203851b..5bc720593 100644
--- a/vendor/github.com/klauspost/compress/gzip/gzip.go
+++ b/vendor/github.com/klauspost/compress/gzip/gzip.go
@@ -74,6 +74,27 @@ func NewWriterLevel(w io.Writer, level int) (*Writer, error) {
return z, nil
}
+// MinCustomWindowSize is the minimum window size that can be sent to NewWriterWindow.
+const MinCustomWindowSize = flate.MinCustomWindowSize
+
+// MaxCustomWindowSize is the maximum custom window that can be sent to NewWriterWindow.
+const MaxCustomWindowSize = flate.MaxCustomWindowSize
+
+// NewWriterWindow returns a new Writer compressing data with a custom window size.
+// windowSize must be from MinCustomWindowSize to MaxCustomWindowSize.
+func NewWriterWindow(w io.Writer, windowSize int) (*Writer, error) {
+ if windowSize < MinCustomWindowSize {
+ return nil, errors.New("gzip: requested window size less than MinWindowSize")
+ }
+ if windowSize > MaxCustomWindowSize {
+ return nil, errors.New("gzip: requested window size bigger than MaxCustomWindowSize")
+ }
+
+ z := new(Writer)
+ z.init(w, -windowSize)
+ return z, nil
+}
+
func (z *Writer) init(w io.Writer, level int) {
compressor := z.compressor
if level != StatelessCompression {
diff --git a/vendor/github.com/klauspost/compress/s2/dict.go b/vendor/github.com/klauspost/compress/s2/dict.go
index 24f7ce80b..f125ad096 100644
--- a/vendor/github.com/klauspost/compress/s2/dict.go
+++ b/vendor/github.com/klauspost/compress/s2/dict.go
@@ -106,6 +106,25 @@ func MakeDict(data []byte, searchStart []byte) *Dict {
return &d
}
+// MakeDictManual will create a dictionary.
+// 'data' must be at least MinDictSize and less than or equal to MaxDictSize.
+// A manual first repeat index into data must be provided.
+// It must be less than len(data)-8.
+func MakeDictManual(data []byte, firstIdx uint16) *Dict {
+ if len(data) < MinDictSize || int(firstIdx) >= len(data)-8 || len(data) > MaxDictSize {
+ return nil
+ }
+ var d Dict
+ dict := data
+ d.dict = dict
+ if cap(d.dict) < len(d.dict)+16 {
+ d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...)
+ }
+
+ d.repeat = int(firstIdx)
+ return &d
+}
+
// Encode returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
diff --git a/vendor/github.com/klauspost/compress/s2/encode.go b/vendor/github.com/klauspost/compress/s2/encode.go
index e6c231021..0c9088adf 100644
--- a/vendor/github.com/klauspost/compress/s2/encode.go
+++ b/vendor/github.com/klauspost/compress/s2/encode.go
@@ -57,7 +57,7 @@ func Encode(dst, src []byte) []byte {
// The function returns -1 if no improvement could be achieved.
// Using actual compression will most often produce better compression than the estimate.
func EstimateBlockSize(src []byte) (d int) {
- if len(src) < 6 || int64(len(src)) > 0xffffffff {
+ if len(src) <= inputMargin || int64(len(src)) > 0xffffffff {
return -1
}
if len(src) <= 1024 {
diff --git a/vendor/github.com/klauspost/compress/s2/encode_best.go b/vendor/github.com/klauspost/compress/s2/encode_best.go
index 1d13e869a..47bac7423 100644
--- a/vendor/github.com/klauspost/compress/s2/encode_best.go
+++ b/vendor/github.com/klauspost/compress/s2/encode_best.go
@@ -157,6 +157,9 @@ func encodeBlockBest(dst, src []byte, dict *Dict) (d int) {
return m
}
matchDict := func(candidate, s int, first uint32, rep bool) match {
+ if s >= MaxDictSrcOffset {
+ return match{offset: candidate, s: s}
+ }
// Calculate offset as if in continuous array with s
offset := -len(dict.dict) + candidate
if best.length != 0 && best.s-best.offset == s-offset && !rep {
diff --git a/vendor/github.com/klauspost/compress/s2/encode_go.go b/vendor/github.com/klauspost/compress/s2/encode_go.go
index 0d39c7b0e..6b393c34d 100644
--- a/vendor/github.com/klauspost/compress/s2/encode_go.go
+++ b/vendor/github.com/klauspost/compress/s2/encode_go.go
@@ -316,6 +316,7 @@ func matchLen(a []byte, b []byte) int {
return len(a) + checked
}
+// len(src) must be > inputMargin.
func calcBlockSize(src []byte) (d int) {
// Initialize the hash table.
const (
@@ -501,6 +502,7 @@ emitRemainder:
return d
}
+// len(src) must be > inputMargin.
func calcBlockSizeSmall(src []byte) (d int) {
// Initialize the hash table.
const (
diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
index 54031aa31..5f110d194 100644
--- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
+++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
@@ -249,15 +249,43 @@ emit_literal_done_repeat_emit_encodeBlockAsm:
// matchLen
XORL R11, R11
+
+matchlen_loopback_16_repeat_extend_encodeBlockAsm:
+ CMPL R8, $0x10
+ JB matchlen_match8_repeat_extend_encodeBlockAsm
+ MOVQ (R9)(R11*1), R10
+ MOVQ 8(R9)(R11*1), R12
+ XORQ (BX)(R11*1), R10
+ JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm
+ XORQ 8(BX)(R11*1), R12
+ JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm
+ LEAL -16(R8), R8
+ LEAL 16(R11), R11
+ JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm
+
+matchlen_bsf_16repeat_extend_encodeBlockAsm:
+#ifdef GOAMD64_v3
+ TZCNTQ R12, R12
+
+#else
+ BSFQ R12, R12
+
+#endif
+ SARQ $0x03, R12
+ LEAL 8(R11)(R12*1), R11
+ JMP repeat_extend_forward_end_encodeBlockAsm
+
+matchlen_match8_repeat_extend_encodeBlockAsm:
CMPL R8, $0x08
JB matchlen_match4_repeat_extend_encodeBlockAsm
+ MOVQ (R9)(R11*1), R10
+ XORQ (BX)(R11*1), R10
+ JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm
+ LEAL -8(R8), R8
+ LEAL 8(R11), R11
+ JMP matchlen_match4_repeat_extend_encodeBlockAsm
-matchlen_loopback_repeat_extend_encodeBlockAsm:
- MOVQ (R9)(R11*1), R10
- XORQ (BX)(R11*1), R10
- TESTQ R10, R10
- JZ matchlen_loop_repeat_extend_encodeBlockAsm
-
+matchlen_bsf_8_repeat_extend_encodeBlockAsm:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
@@ -269,12 +297,6 @@ matchlen_loopback_repeat_extend_encodeBlockAsm:
LEAL (R11)(R10*1), R11
JMP repeat_extend_forward_end_encodeBlockAsm
-matchlen_loop_repeat_extend_encodeBlockAsm:
- LEAL -8(R8), R8
- LEAL 8(R11), R11
- CMPL R8, $0x08
- JAE matchlen_loopback_repeat_extend_encodeBlockAsm
-
matchlen_match4_repeat_extend_encodeBlockAsm:
CMPL R8, $0x04
JB matchlen_match2_repeat_extend_encodeBlockAsm
@@ -851,15 +873,43 @@ match_nolit_loop_encodeBlockAsm:
// matchLen
XORL R9, R9
+
+matchlen_loopback_16_match_nolit_encodeBlockAsm:
+ CMPL SI, $0x10
+ JB matchlen_match8_match_nolit_encodeBlockAsm
+ MOVQ (DI)(R9*1), R8
+ MOVQ 8(DI)(R9*1), R10
+ XORQ (BX)(R9*1), R8
+ JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm
+ XORQ 8(BX)(R9*1), R10
+ JNZ matchlen_bsf_16match_nolit_encodeBlockAsm
+ LEAL -16(SI), SI
+ LEAL 16(R9), R9
+ JMP matchlen_loopback_16_match_nolit_encodeBlockAsm
+
+matchlen_bsf_16match_nolit_encodeBlockAsm:
+#ifdef GOAMD64_v3
+ TZCNTQ R10, R10
+
+#else
+ BSFQ R10, R10
+
+#endif
+ SARQ $0x03, R10
+ LEAL 8(R9)(R10*1), R9
+ JMP match_nolit_end_encodeBlockAsm
+
+matchlen_match8_match_nolit_encodeBlockAsm:
CMPL SI, $0x08
JB matchlen_match4_match_nolit_encodeBlockAsm
+ MOVQ (DI)(R9*1), R8
+ XORQ (BX)(R9*1), R8
+ JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm
+ LEAL -8(SI), SI
+ LEAL 8(R9), R9
+ JMP matchlen_match4_match_nolit_encodeBlockAsm
-matchlen_loopback_match_nolit_encodeBlockAsm:
- MOVQ (DI)(R9*1), R8
- XORQ (BX)(R9*1), R8
- TESTQ R8, R8
- JZ matchlen_loop_match_nolit_encodeBlockAsm
-
+matchlen_bsf_8_match_nolit_encodeBlockAsm:
#ifdef GOAMD64_v3
TZCNTQ R8, R8
@@ -871,12 +921,6 @@ matchlen_loopback_match_nolit_encodeBlockAsm:
LEAL (R9)(R8*1), R9
JMP match_nolit_end_encodeBlockAsm
-matchlen_loop_match_nolit_encodeBlockAsm:
- LEAL -8(SI), SI
- LEAL 8(R9), R9
- CMPL SI, $0x08
- JAE matchlen_loopback_match_nolit_encodeBlockAsm
-
matchlen_match4_match_nolit_encodeBlockAsm:
CMPL SI, $0x04
JB matchlen_match2_match_nolit_encodeBlockAsm
@@ -1610,15 +1654,43 @@ emit_literal_done_repeat_emit_encodeBlockAsm4MB:
// matchLen
XORL R11, R11
+
+matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB:
+ CMPL R8, $0x10
+ JB matchlen_match8_repeat_extend_encodeBlockAsm4MB
+ MOVQ (R9)(R11*1), R10
+ MOVQ 8(R9)(R11*1), R12
+ XORQ (BX)(R11*1), R10
+ JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB
+ XORQ 8(BX)(R11*1), R12
+ JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm4MB
+ LEAL -16(R8), R8
+ LEAL 16(R11), R11
+ JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB
+
+matchlen_bsf_16repeat_extend_encodeBlockAsm4MB:
+#ifdef GOAMD64_v3
+ TZCNTQ R12, R12
+
+#else
+ BSFQ R12, R12
+
+#endif
+ SARQ $0x03, R12
+ LEAL 8(R11)(R12*1), R11
+ JMP repeat_extend_forward_end_encodeBlockAsm4MB
+
+matchlen_match8_repeat_extend_encodeBlockAsm4MB:
CMPL R8, $0x08
JB matchlen_match4_repeat_extend_encodeBlockAsm4MB
+ MOVQ (R9)(R11*1), R10
+ XORQ (BX)(R11*1), R10
+ JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB
+ LEAL -8(R8), R8
+ LEAL 8(R11), R11
+ JMP matchlen_match4_repeat_extend_encodeBlockAsm4MB
-matchlen_loopback_repeat_extend_encodeBlockAsm4MB:
- MOVQ (R9)(R11*1), R10
- XORQ (BX)(R11*1), R10
- TESTQ R10, R10
- JZ matchlen_loop_repeat_extend_encodeBlockAsm4MB
-
+matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
@@ -1630,12 +1702,6 @@ matchlen_loopback_repeat_extend_encodeBlockAsm4MB:
LEAL (R11)(R10*1), R11
JMP repeat_extend_forward_end_encodeBlockAsm4MB
-matchlen_loop_repeat_extend_encodeBlockAsm4MB:
- LEAL -8(R8), R8
- LEAL 8(R11), R11
- CMPL R8, $0x08
- JAE matchlen_loopback_repeat_extend_encodeBlockAsm4MB
-
matchlen_match4_repeat_extend_encodeBlockAsm4MB:
CMPL R8, $0x04
JB matchlen_match2_repeat_extend_encodeBlockAsm4MB
@@ -2162,15 +2228,43 @@ match_nolit_loop_encodeBlockAsm4MB:
// matchLen
XORL R9, R9
+
+matchlen_loopback_16_match_nolit_encodeBlockAsm4MB:
+ CMPL SI, $0x10
+ JB matchlen_match8_match_nolit_encodeBlockAsm4MB
+ MOVQ (DI)(R9*1), R8
+ MOVQ 8(DI)(R9*1), R10
+ XORQ (BX)(R9*1), R8
+ JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm4MB
+ XORQ 8(BX)(R9*1), R10
+ JNZ matchlen_bsf_16match_nolit_encodeBlockAsm4MB
+ LEAL -16(SI), SI
+ LEAL 16(R9), R9
+ JMP matchlen_loopback_16_match_nolit_encodeBlockAsm4MB
+
+matchlen_bsf_16match_nolit_encodeBlockAsm4MB:
+#ifdef GOAMD64_v3
+ TZCNTQ R10, R10
+
+#else
+ BSFQ R10, R10
+
+#endif
+ SARQ $0x03, R10
+ LEAL 8(R9)(R10*1), R9
+ JMP match_nolit_end_encodeBlockAsm4MB
+
+matchlen_match8_match_nolit_encodeBlockAsm4MB:
CMPL SI, $0x08
JB matchlen_match4_match_nolit_encodeBlockAsm4MB
+ MOVQ (DI)(R9*1), R8
+ XORQ (BX)(R9*1), R8
+ JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm4MB
+ LEAL -8(SI), SI
+ LEAL 8(R9), R9
+ JMP matchlen_match4_match_nolit_encodeBlockAsm4MB
-matchlen_loopback_match_nolit_encodeBlockAsm4MB:
- MOVQ (DI)(R9*1), R8
- XORQ (BX)(R9*1), R8
- TESTQ R8, R8
- JZ matchlen_loop_match_nolit_encodeBlockAsm4MB
-
+matchlen_bsf_8_match_nolit_encodeBlockAsm4MB:
#ifdef GOAMD64_v3
TZCNTQ R8, R8
@@ -2182,12 +2276,6 @@ matchlen_loopback_match_nolit_encodeBlockAsm4MB:
LEAL (R9)(R8*1), R9
JMP match_nolit_end_encodeBlockAsm4MB
-matchlen_loop_match_nolit_encodeBlockAsm4MB:
- LEAL -8(SI), SI
- LEAL 8(R9), R9
- CMPL SI, $0x08
- JAE matchlen_loopback_match_nolit_encodeBlockAsm4MB
-
matchlen_match4_match_nolit_encodeBlockAsm4MB:
CMPL SI, $0x04
JB matchlen_match2_match_nolit_encodeBlockAsm4MB
@@ -2873,15 +2961,43 @@ emit_literal_done_repeat_emit_encodeBlockAsm12B:
// matchLen
XORL R11, R11
+
+matchlen_loopback_16_repeat_extend_encodeBlockAsm12B:
+ CMPL R8, $0x10
+ JB matchlen_match8_repeat_extend_encodeBlockAsm12B
+ MOVQ (R9)(R11*1), R10
+ MOVQ 8(R9)(R11*1), R12
+ XORQ (BX)(R11*1), R10
+ JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm12B
+ XORQ 8(BX)(R11*1), R12
+ JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm12B
+ LEAL -16(R8), R8
+ LEAL 16(R11), R11
+ JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm12B
+
+matchlen_bsf_16repeat_extend_encodeBlockAsm12B:
+#ifdef GOAMD64_v3
+ TZCNTQ R12, R12
+
+#else
+ BSFQ R12, R12
+
+#endif
+ SARQ $0x03, R12
+ LEAL 8(R11)(R12*1), R11
+ JMP repeat_extend_forward_end_encodeBlockAsm12B
+
+matchlen_match8_repeat_extend_encodeBlockAsm12B:
CMPL R8, $0x08
JB matchlen_match4_repeat_extend_encodeBlockAsm12B
+ MOVQ (R9)(R11*1), R10
+ XORQ (BX)(R11*1), R10
+ JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm12B
+ LEAL -8(R8), R8
+ LEAL 8(R11), R11
+ JMP matchlen_match4_repeat_extend_encodeBlockAsm12B
-matchlen_loopback_repeat_extend_encodeBlockAsm12B:
- MOVQ (R9)(R11*1), R10
- XORQ (BX)(R11*1), R10
- TESTQ R10, R10
- JZ matchlen_loop_repeat_extend_encodeBlockAsm12B
-
+matchlen_bsf_8_repeat_extend_encodeBlockAsm12B:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
@@ -2893,12 +3009,6 @@ matchlen_loopback_repeat_extend_encodeBlockAsm12B:
LEAL (R11)(R10*1), R11
JMP repeat_extend_forward_end_encodeBlockAsm12B
-matchlen_loop_repeat_extend_encodeBlockAsm12B:
- LEAL -8(R8), R8
- LEAL 8(R11), R11
- CMPL R8, $0x08
- JAE matchlen_loopback_repeat_extend_encodeBlockAsm12B
-
matchlen_match4_repeat_extend_encodeBlockAsm12B:
CMPL R8, $0x04
JB matchlen_match2_repeat_extend_encodeBlockAsm12B
@@ -3303,15 +3413,43 @@ match_nolit_loop_encodeBlockAsm12B:
// matchLen
XORL R9, R9
+
+matchlen_loopback_16_match_nolit_encodeBlockAsm12B:
+ CMPL SI, $0x10
+ JB matchlen_match8_match_nolit_encodeBlockAsm12B
+ MOVQ (DI)(R9*1), R8
+ MOVQ 8(DI)(R9*1), R10
+ XORQ (BX)(R9*1), R8
+ JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm12B
+ XORQ 8(BX)(R9*1), R10
+ JNZ matchlen_bsf_16match_nolit_encodeBlockAsm12B
+ LEAL -16(SI), SI
+ LEAL 16(R9), R9
+ JMP matchlen_loopback_16_match_nolit_encodeBlockAsm12B
+
+matchlen_bsf_16match_nolit_encodeBlockAsm12B:
+#ifdef GOAMD64_v3
+ TZCNTQ R10, R10
+
+#else
+ BSFQ R10, R10
+
+#endif
+ SARQ $0x03, R10
+ LEAL 8(R9)(R10*1), R9
+ JMP match_nolit_end_encodeBlockAsm12B
+
+matchlen_match8_match_nolit_encodeBlockAsm12B:
CMPL SI, $0x08
JB matchlen_match4_match_nolit_encodeBlockAsm12B
+ MOVQ (DI)(R9*1), R8
+ XORQ (BX)(R9*1), R8
+ JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm12B
+ LEAL -8(SI), SI
+ LEAL 8(R9), R9
+ JMP matchlen_match4_match_nolit_encodeBlockAsm12B
-matchlen_loopback_match_nolit_encodeBlockAsm12B:
- MOVQ (DI)(R9*1), R8
- XORQ (BX)(R9*1), R8
- TESTQ R8, R8
- JZ matchlen_loop_match_nolit_encodeBlockAsm12B
-
+matchlen_bsf_8_match_nolit_encodeBlockAsm12B:
#ifdef GOAMD64_v3
TZCNTQ R8, R8
@@ -3323,12 +3461,6 @@ matchlen_loopback_match_nolit_encodeBlockAsm12B:
LEAL (R9)(R8*1), R9
JMP match_nolit_end_encodeBlockAsm12B
-matchlen_loop_match_nolit_encodeBlockAsm12B:
- LEAL -8(SI), SI
- LEAL 8(R9), R9
- CMPL SI, $0x08
- JAE matchlen_loopback_match_nolit_encodeBlockAsm12B
-
matchlen_match4_match_nolit_encodeBlockAsm12B:
CMPL SI, $0x04
JB matchlen_match2_match_nolit_encodeBlockAsm12B
@@ -3904,15 +4036,43 @@ emit_literal_done_repeat_emit_encodeBlockAsm10B:
// matchLen
XORL R11, R11
+
+matchlen_loopback_16_repeat_extend_encodeBlockAsm10B:
+ CMPL R8, $0x10
+ JB matchlen_match8_repeat_extend_encodeBlockAsm10B
+ MOVQ (R9)(R11*1), R10
+ MOVQ 8(R9)(R11*1), R12
+ XORQ (BX)(R11*1), R10
+ JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm10B
+ XORQ 8(BX)(R11*1), R12
+ JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm10B
+ LEAL -16(R8), R8
+ LEAL 16(R11), R11
+ JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm10B
+
+matchlen_bsf_16repeat_extend_encodeBlockAsm10B:
+#ifdef GOAMD64_v3
+ TZCNTQ R12, R12
+
+#else
+ BSFQ R12, R12
+
+#endif
+ SARQ $0x03, R12
+ LEAL 8(R11)(R12*1), R11
+ JMP repeat_extend_forward_end_encodeBlockAsm10B
+
+matchlen_match8_repeat_extend_encodeBlockAsm10B:
CMPL R8, $0x08
JB matchlen_match4_repeat_extend_encodeBlockAsm10B
+ MOVQ (R9)(R11*1), R10
+ XORQ (BX)(R11*1), R10
+ JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm10B
+ LEAL -8(R8), R8
+ LEAL 8(R11), R11
+ JMP matchlen_match4_repeat_extend_encodeBlockAsm10B
-matchlen_loopback_repeat_extend_encodeBlockAsm10B:
- MOVQ (R9)(R11*1), R10
- XORQ (BX)(R11*1), R10
- TESTQ R10, R10
- JZ matchlen_loop_repeat_extend_encodeBlockAsm10B
-
+matchlen_bsf_8_repeat_extend_encodeBlockAsm10B:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
@@ -3924,12 +4084,6 @@ matchlen_loopback_repeat_extend_encodeBlockAsm10B:
LEAL (R11)(R10*1), R11
JMP repeat_extend_forward_end_encodeBlockAsm10B
-matchlen_loop_repeat_extend_encodeBlockAsm10B:
- LEAL -8(R8), R8
- LEAL 8(R11), R11
- CMPL R8, $0x08
- JAE matchlen_loopback_repeat_extend_encodeBlockAsm10B
-
matchlen_match4_repeat_extend_encodeBlockAsm10B:
CMPL R8, $0x04
JB matchlen_match2_repeat_extend_encodeBlockAsm10B
@@ -4334,15 +4488,43 @@ match_nolit_loop_encodeBlockAsm10B:
// matchLen
XORL R9, R9
+
+matchlen_loopback_16_match_nolit_encodeBlockAsm10B:
+ CMPL SI, $0x10
+ JB matchlen_match8_match_nolit_encodeBlockAsm10B
+ MOVQ (DI)(R9*1), R8
+ MOVQ 8(DI)(R9*1), R10
+ XORQ (BX)(R9*1), R8
+ JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm10B
+ XORQ 8(BX)(R9*1), R10
+ JNZ matchlen_bsf_16match_nolit_encodeBlockAsm10B
+ LEAL -16(SI), SI
+ LEAL 16(R9), R9
+ JMP matchlen_loopback_16_match_nolit_encodeBlockAsm10B
+
+matchlen_bsf_16match_nolit_encodeBlockAsm10B:
+#ifdef GOAMD64_v3
+ TZCNTQ R10, R10
+
+#else
+ BSFQ R10, R10
+
+#endif
+ SARQ $0x03, R10
+ LEAL 8(R9)(R10*1), R9
+ JMP match_nolit_end_encodeBlockAsm10B
+
+matchlen_match8_match_nolit_encodeBlockAsm10B:
CMPL SI, $0x08
JB matchlen_match4_match_nolit_encodeBlockAsm10B
+ MOVQ (DI)(R9*1), R8
+ XORQ (BX)(R9*1), R8
+ JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm10B
+ LEAL -8(SI), SI
+ LEAL 8(R9), R9
+ JMP matchlen_match4_match_nolit_encodeBlockAsm10B
-matchlen_loopback_match_nolit_encodeBlockAsm10B:
- MOVQ (DI)(R9*1), R8
- XORQ (BX)(R9*1), R8
- TESTQ R8, R8
- JZ matchlen_loop_match_nolit_encodeBlockAsm10B
-
+matchlen_bsf_8_match_nolit_encodeBlockAsm10B:
#ifdef GOAMD64_v3
TZCNTQ R8, R8
@@ -4354,12 +4536,6 @@ matchlen_loopback_match_nolit_encodeBlockAsm10B:
LEAL (R9)(R8*1), R9
JMP match_nolit_end_encodeBlockAsm10B
-matchlen_loop_match_nolit_encodeBlockAsm10B:
- LEAL -8(SI), SI
- LEAL 8(R9), R9
- CMPL SI, $0x08
- JAE matchlen_loopback_match_nolit_encodeBlockAsm10B
-
matchlen_match4_match_nolit_encodeBlockAsm10B:
CMPL SI, $0x04
JB matchlen_match2_match_nolit_encodeBlockAsm10B
@@ -4935,15 +5111,43 @@ emit_literal_done_repeat_emit_encodeBlockAsm8B:
// matchLen
XORL R11, R11
+
+matchlen_loopback_16_repeat_extend_encodeBlockAsm8B:
+ CMPL R8, $0x10
+ JB matchlen_match8_repeat_extend_encodeBlockAsm8B
+ MOVQ (R9)(R11*1), R10
+ MOVQ 8(R9)(R11*1), R12
+ XORQ (BX)(R11*1), R10
+ JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm8B
+ XORQ 8(BX)(R11*1), R12
+ JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm8B
+ LEAL -16(R8), R8
+ LEAL 16(R11), R11
+ JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm8B
+
+matchlen_bsf_16repeat_extend_encodeBlockAsm8B:
+#ifdef GOAMD64_v3
+ TZCNTQ R12, R12
+
+#else
+ BSFQ R12, R12
+
+#endif
+ SARQ $0x03, R12
+ LEAL 8(R11)(R12*1), R11
+ JMP repeat_extend_forward_end_encodeBlockAsm8B
+
+matchlen_match8_repeat_extend_encodeBlockAsm8B:
CMPL R8, $0x08
JB matchlen_match4_repeat_extend_encodeBlockAsm8B
+ MOVQ (R9)(R11*1), R10
+ XORQ (BX)(R11*1), R10
+ JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm8B
+ LEAL -8(R8), R8
+ LEAL 8(R11), R11
+ JMP matchlen_match4_repeat_extend_encodeBlockAsm8B
-matchlen_loopback_repeat_extend_encodeBlockAsm8B:
- MOVQ (R9)(R11*1), R10
- XORQ (BX)(R11*1), R10
- TESTQ R10, R10
- JZ matchlen_loop_repeat_extend_encodeBlockAsm8B
-
+matchlen_bsf_8_repeat_extend_encodeBlockAsm8B:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
@@ -4955,12 +5159,6 @@ matchlen_loopback_repeat_extend_encodeBlockAsm8B:
LEAL (R11)(R10*1), R11
JMP repeat_extend_forward_end_encodeBlockAsm8B
-matchlen_loop_repeat_extend_encodeBlockAsm8B:
- LEAL -8(R8), R8
- LEAL 8(R11), R11
- CMPL R8, $0x08
- JAE matchlen_loopback_repeat_extend_encodeBlockAsm8B
-
matchlen_match4_repeat_extend_encodeBlockAsm8B:
CMPL R8, $0x04
JB matchlen_match2_repeat_extend_encodeBlockAsm8B
@@ -5351,15 +5549,43 @@ match_nolit_loop_encodeBlockAsm8B:
// matchLen
XORL R9, R9
+
+matchlen_loopback_16_match_nolit_encodeBlockAsm8B:
+ CMPL SI, $0x10
+ JB matchlen_match8_match_nolit_encodeBlockAsm8B
+ MOVQ (DI)(R9*1), R8
+ MOVQ 8(DI)(R9*1), R10
+ XORQ (BX)(R9*1), R8
+ JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm8B
+ XORQ 8(BX)(R9*1), R10
+ JNZ matchlen_bsf_16match_nolit_encodeBlockAsm8B
+ LEAL -16(SI), SI
+ LEAL 16(R9), R9
+ JMP matchlen_loopback_16_match_nolit_encodeBlockAsm8B
+
+matchlen_bsf_16match_nolit_encodeBlockAsm8B:
+#ifdef GOAMD64_v3
+ TZCNTQ R10, R10
+
+#else
+ BSFQ R10, R10
+
+#endif
+ SARQ $0x03, R10
+ LEAL 8(R9)(R10*1), R9
+ JMP match_nolit_end_encodeBlockAsm8B
+
+matchlen_match8_match_nolit_encodeBlockAsm8B:
CMPL SI, $0x08
JB matchlen_match4_match_nolit_encodeBlockAsm8B
+ MOVQ (DI)(R9*1), R8
+ XORQ (BX)(R9*1), R8
+ JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm8B
+ LEAL -8(SI), SI
+ LEAL 8(R9), R9
+ JMP matchlen_match4_match_nolit_encodeBlockAsm8B
-matchlen_loopback_match_nolit_encodeBlockAsm8B:
- MOVQ (DI)(R9*1), R8
- XORQ (BX)(R9*1), R8
- TESTQ R8, R8
- JZ matchlen_loop_match_nolit_encodeBlockAsm8B
-
+matchlen_bsf_8_match_nolit_encodeBlockAsm8B:
#ifdef GOAMD64_v3
TZCNTQ R8, R8
@@ -5371,12 +5597,6 @@ matchlen_loopback_match_nolit_encodeBlockAsm8B:
LEAL (R9)(R8*1), R9
JMP match_nolit_end_encodeBlockAsm8B
-matchlen_loop_match_nolit_encodeBlockAsm8B:
- LEAL -8(SI), SI
- LEAL 8(R9), R9
- CMPL SI, $0x08
- JAE matchlen_loopback_match_nolit_encodeBlockAsm8B
-
matchlen_match4_match_nolit_encodeBlockAsm8B:
CMPL SI, $0x04
JB matchlen_match2_match_nolit_encodeBlockAsm8B
@@ -5854,15 +6074,43 @@ match_dst_size_check_encodeBetterBlockAsm:
// matchLen
XORL R11, R11
+
+matchlen_loopback_16_match_nolit_encodeBetterBlockAsm:
+ CMPL DI, $0x10
+ JB matchlen_match8_match_nolit_encodeBetterBlockAsm
+ MOVQ (R8)(R11*1), R10
+ MOVQ 8(R8)(R11*1), R12
+ XORQ (R9)(R11*1), R10
+ JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm
+ XORQ 8(R9)(R11*1), R12
+ JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm
+ LEAL -16(DI), DI
+ LEAL 16(R11), R11
+ JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm
+
+matchlen_bsf_16match_nolit_encodeBetterBlockAsm:
+#ifdef GOAMD64_v3
+ TZCNTQ R12, R12
+
+#else
+ BSFQ R12, R12
+
+#endif
+ SARQ $0x03, R12
+ LEAL 8(R11)(R12*1), R11
+ JMP match_nolit_end_encodeBetterBlockAsm
+
+matchlen_match8_match_nolit_encodeBetterBlockAsm:
CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeBetterBlockAsm
+ MOVQ (R8)(R11*1), R10
+ XORQ (R9)(R11*1), R10
+ JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm
+ LEAL -8(DI), DI
+ LEAL 8(R11), R11
+ JMP matchlen_match4_match_nolit_encodeBetterBlockAsm
-matchlen_loopback_match_nolit_encodeBetterBlockAsm:
- MOVQ (R8)(R11*1), R10
- XORQ (R9)(R11*1), R10
- TESTQ R10, R10
- JZ matchlen_loop_match_nolit_encodeBetterBlockAsm
-
+matchlen_bsf_8_match_nolit_encodeBetterBlockAsm:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
@@ -5874,12 +6122,6 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm:
LEAL (R11)(R10*1), R11
JMP match_nolit_end_encodeBetterBlockAsm
-matchlen_loop_match_nolit_encodeBetterBlockAsm:
- LEAL -8(DI), DI
- LEAL 8(R11), R11
- CMPL DI, $0x08
- JAE matchlen_loopback_match_nolit_encodeBetterBlockAsm
-
matchlen_match4_match_nolit_encodeBetterBlockAsm:
CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeBetterBlockAsm
@@ -6926,15 +7168,43 @@ match_dst_size_check_encodeBetterBlockAsm4MB:
// matchLen
XORL R11, R11
+
+matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB:
+ CMPL DI, $0x10
+ JB matchlen_match8_match_nolit_encodeBetterBlockAsm4MB
+ MOVQ (R8)(R11*1), R10
+ MOVQ 8(R8)(R11*1), R12
+ XORQ (R9)(R11*1), R10
+ JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB
+ XORQ 8(R9)(R11*1), R12
+ JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB
+ LEAL -16(DI), DI
+ LEAL 16(R11), R11
+ JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB
+
+matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB:
+#ifdef GOAMD64_v3
+ TZCNTQ R12, R12
+
+#else
+ BSFQ R12, R12
+
+#endif
+ SARQ $0x03, R12
+ LEAL 8(R11)(R12*1), R11
+ JMP match_nolit_end_encodeBetterBlockAsm4MB
+
+matchlen_match8_match_nolit_encodeBetterBlockAsm4MB:
CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeBetterBlockAsm4MB
+ MOVQ (R8)(R11*1), R10
+ XORQ (R9)(R11*1), R10
+ JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB
+ LEAL -8(DI), DI
+ LEAL 8(R11), R11
+ JMP matchlen_match4_match_nolit_encodeBetterBlockAsm4MB
-matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB:
- MOVQ (R8)(R11*1), R10
- XORQ (R9)(R11*1), R10
- TESTQ R10, R10
- JZ matchlen_loop_match_nolit_encodeBetterBlockAsm4MB
-
+matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
@@ -6946,12 +7216,6 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB:
LEAL (R11)(R10*1), R11
JMP match_nolit_end_encodeBetterBlockAsm4MB
-matchlen_loop_match_nolit_encodeBetterBlockAsm4MB:
- LEAL -8(DI), DI
- LEAL 8(R11), R11
- CMPL DI, $0x08
- JAE matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB
-
matchlen_match4_match_nolit_encodeBetterBlockAsm4MB:
CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeBetterBlockAsm4MB
@@ -7924,15 +8188,43 @@ match_dst_size_check_encodeBetterBlockAsm12B:
// matchLen
XORL R11, R11
+
+matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B:
+ CMPL DI, $0x10
+ JB matchlen_match8_match_nolit_encodeBetterBlockAsm12B
+ MOVQ (R8)(R11*1), R10
+ MOVQ 8(R8)(R11*1), R12
+ XORQ (R9)(R11*1), R10
+ JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B
+ XORQ 8(R9)(R11*1), R12
+ JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B
+ LEAL -16(DI), DI
+ LEAL 16(R11), R11
+ JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B
+
+matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B:
+#ifdef GOAMD64_v3
+ TZCNTQ R12, R12
+
+#else
+ BSFQ R12, R12
+
+#endif
+ SARQ $0x03, R12
+ LEAL 8(R11)(R12*1), R11
+ JMP match_nolit_end_encodeBetterBlockAsm12B
+
+matchlen_match8_match_nolit_encodeBetterBlockAsm12B:
CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeBetterBlockAsm12B
+ MOVQ (R8)(R11*1), R10
+ XORQ (R9)(R11*1), R10
+ JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B
+ LEAL -8(DI), DI
+ LEAL 8(R11), R11
+ JMP matchlen_match4_match_nolit_encodeBetterBlockAsm12B
-matchlen_loopback_match_nolit_encodeBetterBlockAsm12B:
- MOVQ (R8)(R11*1), R10
- XORQ (R9)(R11*1), R10
- TESTQ R10, R10
- JZ matchlen_loop_match_nolit_encodeBetterBlockAsm12B
-
+matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
@@ -7944,12 +8236,6 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm12B:
LEAL (R11)(R10*1), R11
JMP match_nolit_end_encodeBetterBlockAsm12B
-matchlen_loop_match_nolit_encodeBetterBlockAsm12B:
- LEAL -8(DI), DI
- LEAL 8(R11), R11
- CMPL DI, $0x08
- JAE matchlen_loopback_match_nolit_encodeBetterBlockAsm12B
-
matchlen_match4_match_nolit_encodeBetterBlockAsm12B:
CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeBetterBlockAsm12B
@@ -8775,15 +9061,43 @@ match_dst_size_check_encodeBetterBlockAsm10B:
// matchLen
XORL R11, R11
+
+matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B:
+ CMPL DI, $0x10
+ JB matchlen_match8_match_nolit_encodeBetterBlockAsm10B
+ MOVQ (R8)(R11*1), R10
+ MOVQ 8(R8)(R11*1), R12
+ XORQ (R9)(R11*1), R10
+ JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B
+ XORQ 8(R9)(R11*1), R12
+ JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B
+ LEAL -16(DI), DI
+ LEAL 16(R11), R11
+ JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B
+
+matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B:
+#ifdef GOAMD64_v3
+ TZCNTQ R12, R12
+
+#else
+ BSFQ R12, R12
+
+#endif
+ SARQ $0x03, R12
+ LEAL 8(R11)(R12*1), R11
+ JMP match_nolit_end_encodeBetterBlockAsm10B
+
+matchlen_match8_match_nolit_encodeBetterBlockAsm10B:
CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeBetterBlockAsm10B
+ MOVQ (R8)(R11*1), R10
+ XORQ (R9)(R11*1), R10
+ JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B
+ LEAL -8(DI), DI
+ LEAL 8(R11), R11
+ JMP matchlen_match4_match_nolit_encodeBetterBlockAsm10B
-matchlen_loopback_match_nolit_encodeBetterBlockAsm10B:
- MOVQ (R8)(R11*1), R10
- XORQ (R9)(R11*1), R10
- TESTQ R10, R10
- JZ matchlen_loop_match_nolit_encodeBetterBlockAsm10B
-
+matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
@@ -8795,12 +9109,6 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm10B:
LEAL (R11)(R10*1), R11
JMP match_nolit_end_encodeBetterBlockAsm10B
-matchlen_loop_match_nolit_encodeBetterBlockAsm10B:
- LEAL -8(DI), DI
- LEAL 8(R11), R11
- CMPL DI, $0x08
- JAE matchlen_loopback_match_nolit_encodeBetterBlockAsm10B
-
matchlen_match4_match_nolit_encodeBetterBlockAsm10B:
CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeBetterBlockAsm10B
@@ -9626,15 +9934,43 @@ match_dst_size_check_encodeBetterBlockAsm8B:
// matchLen
XORL R11, R11
+
+matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B:
+ CMPL DI, $0x10
+ JB matchlen_match8_match_nolit_encodeBetterBlockAsm8B
+ MOVQ (R8)(R11*1), R10
+ MOVQ 8(R8)(R11*1), R12
+ XORQ (R9)(R11*1), R10
+ JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B
+ XORQ 8(R9)(R11*1), R12
+ JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B
+ LEAL -16(DI), DI
+ LEAL 16(R11), R11
+ JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B
+
+matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B:
+#ifdef GOAMD64_v3
+ TZCNTQ R12, R12
+
+#else
+ BSFQ R12, R12
+
+#endif
+ SARQ $0x03, R12
+ LEAL 8(R11)(R12*1), R11
+ JMP match_nolit_end_encodeBetterBlockAsm8B
+
+matchlen_match8_match_nolit_encodeBetterBlockAsm8B:
CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeBetterBlockAsm8B
+ MOVQ (R8)(R11*1), R10
+ XORQ (R9)(R11*1), R10
+ JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B
+ LEAL -8(DI), DI
+ LEAL 8(R11), R11
+ JMP matchlen_match4_match_nolit_encodeBetterBlockAsm8B
-matchlen_loopback_match_nolit_encodeBetterBlockAsm8B:
- MOVQ (R8)(R11*1), R10
- XORQ (R9)(R11*1), R10
- TESTQ R10, R10
- JZ matchlen_loop_match_nolit_encodeBetterBlockAsm8B
-
+matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
@@ -9646,12 +9982,6 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm8B:
LEAL (R11)(R10*1), R11
JMP match_nolit_end_encodeBetterBlockAsm8B
-matchlen_loop_match_nolit_encodeBetterBlockAsm8B:
- LEAL -8(DI), DI
- LEAL 8(R11), R11
- CMPL DI, $0x08
- JAE matchlen_loopback_match_nolit_encodeBetterBlockAsm8B
-
matchlen_match4_match_nolit_encodeBetterBlockAsm8B:
CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeBetterBlockAsm8B
@@ -10575,15 +10905,43 @@ emit_literal_done_repeat_emit_encodeSnappyBlockAsm:
// matchLen
XORL R10, R10
+
+matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm:
+ CMPL DI, $0x10
+ JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm
+ MOVQ (R8)(R10*1), R9
+ MOVQ 8(R8)(R10*1), R11
+ XORQ (BX)(R10*1), R9
+ JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm
+ XORQ 8(BX)(R10*1), R11
+ JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm
+ LEAL -16(DI), DI
+ LEAL 16(R10), R10
+ JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm
+
+matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm:
+#ifdef GOAMD64_v3
+ TZCNTQ R11, R11
+
+#else
+ BSFQ R11, R11
+
+#endif
+ SARQ $0x03, R11
+ LEAL 8(R10)(R11*1), R10
+ JMP repeat_extend_forward_end_encodeSnappyBlockAsm
+
+matchlen_match8_repeat_extend_encodeSnappyBlockAsm:
CMPL DI, $0x08
JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm
+ MOVQ (R8)(R10*1), R9
+ XORQ (BX)(R10*1), R9
+ JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm
+ LEAL -8(DI), DI
+ LEAL 8(R10), R10
+ JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm
-matchlen_loopback_repeat_extend_encodeSnappyBlockAsm:
- MOVQ (R8)(R10*1), R9
- XORQ (BX)(R10*1), R9
- TESTQ R9, R9
- JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm
-
+matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm:
#ifdef GOAMD64_v3
TZCNTQ R9, R9
@@ -10595,12 +10953,6 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm:
LEAL (R10)(R9*1), R10
JMP repeat_extend_forward_end_encodeSnappyBlockAsm
-matchlen_loop_repeat_extend_encodeSnappyBlockAsm:
- LEAL -8(DI), DI
- LEAL 8(R10), R10
- CMPL DI, $0x08
- JAE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm
-
matchlen_match4_repeat_extend_encodeSnappyBlockAsm:
CMPL DI, $0x04
JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm
@@ -10897,15 +11249,43 @@ match_nolit_loop_encodeSnappyBlockAsm:
// matchLen
XORL R9, R9
+
+matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm:
+ CMPL SI, $0x10
+ JB matchlen_match8_match_nolit_encodeSnappyBlockAsm
+ MOVQ (DI)(R9*1), R8
+ MOVQ 8(DI)(R9*1), R10
+ XORQ (BX)(R9*1), R8
+ JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm
+ XORQ 8(BX)(R9*1), R10
+ JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm
+ LEAL -16(SI), SI
+ LEAL 16(R9), R9
+ JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm
+
+matchlen_bsf_16match_nolit_encodeSnappyBlockAsm:
+#ifdef GOAMD64_v3
+ TZCNTQ R10, R10
+
+#else
+ BSFQ R10, R10
+
+#endif
+ SARQ $0x03, R10
+ LEAL 8(R9)(R10*1), R9
+ JMP match_nolit_end_encodeSnappyBlockAsm
+
+matchlen_match8_match_nolit_encodeSnappyBlockAsm:
CMPL SI, $0x08
JB matchlen_match4_match_nolit_encodeSnappyBlockAsm
+ MOVQ (DI)(R9*1), R8
+ XORQ (BX)(R9*1), R8
+ JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm
+ LEAL -8(SI), SI
+ LEAL 8(R9), R9
+ JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm
-matchlen_loopback_match_nolit_encodeSnappyBlockAsm:
- MOVQ (DI)(R9*1), R8
- XORQ (BX)(R9*1), R8
- TESTQ R8, R8
- JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm
-
+matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm:
#ifdef GOAMD64_v3
TZCNTQ R8, R8
@@ -10917,12 +11297,6 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm:
LEAL (R9)(R8*1), R9
JMP match_nolit_end_encodeSnappyBlockAsm
-matchlen_loop_match_nolit_encodeSnappyBlockAsm:
- LEAL -8(SI), SI
- LEAL 8(R9), R9
- CMPL SI, $0x08
- JAE matchlen_loopback_match_nolit_encodeSnappyBlockAsm
-
matchlen_match4_match_nolit_encodeSnappyBlockAsm:
CMPL SI, $0x04
JB matchlen_match2_match_nolit_encodeSnappyBlockAsm
@@ -11437,15 +11811,43 @@ emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K:
// matchLen
XORL R10, R10
+
+matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K:
+ CMPL DI, $0x10
+ JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K
+ MOVQ (R8)(R10*1), R9
+ MOVQ 8(R8)(R10*1), R11
+ XORQ (BX)(R10*1), R9
+ JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K
+ XORQ 8(BX)(R10*1), R11
+ JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K
+ LEAL -16(DI), DI
+ LEAL 16(R10), R10
+ JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K
+
+matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K:
+#ifdef GOAMD64_v3
+ TZCNTQ R11, R11
+
+#else
+ BSFQ R11, R11
+
+#endif
+ SARQ $0x03, R11
+ LEAL 8(R10)(R11*1), R10
+ JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K
+
+matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K:
CMPL DI, $0x08
JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K
+ MOVQ (R8)(R10*1), R9
+ XORQ (BX)(R10*1), R9
+ JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K
+ LEAL -8(DI), DI
+ LEAL 8(R10), R10
+ JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K
-matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K:
- MOVQ (R8)(R10*1), R9
- XORQ (BX)(R10*1), R9
- TESTQ R9, R9
- JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm64K
-
+matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K:
#ifdef GOAMD64_v3
TZCNTQ R9, R9
@@ -11457,12 +11859,6 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K:
LEAL (R10)(R9*1), R10
JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K
-matchlen_loop_repeat_extend_encodeSnappyBlockAsm64K:
- LEAL -8(DI), DI
- LEAL 8(R10), R10
- CMPL DI, $0x08
- JAE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K
-
matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K:
CMPL DI, $0x04
JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K
@@ -11719,15 +12115,43 @@ match_nolit_loop_encodeSnappyBlockAsm64K:
// matchLen
XORL R9, R9
+
+matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K:
+ CMPL SI, $0x10
+ JB matchlen_match8_match_nolit_encodeSnappyBlockAsm64K
+ MOVQ (DI)(R9*1), R8
+ MOVQ 8(DI)(R9*1), R10
+ XORQ (BX)(R9*1), R8
+ JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K
+ XORQ 8(BX)(R9*1), R10
+ JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K
+ LEAL -16(SI), SI
+ LEAL 16(R9), R9
+ JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K
+
+matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K:
+#ifdef GOAMD64_v3
+ TZCNTQ R10, R10
+
+#else
+ BSFQ R10, R10
+
+#endif
+ SARQ $0x03, R10
+ LEAL 8(R9)(R10*1), R9
+ JMP match_nolit_end_encodeSnappyBlockAsm64K
+
+matchlen_match8_match_nolit_encodeSnappyBlockAsm64K:
CMPL SI, $0x08
JB matchlen_match4_match_nolit_encodeSnappyBlockAsm64K
+ MOVQ (DI)(R9*1), R8
+ XORQ (BX)(R9*1), R8
+ JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K
+ LEAL -8(SI), SI
+ LEAL 8(R9), R9
+ JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm64K
-matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K:
- MOVQ (DI)(R9*1), R8
- XORQ (BX)(R9*1), R8
- TESTQ R8, R8
- JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm64K
-
+matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K:
#ifdef GOAMD64_v3
TZCNTQ R8, R8
@@ -11739,12 +12163,6 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K:
LEAL (R9)(R8*1), R9
JMP match_nolit_end_encodeSnappyBlockAsm64K
-matchlen_loop_match_nolit_encodeSnappyBlockAsm64K:
- LEAL -8(SI), SI
- LEAL 8(R9), R9
- CMPL SI, $0x08
- JAE matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K
-
matchlen_match4_match_nolit_encodeSnappyBlockAsm64K:
CMPL SI, $0x04
JB matchlen_match2_match_nolit_encodeSnappyBlockAsm64K
@@ -12219,15 +12637,43 @@ emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B:
// matchLen
XORL R10, R10
+
+matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B:
+ CMPL DI, $0x10
+ JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B
+ MOVQ (R8)(R10*1), R9
+ MOVQ 8(R8)(R10*1), R11
+ XORQ (BX)(R10*1), R9
+ JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B
+ XORQ 8(BX)(R10*1), R11
+ JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B
+ LEAL -16(DI), DI
+ LEAL 16(R10), R10
+ JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B
+
+matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B:
+#ifdef GOAMD64_v3
+ TZCNTQ R11, R11
+
+#else
+ BSFQ R11, R11
+
+#endif
+ SARQ $0x03, R11
+ LEAL 8(R10)(R11*1), R10
+ JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B
+
+matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B:
CMPL DI, $0x08
JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B
+ MOVQ (R8)(R10*1), R9
+ XORQ (BX)(R10*1), R9
+ JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B
+ LEAL -8(DI), DI
+ LEAL 8(R10), R10
+ JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B
-matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B:
- MOVQ (R8)(R10*1), R9
- XORQ (BX)(R10*1), R9
- TESTQ R9, R9
- JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B
-
+matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B:
#ifdef GOAMD64_v3
TZCNTQ R9, R9
@@ -12239,12 +12685,6 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B:
LEAL (R10)(R9*1), R10
JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B
-matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B:
- LEAL -8(DI), DI
- LEAL 8(R10), R10
- CMPL DI, $0x08
- JAE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B
-
matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B:
CMPL DI, $0x04
JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B
@@ -12501,15 +12941,43 @@ match_nolit_loop_encodeSnappyBlockAsm12B:
// matchLen
XORL R9, R9
+
+matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B:
+ CMPL SI, $0x10
+ JB matchlen_match8_match_nolit_encodeSnappyBlockAsm12B
+ MOVQ (DI)(R9*1), R8
+ MOVQ 8(DI)(R9*1), R10
+ XORQ (BX)(R9*1), R8
+ JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B
+ XORQ 8(BX)(R9*1), R10
+ JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B
+ LEAL -16(SI), SI
+ LEAL 16(R9), R9
+ JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B
+
+matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B:
+#ifdef GOAMD64_v3
+ TZCNTQ R10, R10
+
+#else
+ BSFQ R10, R10
+
+#endif
+ SARQ $0x03, R10
+ LEAL 8(R9)(R10*1), R9
+ JMP match_nolit_end_encodeSnappyBlockAsm12B
+
+matchlen_match8_match_nolit_encodeSnappyBlockAsm12B:
CMPL SI, $0x08
JB matchlen_match4_match_nolit_encodeSnappyBlockAsm12B
+ MOVQ (DI)(R9*1), R8
+ XORQ (BX)(R9*1), R8
+ JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B
+ LEAL -8(SI), SI
+ LEAL 8(R9), R9
+ JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm12B
-matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B:
- MOVQ (DI)(R9*1), R8
- XORQ (BX)(R9*1), R8
- TESTQ R8, R8
- JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm12B
-
+matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B:
#ifdef GOAMD64_v3
TZCNTQ R8, R8
@@ -12521,12 +12989,6 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B:
LEAL (R9)(R8*1), R9
JMP match_nolit_end_encodeSnappyBlockAsm12B
-matchlen_loop_match_nolit_encodeSnappyBlockAsm12B:
- LEAL -8(SI), SI
- LEAL 8(R9), R9
- CMPL SI, $0x08
- JAE matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B
-
matchlen_match4_match_nolit_encodeSnappyBlockAsm12B:
CMPL SI, $0x04
JB matchlen_match2_match_nolit_encodeSnappyBlockAsm12B
@@ -13001,15 +13463,43 @@ emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B:
// matchLen
XORL R10, R10
+
+matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B:
+ CMPL DI, $0x10
+ JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B
+ MOVQ (R8)(R10*1), R9
+ MOVQ 8(R8)(R10*1), R11
+ XORQ (BX)(R10*1), R9
+ JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B
+ XORQ 8(BX)(R10*1), R11
+ JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B
+ LEAL -16(DI), DI
+ LEAL 16(R10), R10
+ JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B
+
+matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B:
+#ifdef GOAMD64_v3
+ TZCNTQ R11, R11
+
+#else
+ BSFQ R11, R11
+
+#endif
+ SARQ $0x03, R11
+ LEAL 8(R10)(R11*1), R10
+ JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B
+
+matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B:
CMPL DI, $0x08
JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B
+ MOVQ (R8)(R10*1), R9
+ XORQ (BX)(R10*1), R9
+ JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B
+ LEAL -8(DI), DI
+ LEAL 8(R10), R10
+ JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B
-matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B:
- MOVQ (R8)(R10*1), R9
- XORQ (BX)(R10*1), R9
- TESTQ R9, R9
- JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B
-
+matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B:
#ifdef GOAMD64_v3
TZCNTQ R9, R9
@@ -13021,12 +13511,6 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B:
LEAL (R10)(R9*1), R10
JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B
-matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B:
- LEAL -8(DI), DI
- LEAL 8(R10), R10
- CMPL DI, $0x08
- JAE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B
-
matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B:
CMPL DI, $0x04
JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B
@@ -13283,15 +13767,43 @@ match_nolit_loop_encodeSnappyBlockAsm10B:
// matchLen
XORL R9, R9
+
+matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B:
+ CMPL SI, $0x10
+ JB matchlen_match8_match_nolit_encodeSnappyBlockAsm10B
+ MOVQ (DI)(R9*1), R8
+ MOVQ 8(DI)(R9*1), R10
+ XORQ (BX)(R9*1), R8
+ JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B
+ XORQ 8(BX)(R9*1), R10
+ JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B
+ LEAL -16(SI), SI
+ LEAL 16(R9), R9
+ JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B
+
+matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B:
+#ifdef GOAMD64_v3
+ TZCNTQ R10, R10
+
+#else
+ BSFQ R10, R10
+
+#endif
+ SARQ $0x03, R10
+ LEAL 8(R9)(R10*1), R9
+ JMP match_nolit_end_encodeSnappyBlockAsm10B
+
+matchlen_match8_match_nolit_encodeSnappyBlockAsm10B:
CMPL SI, $0x08
JB matchlen_match4_match_nolit_encodeSnappyBlockAsm10B
+ MOVQ (DI)(R9*1), R8
+ XORQ (BX)(R9*1), R8
+ JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B
+ LEAL -8(SI), SI
+ LEAL 8(R9), R9
+ JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm10B
-matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B:
- MOVQ (DI)(R9*1), R8
- XORQ (BX)(R9*1), R8
- TESTQ R8, R8
- JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm10B
-
+matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B:
#ifdef GOAMD64_v3
TZCNTQ R8, R8
@@ -13303,12 +13815,6 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B:
LEAL (R9)(R8*1), R9
JMP match_nolit_end_encodeSnappyBlockAsm10B
-matchlen_loop_match_nolit_encodeSnappyBlockAsm10B:
- LEAL -8(SI), SI
- LEAL 8(R9), R9
- CMPL SI, $0x08
- JAE matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B
-
matchlen_match4_match_nolit_encodeSnappyBlockAsm10B:
CMPL SI, $0x04
JB matchlen_match2_match_nolit_encodeSnappyBlockAsm10B
@@ -13783,15 +14289,43 @@ emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B:
// matchLen
XORL R10, R10
+
+matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B:
+ CMPL DI, $0x10
+ JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B
+ MOVQ (R8)(R10*1), R9
+ MOVQ 8(R8)(R10*1), R11
+ XORQ (BX)(R10*1), R9
+ JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B
+ XORQ 8(BX)(R10*1), R11
+ JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B
+ LEAL -16(DI), DI
+ LEAL 16(R10), R10
+ JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B
+
+matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B:
+#ifdef GOAMD64_v3
+ TZCNTQ R11, R11
+
+#else
+ BSFQ R11, R11
+
+#endif
+ SARQ $0x03, R11
+ LEAL 8(R10)(R11*1), R10
+ JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B
+
+matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B:
CMPL DI, $0x08
JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B
+ MOVQ (R8)(R10*1), R9
+ XORQ (BX)(R10*1), R9
+ JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B
+ LEAL -8(DI), DI
+ LEAL 8(R10), R10
+ JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B
-matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B:
- MOVQ (R8)(R10*1), R9
- XORQ (BX)(R10*1), R9
- TESTQ R9, R9
- JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B
-
+matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B:
#ifdef GOAMD64_v3
TZCNTQ R9, R9
@@ -13803,12 +14337,6 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B:
LEAL (R10)(R9*1), R10
JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B
-matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B:
- LEAL -8(DI), DI
- LEAL 8(R10), R10
- CMPL DI, $0x08
- JAE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B
-
matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B:
CMPL DI, $0x04
JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B
@@ -14063,15 +14591,43 @@ match_nolit_loop_encodeSnappyBlockAsm8B:
// matchLen
XORL R9, R9
+
+matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B:
+ CMPL SI, $0x10
+ JB matchlen_match8_match_nolit_encodeSnappyBlockAsm8B
+ MOVQ (DI)(R9*1), R8
+ MOVQ 8(DI)(R9*1), R10
+ XORQ (BX)(R9*1), R8
+ JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B
+ XORQ 8(BX)(R9*1), R10
+ JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B
+ LEAL -16(SI), SI
+ LEAL 16(R9), R9
+ JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B
+
+matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B:
+#ifdef GOAMD64_v3
+ TZCNTQ R10, R10
+
+#else
+ BSFQ R10, R10
+
+#endif
+ SARQ $0x03, R10
+ LEAL 8(R9)(R10*1), R9
+ JMP match_nolit_end_encodeSnappyBlockAsm8B
+
+matchlen_match8_match_nolit_encodeSnappyBlockAsm8B:
CMPL SI, $0x08
JB matchlen_match4_match_nolit_encodeSnappyBlockAsm8B
+ MOVQ (DI)(R9*1), R8
+ XORQ (BX)(R9*1), R8
+ JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B
+ LEAL -8(SI), SI
+ LEAL 8(R9), R9
+ JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm8B
-matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B:
- MOVQ (DI)(R9*1), R8
- XORQ (BX)(R9*1), R8
- TESTQ R8, R8
- JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm8B
-
+matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B:
#ifdef GOAMD64_v3
TZCNTQ R8, R8
@@ -14083,12 +14639,6 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B:
LEAL (R9)(R8*1), R9
JMP match_nolit_end_encodeSnappyBlockAsm8B
-matchlen_loop_match_nolit_encodeSnappyBlockAsm8B:
- LEAL -8(SI), SI
- LEAL 8(R9), R9
- CMPL SI, $0x08
- JAE matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B
-
matchlen_match4_match_nolit_encodeSnappyBlockAsm8B:
CMPL SI, $0x04
JB matchlen_match2_match_nolit_encodeSnappyBlockAsm8B
@@ -14473,15 +15023,43 @@ match_dst_size_check_encodeSnappyBetterBlockAsm:
// matchLen
XORL R11, R11
+
+matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm:
+ CMPL DI, $0x10
+ JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm
+ MOVQ (R8)(R11*1), R10
+ MOVQ 8(R8)(R11*1), R12
+ XORQ (R9)(R11*1), R10
+ JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm
+ XORQ 8(R9)(R11*1), R12
+ JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm
+ LEAL -16(DI), DI
+ LEAL 16(R11), R11
+ JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm
+
+matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm:
+#ifdef GOAMD64_v3
+ TZCNTQ R12, R12
+
+#else
+ BSFQ R12, R12
+
+#endif
+ SARQ $0x03, R12
+ LEAL 8(R11)(R12*1), R11
+ JMP match_nolit_end_encodeSnappyBetterBlockAsm
+
+matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm:
CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm
+ MOVQ (R8)(R11*1), R10
+ XORQ (R9)(R11*1), R10
+ JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm
+ LEAL -8(DI), DI
+ LEAL 8(R11), R11
+ JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm
-matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm:
- MOVQ (R8)(R11*1), R10
- XORQ (R9)(R11*1), R10
- TESTQ R10, R10
- JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm
-
+matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
@@ -14493,12 +15071,6 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm:
LEAL (R11)(R10*1), R11
JMP match_nolit_end_encodeSnappyBetterBlockAsm
-matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm:
- LEAL -8(DI), DI
- LEAL 8(R11), R11
- CMPL DI, $0x08
- JAE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm
-
matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm:
CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm
@@ -15096,15 +15668,43 @@ match_dst_size_check_encodeSnappyBetterBlockAsm64K:
// matchLen
XORL R11, R11
+
+matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K:
+ CMPL DI, $0x10
+ JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K
+ MOVQ (R8)(R11*1), R10
+ MOVQ 8(R8)(R11*1), R12
+ XORQ (R9)(R11*1), R10
+ JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K
+ XORQ 8(R9)(R11*1), R12
+ JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K
+ LEAL -16(DI), DI
+ LEAL 16(R11), R11
+ JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K
+
+matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K:
+#ifdef GOAMD64_v3
+ TZCNTQ R12, R12
+
+#else
+ BSFQ R12, R12
+
+#endif
+ SARQ $0x03, R12
+ LEAL 8(R11)(R12*1), R11
+ JMP match_nolit_end_encodeSnappyBetterBlockAsm64K
+
+matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K:
CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K
+ MOVQ (R8)(R11*1), R10
+ XORQ (R9)(R11*1), R10
+ JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K
+ LEAL -8(DI), DI
+ LEAL 8(R11), R11
+ JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K
-matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K:
- MOVQ (R8)(R11*1), R10
- XORQ (R9)(R11*1), R10
- TESTQ R10, R10
- JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm64K
-
+matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
@@ -15116,12 +15716,6 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K:
LEAL (R11)(R10*1), R11
JMP match_nolit_end_encodeSnappyBetterBlockAsm64K
-matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm64K:
- LEAL -8(DI), DI
- LEAL 8(R11), R11
- CMPL DI, $0x08
- JAE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K
-
matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K:
CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K
@@ -15654,15 +16248,43 @@ match_dst_size_check_encodeSnappyBetterBlockAsm12B:
// matchLen
XORL R11, R11
+
+matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B:
+ CMPL DI, $0x10
+ JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B
+ MOVQ (R8)(R11*1), R10
+ MOVQ 8(R8)(R11*1), R12
+ XORQ (R9)(R11*1), R10
+ JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B
+ XORQ 8(R9)(R11*1), R12
+ JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B
+ LEAL -16(DI), DI
+ LEAL 16(R11), R11
+ JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B
+
+matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B:
+#ifdef GOAMD64_v3
+ TZCNTQ R12, R12
+
+#else
+ BSFQ R12, R12
+
+#endif
+ SARQ $0x03, R12
+ LEAL 8(R11)(R12*1), R11
+ JMP match_nolit_end_encodeSnappyBetterBlockAsm12B
+
+matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B:
CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B
+ MOVQ (R8)(R11*1), R10
+ XORQ (R9)(R11*1), R10
+ JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B
+ LEAL -8(DI), DI
+ LEAL 8(R11), R11
+ JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B
-matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B:
- MOVQ (R8)(R11*1), R10
- XORQ (R9)(R11*1), R10
- TESTQ R10, R10
- JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm12B
-
+matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
@@ -15674,12 +16296,6 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B:
LEAL (R11)(R10*1), R11
JMP match_nolit_end_encodeSnappyBetterBlockAsm12B
-matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm12B:
- LEAL -8(DI), DI
- LEAL 8(R11), R11
- CMPL DI, $0x08
- JAE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B
-
matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B:
CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B
@@ -16212,15 +16828,43 @@ match_dst_size_check_encodeSnappyBetterBlockAsm10B:
// matchLen
XORL R11, R11
+
+matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B:
+ CMPL DI, $0x10
+ JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B
+ MOVQ (R8)(R11*1), R10
+ MOVQ 8(R8)(R11*1), R12
+ XORQ (R9)(R11*1), R10
+ JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B
+ XORQ 8(R9)(R11*1), R12
+ JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B
+ LEAL -16(DI), DI
+ LEAL 16(R11), R11
+ JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B
+
+matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B:
+#ifdef GOAMD64_v3
+ TZCNTQ R12, R12
+
+#else
+ BSFQ R12, R12
+
+#endif
+ SARQ $0x03, R12
+ LEAL 8(R11)(R12*1), R11
+ JMP match_nolit_end_encodeSnappyBetterBlockAsm10B
+
+matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B:
CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B
+ MOVQ (R8)(R11*1), R10
+ XORQ (R9)(R11*1), R10
+ JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B
+ LEAL -8(DI), DI
+ LEAL 8(R11), R11
+ JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B
-matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B:
- MOVQ (R8)(R11*1), R10
- XORQ (R9)(R11*1), R10
- TESTQ R10, R10
- JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm10B
-
+matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
@@ -16232,12 +16876,6 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B:
LEAL (R11)(R10*1), R11
JMP match_nolit_end_encodeSnappyBetterBlockAsm10B
-matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm10B:
- LEAL -8(DI), DI
- LEAL 8(R11), R11
- CMPL DI, $0x08
- JAE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B
-
matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B:
CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B
@@ -16770,15 +17408,43 @@ match_dst_size_check_encodeSnappyBetterBlockAsm8B:
// matchLen
XORL R11, R11
+
+matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B:
+ CMPL DI, $0x10
+ JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B
+ MOVQ (R8)(R11*1), R10
+ MOVQ 8(R8)(R11*1), R12
+ XORQ (R9)(R11*1), R10
+ JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B
+ XORQ 8(R9)(R11*1), R12
+ JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B
+ LEAL -16(DI), DI
+ LEAL 16(R11), R11
+ JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B
+
+matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B:
+#ifdef GOAMD64_v3
+ TZCNTQ R12, R12
+
+#else
+ BSFQ R12, R12
+
+#endif
+ SARQ $0x03, R12
+ LEAL 8(R11)(R12*1), R11
+ JMP match_nolit_end_encodeSnappyBetterBlockAsm8B
+
+matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B:
CMPL DI, $0x08
JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B
+ MOVQ (R8)(R11*1), R10
+ XORQ (R9)(R11*1), R10
+ JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B
+ LEAL -8(DI), DI
+ LEAL 8(R11), R11
+ JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B
-matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B:
- MOVQ (R8)(R11*1), R10
- XORQ (R9)(R11*1), R10
- TESTQ R10, R10
- JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm8B
-
+matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B:
#ifdef GOAMD64_v3
TZCNTQ R10, R10
@@ -16790,12 +17456,6 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B:
LEAL (R11)(R10*1), R11
JMP match_nolit_end_encodeSnappyBetterBlockAsm8B
-matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm8B:
- LEAL -8(DI), DI
- LEAL 8(R11), R11
- CMPL DI, $0x08
- JAE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B
-
matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B:
CMPL DI, $0x04
JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B
@@ -17343,15 +18003,43 @@ emit_literal_done_repeat_emit_calcBlockSize:
// matchLen
XORL R10, R10
+
+matchlen_loopback_16_repeat_extend_calcBlockSize:
+ CMPL DI, $0x10
+ JB matchlen_match8_repeat_extend_calcBlockSize
+ MOVQ (R8)(R10*1), R9
+ MOVQ 8(R8)(R10*1), R11
+ XORQ (BX)(R10*1), R9
+ JNZ matchlen_bsf_8_repeat_extend_calcBlockSize
+ XORQ 8(BX)(R10*1), R11
+ JNZ matchlen_bsf_16repeat_extend_calcBlockSize
+ LEAL -16(DI), DI
+ LEAL 16(R10), R10
+ JMP matchlen_loopback_16_repeat_extend_calcBlockSize
+
+matchlen_bsf_16repeat_extend_calcBlockSize:
+#ifdef GOAMD64_v3
+ TZCNTQ R11, R11
+
+#else
+ BSFQ R11, R11
+
+#endif
+ SARQ $0x03, R11
+ LEAL 8(R10)(R11*1), R10
+ JMP repeat_extend_forward_end_calcBlockSize
+
+matchlen_match8_repeat_extend_calcBlockSize:
CMPL DI, $0x08
JB matchlen_match4_repeat_extend_calcBlockSize
+ MOVQ (R8)(R10*1), R9
+ XORQ (BX)(R10*1), R9
+ JNZ matchlen_bsf_8_repeat_extend_calcBlockSize
+ LEAL -8(DI), DI
+ LEAL 8(R10), R10
+ JMP matchlen_match4_repeat_extend_calcBlockSize
-matchlen_loopback_repeat_extend_calcBlockSize:
- MOVQ (R8)(R10*1), R9
- XORQ (BX)(R10*1), R9
- TESTQ R9, R9
- JZ matchlen_loop_repeat_extend_calcBlockSize
-
+matchlen_bsf_8_repeat_extend_calcBlockSize:
#ifdef GOAMD64_v3
TZCNTQ R9, R9
@@ -17363,12 +18051,6 @@ matchlen_loopback_repeat_extend_calcBlockSize:
LEAL (R10)(R9*1), R10
JMP repeat_extend_forward_end_calcBlockSize
-matchlen_loop_repeat_extend_calcBlockSize:
- LEAL -8(DI), DI
- LEAL 8(R10), R10
- CMPL DI, $0x08
- JAE matchlen_loopback_repeat_extend_calcBlockSize
-
matchlen_match4_repeat_extend_calcBlockSize:
CMPL DI, $0x04
JB matchlen_match2_repeat_extend_calcBlockSize
@@ -17554,15 +18236,43 @@ match_nolit_loop_calcBlockSize:
// matchLen
XORL R9, R9
+
+matchlen_loopback_16_match_nolit_calcBlockSize:
+ CMPL SI, $0x10
+ JB matchlen_match8_match_nolit_calcBlockSize
+ MOVQ (DI)(R9*1), R8
+ MOVQ 8(DI)(R9*1), R10
+ XORQ (BX)(R9*1), R8
+ JNZ matchlen_bsf_8_match_nolit_calcBlockSize
+ XORQ 8(BX)(R9*1), R10
+ JNZ matchlen_bsf_16match_nolit_calcBlockSize
+ LEAL -16(SI), SI
+ LEAL 16(R9), R9
+ JMP matchlen_loopback_16_match_nolit_calcBlockSize
+
+matchlen_bsf_16match_nolit_calcBlockSize:
+#ifdef GOAMD64_v3
+ TZCNTQ R10, R10
+
+#else
+ BSFQ R10, R10
+
+#endif
+ SARQ $0x03, R10
+ LEAL 8(R9)(R10*1), R9
+ JMP match_nolit_end_calcBlockSize
+
+matchlen_match8_match_nolit_calcBlockSize:
CMPL SI, $0x08
JB matchlen_match4_match_nolit_calcBlockSize
+ MOVQ (DI)(R9*1), R8
+ XORQ (BX)(R9*1), R8
+ JNZ matchlen_bsf_8_match_nolit_calcBlockSize
+ LEAL -8(SI), SI
+ LEAL 8(R9), R9
+ JMP matchlen_match4_match_nolit_calcBlockSize
-matchlen_loopback_match_nolit_calcBlockSize:
- MOVQ (DI)(R9*1), R8
- XORQ (BX)(R9*1), R8
- TESTQ R8, R8
- JZ matchlen_loop_match_nolit_calcBlockSize
-
+matchlen_bsf_8_match_nolit_calcBlockSize:
#ifdef GOAMD64_v3
TZCNTQ R8, R8
@@ -17574,12 +18284,6 @@ matchlen_loopback_match_nolit_calcBlockSize:
LEAL (R9)(R8*1), R9
JMP match_nolit_end_calcBlockSize
-matchlen_loop_match_nolit_calcBlockSize:
- LEAL -8(SI), SI
- LEAL 8(R9), R9
- CMPL SI, $0x08
- JAE matchlen_loopback_match_nolit_calcBlockSize
-
matchlen_match4_match_nolit_calcBlockSize:
CMPL SI, $0x04
JB matchlen_match2_match_nolit_calcBlockSize
@@ -17872,15 +18576,43 @@ emit_literal_done_repeat_emit_calcBlockSizeSmall:
// matchLen
XORL R10, R10
+
+matchlen_loopback_16_repeat_extend_calcBlockSizeSmall:
+ CMPL DI, $0x10
+ JB matchlen_match8_repeat_extend_calcBlockSizeSmall
+ MOVQ (R8)(R10*1), R9
+ MOVQ 8(R8)(R10*1), R11
+ XORQ (BX)(R10*1), R9
+ JNZ matchlen_bsf_8_repeat_extend_calcBlockSizeSmall
+ XORQ 8(BX)(R10*1), R11
+ JNZ matchlen_bsf_16repeat_extend_calcBlockSizeSmall
+ LEAL -16(DI), DI
+ LEAL 16(R10), R10
+ JMP matchlen_loopback_16_repeat_extend_calcBlockSizeSmall
+
+matchlen_bsf_16repeat_extend_calcBlockSizeSmall:
+#ifdef GOAMD64_v3
+ TZCNTQ R11, R11
+
+#else
+ BSFQ R11, R11
+
+#endif
+ SARQ $0x03, R11
+ LEAL 8(R10)(R11*1), R10
+ JMP repeat_extend_forward_end_calcBlockSizeSmall
+
+matchlen_match8_repeat_extend_calcBlockSizeSmall:
CMPL DI, $0x08
JB matchlen_match4_repeat_extend_calcBlockSizeSmall
+ MOVQ (R8)(R10*1), R9
+ XORQ (BX)(R10*1), R9
+ JNZ matchlen_bsf_8_repeat_extend_calcBlockSizeSmall
+ LEAL -8(DI), DI
+ LEAL 8(R10), R10
+ JMP matchlen_match4_repeat_extend_calcBlockSizeSmall
-matchlen_loopback_repeat_extend_calcBlockSizeSmall:
- MOVQ (R8)(R10*1), R9
- XORQ (BX)(R10*1), R9
- TESTQ R9, R9
- JZ matchlen_loop_repeat_extend_calcBlockSizeSmall
-
+matchlen_bsf_8_repeat_extend_calcBlockSizeSmall:
#ifdef GOAMD64_v3
TZCNTQ R9, R9
@@ -17892,12 +18624,6 @@ matchlen_loopback_repeat_extend_calcBlockSizeSmall:
LEAL (R10)(R9*1), R10
JMP repeat_extend_forward_end_calcBlockSizeSmall
-matchlen_loop_repeat_extend_calcBlockSizeSmall:
- LEAL -8(DI), DI
- LEAL 8(R10), R10
- CMPL DI, $0x08
- JAE matchlen_loopback_repeat_extend_calcBlockSizeSmall
-
matchlen_match4_repeat_extend_calcBlockSizeSmall:
CMPL DI, $0x04
JB matchlen_match2_repeat_extend_calcBlockSizeSmall
@@ -18053,15 +18779,43 @@ match_nolit_loop_calcBlockSizeSmall:
// matchLen
XORL R9, R9
+
+matchlen_loopback_16_match_nolit_calcBlockSizeSmall:
+ CMPL SI, $0x10
+ JB matchlen_match8_match_nolit_calcBlockSizeSmall
+ MOVQ (DI)(R9*1), R8
+ MOVQ 8(DI)(R9*1), R10
+ XORQ (BX)(R9*1), R8
+ JNZ matchlen_bsf_8_match_nolit_calcBlockSizeSmall
+ XORQ 8(BX)(R9*1), R10
+ JNZ matchlen_bsf_16match_nolit_calcBlockSizeSmall
+ LEAL -16(SI), SI
+ LEAL 16(R9), R9
+ JMP matchlen_loopback_16_match_nolit_calcBlockSizeSmall
+
+matchlen_bsf_16match_nolit_calcBlockSizeSmall:
+#ifdef GOAMD64_v3
+ TZCNTQ R10, R10
+
+#else
+ BSFQ R10, R10
+
+#endif
+ SARQ $0x03, R10
+ LEAL 8(R9)(R10*1), R9
+ JMP match_nolit_end_calcBlockSizeSmall
+
+matchlen_match8_match_nolit_calcBlockSizeSmall:
CMPL SI, $0x08
JB matchlen_match4_match_nolit_calcBlockSizeSmall
+ MOVQ (DI)(R9*1), R8
+ XORQ (BX)(R9*1), R8
+ JNZ matchlen_bsf_8_match_nolit_calcBlockSizeSmall
+ LEAL -8(SI), SI
+ LEAL 8(R9), R9
+ JMP matchlen_match4_match_nolit_calcBlockSizeSmall
-matchlen_loopback_match_nolit_calcBlockSizeSmall:
- MOVQ (DI)(R9*1), R8
- XORQ (BX)(R9*1), R8
- TESTQ R8, R8
- JZ matchlen_loop_match_nolit_calcBlockSizeSmall
-
+matchlen_bsf_8_match_nolit_calcBlockSizeSmall:
#ifdef GOAMD64_v3
TZCNTQ R8, R8
@@ -18073,12 +18827,6 @@ matchlen_loopback_match_nolit_calcBlockSizeSmall:
LEAL (R9)(R8*1), R9
JMP match_nolit_end_calcBlockSizeSmall
-matchlen_loop_match_nolit_calcBlockSizeSmall:
- LEAL -8(SI), SI
- LEAL 8(R9), R9
- CMPL SI, $0x08
- JAE matchlen_loopback_match_nolit_calcBlockSizeSmall
-
matchlen_match4_match_nolit_calcBlockSizeSmall:
CMPL SI, $0x04
JB matchlen_match2_match_nolit_calcBlockSizeSmall
@@ -18840,15 +19588,43 @@ TEXT ·matchLen(SB), NOSPLIT, $0-56
// matchLen
XORL SI, SI
+
+matchlen_loopback_16_standalone:
+ CMPL DX, $0x10
+ JB matchlen_match8_standalone
+ MOVQ (AX)(SI*1), BX
+ MOVQ 8(AX)(SI*1), DI
+ XORQ (CX)(SI*1), BX
+ JNZ matchlen_bsf_8_standalone
+ XORQ 8(CX)(SI*1), DI
+ JNZ matchlen_bsf_16standalone
+ LEAL -16(DX), DX
+ LEAL 16(SI), SI
+ JMP matchlen_loopback_16_standalone
+
+matchlen_bsf_16standalone:
+#ifdef GOAMD64_v3
+ TZCNTQ DI, DI
+
+#else
+ BSFQ DI, DI
+
+#endif
+ SARQ $0x03, DI
+ LEAL 8(SI)(DI*1), SI
+ JMP gen_match_len_end
+
+matchlen_match8_standalone:
CMPL DX, $0x08
JB matchlen_match4_standalone
+ MOVQ (AX)(SI*1), BX
+ XORQ (CX)(SI*1), BX
+ JNZ matchlen_bsf_8_standalone
+ LEAL -8(DX), DX
+ LEAL 8(SI), SI
+ JMP matchlen_match4_standalone
-matchlen_loopback_standalone:
- MOVQ (AX)(SI*1), BX
- XORQ (CX)(SI*1), BX
- TESTQ BX, BX
- JZ matchlen_loop_standalone
-
+matchlen_bsf_8_standalone:
#ifdef GOAMD64_v3
TZCNTQ BX, BX
@@ -18860,12 +19636,6 @@ matchlen_loopback_standalone:
LEAL (SI)(BX*1), SI
JMP gen_match_len_end
-matchlen_loop_standalone:
- LEAL -8(DX), DX
- LEAL 8(SI), SI
- CMPL DX, $0x08
- JAE matchlen_loopback_standalone
-
matchlen_match4_standalone:
CMPL DX, $0x04
JB matchlen_match2_standalone
diff --git a/vendor/github.com/klauspost/compress/s2/index.go b/vendor/github.com/klauspost/compress/s2/index.go
index dd9ecfe71..18a4f7acd 100644
--- a/vendor/github.com/klauspost/compress/s2/index.go
+++ b/vendor/github.com/klauspost/compress/s2/index.go
@@ -511,24 +511,22 @@ func IndexStream(r io.Reader) ([]byte, error) {
// JSON returns the index as JSON text.
func (i *Index) JSON() []byte {
+ type offset struct {
+ CompressedOffset int64 `json:"compressed"`
+ UncompressedOffset int64 `json:"uncompressed"`
+ }
x := struct {
- TotalUncompressed int64 `json:"total_uncompressed"` // Total Uncompressed size if known. Will be -1 if unknown.
- TotalCompressed int64 `json:"total_compressed"` // Total Compressed size if known. Will be -1 if unknown.
- Offsets []struct {
- CompressedOffset int64 `json:"compressed"`
- UncompressedOffset int64 `json:"uncompressed"`
- } `json:"offsets"`
- EstBlockUncomp int64 `json:"est_block_uncompressed"`
+ TotalUncompressed int64 `json:"total_uncompressed"` // Total Uncompressed size if known. Will be -1 if unknown.
+ TotalCompressed int64 `json:"total_compressed"` // Total Compressed size if known. Will be -1 if unknown.
+ Offsets []offset `json:"offsets"`
+ EstBlockUncomp int64 `json:"est_block_uncompressed"`
}{
TotalUncompressed: i.TotalUncompressed,
TotalCompressed: i.TotalCompressed,
EstBlockUncomp: i.estBlockUncomp,
}
for _, v := range i.info {
- x.Offsets = append(x.Offsets, struct {
- CompressedOffset int64 `json:"compressed"`
- UncompressedOffset int64 `json:"uncompressed"`
- }{CompressedOffset: v.compressedOffset, UncompressedOffset: v.uncompressedOffset})
+ x.Offsets = append(x.Offsets, offset{CompressedOffset: v.compressedOffset, UncompressedOffset: v.uncompressedOffset})
}
b, _ := json.MarshalIndent(x, "", " ")
return b