diff options
author | 2023-10-31 11:12:22 +0000 | |
---|---|---|
committer | 2023-10-31 11:12:22 +0000 | |
commit | ce71a5a7902963538fc54583588850563f6746cc (patch) | |
tree | 3e869eba6d25d2db5fe81184ffee595e451b3147 /vendor/github.com | |
parent | [bugfix] Relax `Mention` parsing, allowing either href or name (#2320) (diff) | |
download | gotosocial-ce71a5a7902963538fc54583588850563f6746cc.tar.xz |
[feature] add per-uri dereferencer locks (#2291)
Diffstat (limited to 'vendor/github.com')
16 files changed, 1858 insertions, 487 deletions
diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go index 5faea0b2b..de912e187 100644 --- a/vendor/github.com/klauspost/compress/flate/deflate.go +++ b/vendor/github.com/klauspost/compress/flate/deflate.go @@ -7,6 +7,7 @@ package flate import ( "encoding/binary" + "errors" "fmt" "io" "math" @@ -833,6 +834,12 @@ func (d *compressor) init(w io.Writer, level int) (err error) { d.initDeflate() d.fill = (*compressor).fillDeflate d.step = (*compressor).deflateLazy + case -level >= MinCustomWindowSize && -level <= MaxCustomWindowSize: + d.w.logNewTablePenalty = 7 + d.fast = &fastEncL5Window{maxOffset: int32(-level), cur: maxStoreBlockSize} + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillBlock + d.step = (*compressor).storeFast default: return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level) } @@ -929,6 +936,28 @@ func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) { return zw, err } +// MinCustomWindowSize is the minimum window size that can be sent to NewWriterWindow. +const MinCustomWindowSize = 32 + +// MaxCustomWindowSize is the maximum custom window that can be sent to NewWriterWindow. +const MaxCustomWindowSize = windowSize + +// NewWriterWindow returns a new Writer compressing data with a custom window size. +// windowSize must be from MinCustomWindowSize to MaxCustomWindowSize. +func NewWriterWindow(w io.Writer, windowSize int) (*Writer, error) { + if windowSize < MinCustomWindowSize { + return nil, errors.New("flate: requested window size less than MinWindowSize") + } + if windowSize > MaxCustomWindowSize { + return nil, errors.New("flate: requested window size bigger than MaxCustomWindowSize") + } + var dw Writer + if err := dw.d.init(w, -windowSize); err != nil { + return nil, err + } + return &dw, nil +} + // A Writer takes data written to it and writes the compressed // form of that data to an underlying writer (see NewWriter). type Writer struct { diff --git a/vendor/github.com/klauspost/compress/flate/fast_encoder.go b/vendor/github.com/klauspost/compress/flate/fast_encoder.go index 24caf5f70..c8124b5c4 100644 --- a/vendor/github.com/klauspost/compress/flate/fast_encoder.go +++ b/vendor/github.com/klauspost/compress/flate/fast_encoder.go @@ -8,7 +8,6 @@ package flate import ( "encoding/binary" "fmt" - "math/bits" ) type fastEnc interface { @@ -192,25 +191,3 @@ func (e *fastGen) Reset() { } e.hist = e.hist[:0] } - -// matchLen returns the maximum length. -// 'a' must be the shortest of the two. -func matchLen(a, b []byte) int { - var checked int - - for len(a) >= 8 { - if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 { - return checked + (bits.TrailingZeros64(diff) >> 3) - } - checked += 8 - a = a[8:] - b = b[8:] - } - b = b[:len(a)] - for i := range a { - if a[i] != b[i] { - return i + checked - } - } - return len(a) + checked -} diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go index 414c0bea9..2f410d64f 100644 --- a/vendor/github.com/klauspost/compress/flate/inflate.go +++ b/vendor/github.com/klauspost/compress/flate/inflate.go @@ -120,8 +120,9 @@ func (h *huffmanDecoder) init(lengths []int) bool { const sanity = false if h.chunks == nil { - h.chunks = &[huffmanNumChunks]uint16{} + h.chunks = new([huffmanNumChunks]uint16) } + if h.maxRead != 0 { *h = huffmanDecoder{chunks: h.chunks, links: h.links} } @@ -175,6 +176,7 @@ func (h *huffmanDecoder) init(lengths []int) bool { } h.maxRead = min + chunks := h.chunks[:] for i := range chunks { chunks[i] = 0 @@ -202,8 +204,7 @@ func (h *huffmanDecoder) init(lengths []int) bool { if cap(h.links[off]) < numLinks { h.links[off] = make([]uint16, numLinks) } else { - links := h.links[off][:0] - h.links[off] = links[:numLinks] + h.links[off] = h.links[off][:numLinks] } } } else { @@ -277,7 +278,7 @@ func (h *huffmanDecoder) init(lengths []int) bool { return true } -// The actual read interface needed by NewReader. +// Reader is the actual read interface needed by NewReader. // If the passed in io.Reader does not also have ReadByte, // the NewReader will introduce its own buffering. type Reader interface { @@ -285,6 +286,18 @@ type Reader interface { io.ByteReader } +type step uint8 + +const ( + copyData step = iota + 1 + nextBlock + huffmanBytesBuffer + huffmanBytesReader + huffmanBufioReader + huffmanStringsReader + huffmanGenericReader +) + // Decompress state. type decompressor struct { // Input source. @@ -303,7 +316,7 @@ type decompressor struct { // Next step in the decompression, // and decompression state. - step func(*decompressor) + step step stepState int err error toRead []byte @@ -342,7 +355,7 @@ func (f *decompressor) nextBlock() { // compressed, fixed Huffman tables f.hl = &fixedHuffmanDecoder f.hd = nil - f.huffmanBlockDecoder()() + f.huffmanBlockDecoder() if debugDecode { fmt.Println("predefinied huffman block") } @@ -353,7 +366,7 @@ func (f *decompressor) nextBlock() { } f.hl = &f.h1 f.hd = &f.h2 - f.huffmanBlockDecoder()() + f.huffmanBlockDecoder() if debugDecode { fmt.Println("dynamic huffman block") } @@ -379,14 +392,16 @@ func (f *decompressor) Read(b []byte) (int, error) { if f.err != nil { return 0, f.err } - f.step(f) + + f.doStep() + if f.err != nil && len(f.toRead) == 0 { f.toRead = f.dict.readFlush() // Flush what's left in case of error } } } -// Support the io.WriteTo interface for io.Copy and friends. +// WriteTo implements the io.WriteTo interface for io.Copy and friends. func (f *decompressor) WriteTo(w io.Writer) (int64, error) { total := int64(0) flushed := false @@ -410,7 +425,7 @@ func (f *decompressor) WriteTo(w io.Writer) (int64, error) { return total, f.err } if f.err == nil { - f.step(f) + f.doStep() } if len(f.toRead) == 0 && f.err != nil && !flushed { f.toRead = f.dict.readFlush() // Flush what's left in case of error @@ -631,7 +646,7 @@ func (f *decompressor) copyData() { if f.dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = f.dict.readFlush() - f.step = (*decompressor).copyData + f.step = copyData return } f.finishBlock() @@ -644,7 +659,28 @@ func (f *decompressor) finishBlock() { } f.err = io.EOF } - f.step = (*decompressor).nextBlock + f.step = nextBlock +} + +func (f *decompressor) doStep() { + switch f.step { + case copyData: + f.copyData() + case nextBlock: + f.nextBlock() + case huffmanBytesBuffer: + f.huffmanBytesBuffer() + case huffmanBytesReader: + f.huffmanBytesReader() + case huffmanBufioReader: + f.huffmanBufioReader() + case huffmanStringsReader: + f.huffmanStringsReader() + case huffmanGenericReader: + f.huffmanGenericReader() + default: + panic("BUG: unexpected step state") + } } // noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF. @@ -747,7 +783,7 @@ func (f *decompressor) Reset(r io.Reader, dict []byte) error { h1: f.h1, h2: f.h2, dict: f.dict, - step: (*decompressor).nextBlock, + step: nextBlock, } f.dict.init(maxMatchOffset, dict) return nil @@ -768,7 +804,7 @@ func NewReader(r io.Reader) io.ReadCloser { f.r = makeReader(r) f.bits = new([maxNumLit + maxNumDist]int) f.codebits = new([numCodes]int) - f.step = (*decompressor).nextBlock + f.step = nextBlock f.dict.init(maxMatchOffset, nil) return &f } @@ -787,7 +823,7 @@ func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { f.r = makeReader(r) f.bits = new([maxNumLit + maxNumDist]int) f.codebits = new([numCodes]int) - f.step = (*decompressor).nextBlock + f.step = nextBlock f.dict.init(maxMatchOffset, dict) return &f } diff --git a/vendor/github.com/klauspost/compress/flate/inflate_gen.go b/vendor/github.com/klauspost/compress/flate/inflate_gen.go index 61342b6b8..2b2f993f7 100644 --- a/vendor/github.com/klauspost/compress/flate/inflate_gen.go +++ b/vendor/github.com/klauspost/compress/flate/inflate_gen.go @@ -85,7 +85,7 @@ readLiteral: dict.writeByte(byte(v)) if dict.availWrite() == 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBytesBuffer + f.step = huffmanBytesBuffer f.stepState = stateInit f.b, f.nb = fb, fnb return @@ -251,7 +251,7 @@ copyHistory: if dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBytesBuffer // We need to continue this work + f.step = huffmanBytesBuffer // We need to continue this work f.stepState = stateDict f.b, f.nb = fb, fnb return @@ -336,7 +336,7 @@ readLiteral: dict.writeByte(byte(v)) if dict.availWrite() == 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBytesReader + f.step = huffmanBytesReader f.stepState = stateInit f.b, f.nb = fb, fnb return @@ -502,7 +502,7 @@ copyHistory: if dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBytesReader // We need to continue this work + f.step = huffmanBytesReader // We need to continue this work f.stepState = stateDict f.b, f.nb = fb, fnb return @@ -587,7 +587,7 @@ readLiteral: dict.writeByte(byte(v)) if dict.availWrite() == 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBufioReader + f.step = huffmanBufioReader f.stepState = stateInit f.b, f.nb = fb, fnb return @@ -753,7 +753,7 @@ copyHistory: if dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBufioReader // We need to continue this work + f.step = huffmanBufioReader // We need to continue this work f.stepState = stateDict f.b, f.nb = fb, fnb return @@ -838,7 +838,7 @@ readLiteral: dict.writeByte(byte(v)) if dict.availWrite() == 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanStringsReader + f.step = huffmanStringsReader f.stepState = stateInit f.b, f.nb = fb, fnb return @@ -1004,7 +1004,7 @@ copyHistory: if dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanStringsReader // We need to continue this work + f.step = huffmanStringsReader // We need to continue this work f.stepState = stateDict f.b, f.nb = fb, fnb return @@ -1089,7 +1089,7 @@ readLiteral: dict.writeByte(byte(v)) if dict.availWrite() == 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanGenericReader + f.step = huffmanGenericReader f.stepState = stateInit f.b, f.nb = fb, fnb return @@ -1255,7 +1255,7 @@ copyHistory: if dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanGenericReader // We need to continue this work + f.step = huffmanGenericReader // We need to continue this work f.stepState = stateDict f.b, f.nb = fb, fnb return @@ -1265,19 +1265,19 @@ copyHistory: // Not reached } -func (f *decompressor) huffmanBlockDecoder() func() { +func (f *decompressor) huffmanBlockDecoder() { switch f.r.(type) { case *bytes.Buffer: - return f.huffmanBytesBuffer + f.huffmanBytesBuffer() case *bytes.Reader: - return f.huffmanBytesReader + f.huffmanBytesReader() case *bufio.Reader: - return f.huffmanBufioReader + f.huffmanBufioReader() case *strings.Reader: - return f.huffmanStringsReader + f.huffmanStringsReader() case Reader: - return f.huffmanGenericReader + f.huffmanGenericReader() default: - return f.huffmanGenericReader + f.huffmanGenericReader() } } diff --git a/vendor/github.com/klauspost/compress/flate/level5.go b/vendor/github.com/klauspost/compress/flate/level5.go index 83ef50ba4..1f61ec182 100644 --- a/vendor/github.com/klauspost/compress/flate/level5.go +++ b/vendor/github.com/klauspost/compress/flate/level5.go @@ -308,3 +308,401 @@ emitRemainder: emitLiteral(dst, src[nextEmit:]) } } + +// fastEncL5Window is a level 5 encoder, +// but with a custom window size. +type fastEncL5Window struct { + hist []byte + cur int32 + maxOffset int32 + table [tableSize]tableEntry + bTable [tableSize]tableEntryPrev +} + +func (e *fastEncL5Window) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + hashShortBytes = 4 + ) + maxMatchOffset := e.maxOffset + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.bTable[:] { + e.bTable[i] = tableEntryPrev{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + for i := range e.bTable[:] { + v := e.bTable[i] + if v.Cur.offset <= minOff { + v.Cur.offset = 0 + v.Prev.offset = 0 + } else { + v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset + if v.Prev.offset <= minOff { + v.Prev.offset = 0 + } else { + v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset + } + } + e.bTable[i] = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load6432(src, s) + for { + const skipLog = 6 + const doEvery = 1 + + nextS := s + var l int32 + var t int32 + for { + nextHashS := hashLen(cv, tableBits, hashShortBytes) + nextHashL := hash7(cv, tableBits) + + s = nextS + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + // Fetch a short+long candidate + sCandidate := e.table[nextHashS] + lCandidate := e.bTable[nextHashL] + next := load6432(src, nextS) + entry := tableEntry{offset: s + e.cur} + e.table[nextHashS] = entry + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = entry, eLong.Cur + + nextHashS = hashLen(next, tableBits, hashShortBytes) + nextHashL = hash7(next, tableBits) + + t = lCandidate.Cur.offset - e.cur + if s-t < maxMatchOffset { + if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) { + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + t2 := lCandidate.Prev.offset - e.cur + if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { + l = e.matchlen(s+4, t+4, src) + 4 + ml1 := e.matchlen(s+4, t2+4, src) + 4 + if ml1 > l { + t = t2 + l = ml1 + break + } + } + break + } + t = lCandidate.Prev.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + break + } + } + + t = sCandidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) { + // Found a 4 match... + l = e.matchlen(s+4, t+4, src) + 4 + lCandidate = e.bTable[nextHashL] + // Store the next match + + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + // If the next long is a candidate, use that... + t2 := lCandidate.Cur.offset - e.cur + if nextS-t2 < maxMatchOffset { + if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + break + } + } + // If the previous long is a candidate, use that... + t2 = lCandidate.Prev.offset - e.cur + if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + break + } + } + } + break + } + cv = next + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + if l == 0 { + // Extend the 4-byte match as long as possible. + l = e.matchlenLong(s+4, t+4, src) + 4 + } else if l == maxMatchLength { + l += e.matchlenLong(s+l, t+l, src) + } + + // Try to locate a better match by checking the end of best match... + if sAt := s + l; l < 30 && sAt < sLimit { + // Allow some bytes at the beginning to mismatch. + // Sweet spot is 2/3 bytes depending on input. + // 3 is only a little better when it is but sometimes a lot worse. + // The skipped bytes are tested in Extend backwards, + // and still picked up as part of the match if they do. + const skipBeginning = 2 + eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset + t2 := eLong - e.cur - l + skipBeginning + s2 := s + skipBeginning + off := s2 - t2 + if t2 >= 0 && off < maxMatchOffset && off > 0 { + if l2 := e.matchlenLong(s2, t2, src); l2 > l { + t = t2 + l = l2 + s = s2 + } + } + } + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + if debugDeflate { + if t >= s { + panic(fmt.Sprintln("s-t", s, t)) + } + if (s - t) > maxMatchOffset { + panic(fmt.Sprintln("mmo", s-t)) + } + if l < baseMatchLength { + panic("bml") + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + goto emitRemainder + } + + // Store every 3rd hash in-between. + if true { + const hashEvery = 3 + i := s - l + 1 + if i < s-1 { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + e.table[hashLen(cv, tableBits, hashShortBytes)] = t + eLong := &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + + // Do an long at i+1 + cv >>= 8 + t = tableEntry{offset: t.offset + 1} + eLong = &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + + // We only have enough bits for a short entry at i+2 + cv >>= 8 + t = tableEntry{offset: t.offset + 1} + e.table[hashLen(cv, tableBits, hashShortBytes)] = t + + // Skip one - otherwise we risk hitting 's' + i += 4 + for ; i < s-1; i += hashEvery { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} + eLong := &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2 + } + } + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. + x := load6432(src, s-1) + o := e.cur + s - 1 + prevHashS := hashLen(x, tableBits, hashShortBytes) + prevHashL := hash7(x, tableBits) + e.table[prevHashS] = tableEntry{offset: o} + eLong := &e.bTable[prevHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur + cv = x >> 8 + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} + +// Reset the encoding table. +func (e *fastEncL5Window) Reset() { + // We keep the same allocs, since we are compressing the same block sizes. + if cap(e.hist) < allocHistory { + e.hist = make([]byte, 0, allocHistory) + } + + // We offset current position so everything will be out of reach. + // If we are above the buffer reset it will be cleared anyway since len(hist) == 0. + if e.cur <= int32(bufferReset) { + e.cur += e.maxOffset + int32(len(e.hist)) + } + e.hist = e.hist[:0] +} + +func (e *fastEncL5Window) addBlock(src []byte) int32 { + // check if we have space already + maxMatchOffset := e.maxOffset + + if len(e.hist)+len(src) > cap(e.hist) { + if cap(e.hist) == 0 { + e.hist = make([]byte, 0, allocHistory) + } else { + if cap(e.hist) < int(maxMatchOffset*2) { + panic("unexpected buffer size") + } + // Move down + offset := int32(len(e.hist)) - maxMatchOffset + copy(e.hist[0:maxMatchOffset], e.hist[offset:]) + e.cur += offset + e.hist = e.hist[:maxMatchOffset] + } + } + s := int32(len(e.hist)) + e.hist = append(e.hist, src...) + return s +} + +// matchlen will return the match length between offsets and t in src. +// The maximum length returned is maxMatchLength - 4. +// It is assumed that s > t, that t >=0 and s < len(src). +func (e *fastEncL5Window) matchlen(s, t int32, src []byte) int32 { + if debugDecode { + if t >= s { + panic(fmt.Sprint("t >=s:", t, s)) + } + if int(s) >= len(src) { + panic(fmt.Sprint("s >= len(src):", s, len(src))) + } + if t < 0 { + panic(fmt.Sprint("t < 0:", t)) + } + if s-t > e.maxOffset { + panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) + } + } + s1 := int(s) + maxMatchLength - 4 + if s1 > len(src) { + s1 = len(src) + } + + // Extend the match to be as long as possible. + return int32(matchLen(src[s:s1], src[t:])) +} + +// matchlenLong will return the match length between offsets and t in src. +// It is assumed that s > t, that t >=0 and s < len(src). +func (e *fastEncL5Window) matchlenLong(s, t int32, src []byte) int32 { + if debugDeflate { + if t >= s { + panic(fmt.Sprint("t >=s:", t, s)) + } + if int(s) >= len(src) { + panic(fmt.Sprint("s >= len(src):", s, len(src))) + } + if t < 0 { + panic(fmt.Sprint("t < 0:", t)) + } + if s-t > e.maxOffset { + panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) + } + } + // Extend the match to be as long as possible. + return int32(matchLen(src[s:], src[t:])) +} diff --git a/vendor/github.com/klauspost/compress/flate/matchlen_amd64.go b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.go new file mode 100644 index 000000000..4bd388584 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.go @@ -0,0 +1,16 @@ +//go:build amd64 && !appengine && !noasm && gc +// +build amd64,!appengine,!noasm,gc + +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. + +package flate + +// matchLen returns how many bytes match in a and b +// +// It assumes that: +// +// len(a) <= len(b) and len(a) > 0 +// +//go:noescape +func matchLen(a []byte, b []byte) int diff --git a/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s new file mode 100644 index 000000000..9a7655c0f --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s @@ -0,0 +1,68 @@ +// Copied from S2 implementation. + +//go:build !appengine && !noasm && gc && !noasm + +#include "textflag.h" + +// func matchLen(a []byte, b []byte) int +// Requires: BMI +TEXT ·matchLen(SB), NOSPLIT, $0-56 + MOVQ a_base+0(FP), AX + MOVQ b_base+24(FP), CX + MOVQ a_len+8(FP), DX + + // matchLen + XORL SI, SI + CMPL DX, $0x08 + JB matchlen_match4_standalone + +matchlen_loopback_standalone: + MOVQ (AX)(SI*1), BX + XORQ (CX)(SI*1), BX + TESTQ BX, BX + JZ matchlen_loop_standalone + +#ifdef GOAMD64_v3 + TZCNTQ BX, BX +#else + BSFQ BX, BX +#endif + SARQ $0x03, BX + LEAL (SI)(BX*1), SI + JMP gen_match_len_end + +matchlen_loop_standalone: + LEAL -8(DX), DX + LEAL 8(SI), SI + CMPL DX, $0x08 + JAE matchlen_loopback_standalone + +matchlen_match4_standalone: + CMPL DX, $0x04 + JB matchlen_match2_standalone + MOVL (AX)(SI*1), BX + CMPL (CX)(SI*1), BX + JNE matchlen_match2_standalone + LEAL -4(DX), DX + LEAL 4(SI), SI + +matchlen_match2_standalone: + CMPL DX, $0x02 + JB matchlen_match1_standalone + MOVW (AX)(SI*1), BX + CMPW (CX)(SI*1), BX + JNE matchlen_match1_standalone + LEAL -2(DX), DX + LEAL 2(SI), SI + +matchlen_match1_standalone: + CMPL DX, $0x01 + JB gen_match_len_end + MOVB (AX)(SI*1), BL + CMPB (CX)(SI*1), BL + JNE gen_match_len_end + INCL SI + +gen_match_len_end: + MOVQ SI, ret+48(FP) + RET diff --git a/vendor/github.com/klauspost/compress/flate/matchlen_generic.go b/vendor/github.com/klauspost/compress/flate/matchlen_generic.go new file mode 100644 index 000000000..ad5cd814b --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/matchlen_generic.go @@ -0,0 +1,33 @@ +//go:build !amd64 || appengine || !gc || noasm +// +build !amd64 appengine !gc noasm + +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. + +package flate + +import ( + "encoding/binary" + "math/bits" +) + +// matchLen returns the maximum common prefix length of a and b. +// a must be the shortest of the two. +func matchLen(a, b []byte) (n int) { + for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] { + diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b) + if diff != 0 { + return n + bits.TrailingZeros64(diff)>>3 + } + n += 8 + } + + for i := range a { + if a[i] != b[i] { + break + } + n++ + } + return n + +} diff --git a/vendor/github.com/klauspost/compress/gzip/gunzip.go b/vendor/github.com/klauspost/compress/gzip/gunzip.go index 6d630c390..dc2362a63 100644 --- a/vendor/github.com/klauspost/compress/gzip/gunzip.go +++ b/vendor/github.com/klauspost/compress/gzip/gunzip.go @@ -106,6 +106,7 @@ func (z *Reader) Reset(r io.Reader) error { *z = Reader{ decompressor: z.decompressor, multistream: true, + br: z.br, } if rr, ok := r.(flate.Reader); ok { z.r = rr diff --git a/vendor/github.com/klauspost/compress/gzip/gzip.go b/vendor/github.com/klauspost/compress/gzip/gzip.go index 26203851b..5bc720593 100644 --- a/vendor/github.com/klauspost/compress/gzip/gzip.go +++ b/vendor/github.com/klauspost/compress/gzip/gzip.go @@ -74,6 +74,27 @@ func NewWriterLevel(w io.Writer, level int) (*Writer, error) { return z, nil } +// MinCustomWindowSize is the minimum window size that can be sent to NewWriterWindow. +const MinCustomWindowSize = flate.MinCustomWindowSize + +// MaxCustomWindowSize is the maximum custom window that can be sent to NewWriterWindow. +const MaxCustomWindowSize = flate.MaxCustomWindowSize + +// NewWriterWindow returns a new Writer compressing data with a custom window size. +// windowSize must be from MinCustomWindowSize to MaxCustomWindowSize. +func NewWriterWindow(w io.Writer, windowSize int) (*Writer, error) { + if windowSize < MinCustomWindowSize { + return nil, errors.New("gzip: requested window size less than MinWindowSize") + } + if windowSize > MaxCustomWindowSize { + return nil, errors.New("gzip: requested window size bigger than MaxCustomWindowSize") + } + + z := new(Writer) + z.init(w, -windowSize) + return z, nil +} + func (z *Writer) init(w io.Writer, level int) { compressor := z.compressor if level != StatelessCompression { diff --git a/vendor/github.com/klauspost/compress/s2/dict.go b/vendor/github.com/klauspost/compress/s2/dict.go index 24f7ce80b..f125ad096 100644 --- a/vendor/github.com/klauspost/compress/s2/dict.go +++ b/vendor/github.com/klauspost/compress/s2/dict.go @@ -106,6 +106,25 @@ func MakeDict(data []byte, searchStart []byte) *Dict { return &d } +// MakeDictManual will create a dictionary. +// 'data' must be at least MinDictSize and less than or equal to MaxDictSize. +// A manual first repeat index into data must be provided. +// It must be less than len(data)-8. +func MakeDictManual(data []byte, firstIdx uint16) *Dict { + if len(data) < MinDictSize || int(firstIdx) >= len(data)-8 || len(data) > MaxDictSize { + return nil + } + var d Dict + dict := data + d.dict = dict + if cap(d.dict) < len(d.dict)+16 { + d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...) + } + + d.repeat = int(firstIdx) + return &d +} + // Encode returns the encoded form of src. The returned slice may be a sub- // slice of dst if dst was large enough to hold the entire encoded block. // Otherwise, a newly allocated slice will be returned. diff --git a/vendor/github.com/klauspost/compress/s2/encode.go b/vendor/github.com/klauspost/compress/s2/encode.go index e6c231021..0c9088adf 100644 --- a/vendor/github.com/klauspost/compress/s2/encode.go +++ b/vendor/github.com/klauspost/compress/s2/encode.go @@ -57,7 +57,7 @@ func Encode(dst, src []byte) []byte { // The function returns -1 if no improvement could be achieved. // Using actual compression will most often produce better compression than the estimate. func EstimateBlockSize(src []byte) (d int) { - if len(src) < 6 || int64(len(src)) > 0xffffffff { + if len(src) <= inputMargin || int64(len(src)) > 0xffffffff { return -1 } if len(src) <= 1024 { diff --git a/vendor/github.com/klauspost/compress/s2/encode_best.go b/vendor/github.com/klauspost/compress/s2/encode_best.go index 1d13e869a..47bac7423 100644 --- a/vendor/github.com/klauspost/compress/s2/encode_best.go +++ b/vendor/github.com/klauspost/compress/s2/encode_best.go @@ -157,6 +157,9 @@ func encodeBlockBest(dst, src []byte, dict *Dict) (d int) { return m } matchDict := func(candidate, s int, first uint32, rep bool) match { + if s >= MaxDictSrcOffset { + return match{offset: candidate, s: s} + } // Calculate offset as if in continuous array with s offset := -len(dict.dict) + candidate if best.length != 0 && best.s-best.offset == s-offset && !rep { diff --git a/vendor/github.com/klauspost/compress/s2/encode_go.go b/vendor/github.com/klauspost/compress/s2/encode_go.go index 0d39c7b0e..6b393c34d 100644 --- a/vendor/github.com/klauspost/compress/s2/encode_go.go +++ b/vendor/github.com/klauspost/compress/s2/encode_go.go @@ -316,6 +316,7 @@ func matchLen(a []byte, b []byte) int { return len(a) + checked } +// input must be > inputMargin func calcBlockSize(src []byte) (d int) { // Initialize the hash table. const ( @@ -501,6 +502,7 @@ emitRemainder: return d } +// length must be > inputMargin. func calcBlockSizeSmall(src []byte) (d int) { // Initialize the hash table. const ( diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s index 54031aa31..5f110d194 100644 --- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s +++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s @@ -249,15 +249,43 @@ emit_literal_done_repeat_emit_encodeBlockAsm: // matchLen XORL R11, R11 + +matchlen_loopback_16_repeat_extend_encodeBlockAsm: + CMPL R8, $0x10 + JB matchlen_match8_repeat_extend_encodeBlockAsm + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (BX)(R11*1), R10 + JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm + XORQ 8(BX)(R11*1), R12 + JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm + LEAL -16(R8), R8 + LEAL 16(R11), R11 + JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm + +matchlen_bsf_16repeat_extend_encodeBlockAsm: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP repeat_extend_forward_end_encodeBlockAsm + +matchlen_match8_repeat_extend_encodeBlockAsm: CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm + MOVQ (R9)(R11*1), R10 + XORQ (BX)(R11*1), R10 + JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm + LEAL -8(R8), R8 + LEAL 8(R11), R11 + JMP matchlen_match4_repeat_extend_encodeBlockAsm -matchlen_loopback_repeat_extend_encodeBlockAsm: - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_repeat_extend_encodeBlockAsm - +matchlen_bsf_8_repeat_extend_encodeBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -269,12 +297,6 @@ matchlen_loopback_repeat_extend_encodeBlockAsm: LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeBlockAsm -matchlen_loop_repeat_extend_encodeBlockAsm: - LEAL -8(R8), R8 - LEAL 8(R11), R11 - CMPL R8, $0x08 - JAE matchlen_loopback_repeat_extend_encodeBlockAsm - matchlen_match4_repeat_extend_encodeBlockAsm: CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm @@ -851,15 +873,43 @@ match_nolit_loop_encodeBlockAsm: // matchLen XORL R9, R9 + +matchlen_loopback_16_match_nolit_encodeBlockAsm: + CMPL SI, $0x10 + JB matchlen_match8_match_nolit_encodeBlockAsm + MOVQ (DI)(R9*1), R8 + MOVQ 8(DI)(R9*1), R10 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm + XORQ 8(BX)(R9*1), R10 + JNZ matchlen_bsf_16match_nolit_encodeBlockAsm + LEAL -16(SI), SI + LEAL 16(R9), R9 + JMP matchlen_loopback_16_match_nolit_encodeBlockAsm + +matchlen_bsf_16match_nolit_encodeBlockAsm: +#ifdef GOAMD64_v3 + TZCNTQ R10, R10 + +#else + BSFQ R10, R10 + +#endif + SARQ $0x03, R10 + LEAL 8(R9)(R10*1), R9 + JMP match_nolit_end_encodeBlockAsm + +matchlen_match8_match_nolit_encodeBlockAsm: CMPL SI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm + LEAL -8(SI), SI + LEAL 8(R9), R9 + JMP matchlen_match4_match_nolit_encodeBlockAsm -matchlen_loopback_match_nolit_encodeBlockAsm: - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 - TESTQ R8, R8 - JZ matchlen_loop_match_nolit_encodeBlockAsm - +matchlen_bsf_8_match_nolit_encodeBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R8, R8 @@ -871,12 +921,6 @@ matchlen_loopback_match_nolit_encodeBlockAsm: LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeBlockAsm -matchlen_loop_match_nolit_encodeBlockAsm: - LEAL -8(SI), SI - LEAL 8(R9), R9 - CMPL SI, $0x08 - JAE matchlen_loopback_match_nolit_encodeBlockAsm - matchlen_match4_match_nolit_encodeBlockAsm: CMPL SI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm @@ -1610,15 +1654,43 @@ emit_literal_done_repeat_emit_encodeBlockAsm4MB: // matchLen XORL R11, R11 + +matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB: + CMPL R8, $0x10 + JB matchlen_match8_repeat_extend_encodeBlockAsm4MB + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (BX)(R11*1), R10 + JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB + XORQ 8(BX)(R11*1), R12 + JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm4MB + LEAL -16(R8), R8 + LEAL 16(R11), R11 + JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB + +matchlen_bsf_16repeat_extend_encodeBlockAsm4MB: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP repeat_extend_forward_end_encodeBlockAsm4MB + +matchlen_match8_repeat_extend_encodeBlockAsm4MB: CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm4MB + MOVQ (R9)(R11*1), R10 + XORQ (BX)(R11*1), R10 + JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB + LEAL -8(R8), R8 + LEAL 8(R11), R11 + JMP matchlen_match4_repeat_extend_encodeBlockAsm4MB -matchlen_loopback_repeat_extend_encodeBlockAsm4MB: - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_repeat_extend_encodeBlockAsm4MB - +matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -1630,12 +1702,6 @@ matchlen_loopback_repeat_extend_encodeBlockAsm4MB: LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeBlockAsm4MB -matchlen_loop_repeat_extend_encodeBlockAsm4MB: - LEAL -8(R8), R8 - LEAL 8(R11), R11 - CMPL R8, $0x08 - JAE matchlen_loopback_repeat_extend_encodeBlockAsm4MB - matchlen_match4_repeat_extend_encodeBlockAsm4MB: CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm4MB @@ -2162,15 +2228,43 @@ match_nolit_loop_encodeBlockAsm4MB: // matchLen XORL R9, R9 + +matchlen_loopback_16_match_nolit_encodeBlockAsm4MB: + CMPL SI, $0x10 + JB matchlen_match8_match_nolit_encodeBlockAsm4MB + MOVQ (DI)(R9*1), R8 + MOVQ 8(DI)(R9*1), R10 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm4MB + XORQ 8(BX)(R9*1), R10 + JNZ matchlen_bsf_16match_nolit_encodeBlockAsm4MB + LEAL -16(SI), SI + LEAL 16(R9), R9 + JMP matchlen_loopback_16_match_nolit_encodeBlockAsm4MB + +matchlen_bsf_16match_nolit_encodeBlockAsm4MB: +#ifdef GOAMD64_v3 + TZCNTQ R10, R10 + +#else + BSFQ R10, R10 + +#endif + SARQ $0x03, R10 + LEAL 8(R9)(R10*1), R9 + JMP match_nolit_end_encodeBlockAsm4MB + +matchlen_match8_match_nolit_encodeBlockAsm4MB: CMPL SI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm4MB + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm4MB + LEAL -8(SI), SI + LEAL 8(R9), R9 + JMP matchlen_match4_match_nolit_encodeBlockAsm4MB -matchlen_loopback_match_nolit_encodeBlockAsm4MB: - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 - TESTQ R8, R8 - JZ matchlen_loop_match_nolit_encodeBlockAsm4MB - +matchlen_bsf_8_match_nolit_encodeBlockAsm4MB: #ifdef GOAMD64_v3 TZCNTQ R8, R8 @@ -2182,12 +2276,6 @@ matchlen_loopback_match_nolit_encodeBlockAsm4MB: LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeBlockAsm4MB -matchlen_loop_match_nolit_encodeBlockAsm4MB: - LEAL -8(SI), SI - LEAL 8(R9), R9 - CMPL SI, $0x08 - JAE matchlen_loopback_match_nolit_encodeBlockAsm4MB - matchlen_match4_match_nolit_encodeBlockAsm4MB: CMPL SI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm4MB @@ -2873,15 +2961,43 @@ emit_literal_done_repeat_emit_encodeBlockAsm12B: // matchLen XORL R11, R11 + +matchlen_loopback_16_repeat_extend_encodeBlockAsm12B: + CMPL R8, $0x10 + JB matchlen_match8_repeat_extend_encodeBlockAsm12B + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (BX)(R11*1), R10 + JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm12B + XORQ 8(BX)(R11*1), R12 + JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm12B + LEAL -16(R8), R8 + LEAL 16(R11), R11 + JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm12B + +matchlen_bsf_16repeat_extend_encodeBlockAsm12B: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP repeat_extend_forward_end_encodeBlockAsm12B + +matchlen_match8_repeat_extend_encodeBlockAsm12B: CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm12B + MOVQ (R9)(R11*1), R10 + XORQ (BX)(R11*1), R10 + JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm12B + LEAL -8(R8), R8 + LEAL 8(R11), R11 + JMP matchlen_match4_repeat_extend_encodeBlockAsm12B -matchlen_loopback_repeat_extend_encodeBlockAsm12B: - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_repeat_extend_encodeBlockAsm12B - +matchlen_bsf_8_repeat_extend_encodeBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -2893,12 +3009,6 @@ matchlen_loopback_repeat_extend_encodeBlockAsm12B: LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeBlockAsm12B -matchlen_loop_repeat_extend_encodeBlockAsm12B: - LEAL -8(R8), R8 - LEAL 8(R11), R11 - CMPL R8, $0x08 - JAE matchlen_loopback_repeat_extend_encodeBlockAsm12B - matchlen_match4_repeat_extend_encodeBlockAsm12B: CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm12B @@ -3303,15 +3413,43 @@ match_nolit_loop_encodeBlockAsm12B: // matchLen XORL R9, R9 + +matchlen_loopback_16_match_nolit_encodeBlockAsm12B: + CMPL SI, $0x10 + JB matchlen_match8_match_nolit_encodeBlockAsm12B + MOVQ (DI)(R9*1), R8 + MOVQ 8(DI)(R9*1), R10 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm12B + XORQ 8(BX)(R9*1), R10 + JNZ matchlen_bsf_16match_nolit_encodeBlockAsm12B + LEAL -16(SI), SI + LEAL 16(R9), R9 + JMP matchlen_loopback_16_match_nolit_encodeBlockAsm12B + +matchlen_bsf_16match_nolit_encodeBlockAsm12B: +#ifdef GOAMD64_v3 + TZCNTQ R10, R10 + +#else + BSFQ R10, R10 + +#endif + SARQ $0x03, R10 + LEAL 8(R9)(R10*1), R9 + JMP match_nolit_end_encodeBlockAsm12B + +matchlen_match8_match_nolit_encodeBlockAsm12B: CMPL SI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm12B + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm12B + LEAL -8(SI), SI + LEAL 8(R9), R9 + JMP matchlen_match4_match_nolit_encodeBlockAsm12B -matchlen_loopback_match_nolit_encodeBlockAsm12B: - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 - TESTQ R8, R8 - JZ matchlen_loop_match_nolit_encodeBlockAsm12B - +matchlen_bsf_8_match_nolit_encodeBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R8, R8 @@ -3323,12 +3461,6 @@ matchlen_loopback_match_nolit_encodeBlockAsm12B: LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeBlockAsm12B -matchlen_loop_match_nolit_encodeBlockAsm12B: - LEAL -8(SI), SI - LEAL 8(R9), R9 - CMPL SI, $0x08 - JAE matchlen_loopback_match_nolit_encodeBlockAsm12B - matchlen_match4_match_nolit_encodeBlockAsm12B: CMPL SI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm12B @@ -3904,15 +4036,43 @@ emit_literal_done_repeat_emit_encodeBlockAsm10B: // matchLen XORL R11, R11 + +matchlen_loopback_16_repeat_extend_encodeBlockAsm10B: + CMPL R8, $0x10 + JB matchlen_match8_repeat_extend_encodeBlockAsm10B + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (BX)(R11*1), R10 + JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm10B + XORQ 8(BX)(R11*1), R12 + JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm10B + LEAL -16(R8), R8 + LEAL 16(R11), R11 + JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm10B + +matchlen_bsf_16repeat_extend_encodeBlockAsm10B: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP repeat_extend_forward_end_encodeBlockAsm10B + +matchlen_match8_repeat_extend_encodeBlockAsm10B: CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm10B + MOVQ (R9)(R11*1), R10 + XORQ (BX)(R11*1), R10 + JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm10B + LEAL -8(R8), R8 + LEAL 8(R11), R11 + JMP matchlen_match4_repeat_extend_encodeBlockAsm10B -matchlen_loopback_repeat_extend_encodeBlockAsm10B: - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_repeat_extend_encodeBlockAsm10B - +matchlen_bsf_8_repeat_extend_encodeBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -3924,12 +4084,6 @@ matchlen_loopback_repeat_extend_encodeBlockAsm10B: LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeBlockAsm10B -matchlen_loop_repeat_extend_encodeBlockAsm10B: - LEAL -8(R8), R8 - LEAL 8(R11), R11 - CMPL R8, $0x08 - JAE matchlen_loopback_repeat_extend_encodeBlockAsm10B - matchlen_match4_repeat_extend_encodeBlockAsm10B: CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm10B @@ -4334,15 +4488,43 @@ match_nolit_loop_encodeBlockAsm10B: // matchLen XORL R9, R9 + +matchlen_loopback_16_match_nolit_encodeBlockAsm10B: + CMPL SI, $0x10 + JB matchlen_match8_match_nolit_encodeBlockAsm10B + MOVQ (DI)(R9*1), R8 + MOVQ 8(DI)(R9*1), R10 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm10B + XORQ 8(BX)(R9*1), R10 + JNZ matchlen_bsf_16match_nolit_encodeBlockAsm10B + LEAL -16(SI), SI + LEAL 16(R9), R9 + JMP matchlen_loopback_16_match_nolit_encodeBlockAsm10B + +matchlen_bsf_16match_nolit_encodeBlockAsm10B: +#ifdef GOAMD64_v3 + TZCNTQ R10, R10 + +#else + BSFQ R10, R10 + +#endif + SARQ $0x03, R10 + LEAL 8(R9)(R10*1), R9 + JMP match_nolit_end_encodeBlockAsm10B + +matchlen_match8_match_nolit_encodeBlockAsm10B: CMPL SI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm10B + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm10B + LEAL -8(SI), SI + LEAL 8(R9), R9 + JMP matchlen_match4_match_nolit_encodeBlockAsm10B -matchlen_loopback_match_nolit_encodeBlockAsm10B: - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 - TESTQ R8, R8 - JZ matchlen_loop_match_nolit_encodeBlockAsm10B - +matchlen_bsf_8_match_nolit_encodeBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R8, R8 @@ -4354,12 +4536,6 @@ matchlen_loopback_match_nolit_encodeBlockAsm10B: LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeBlockAsm10B -matchlen_loop_match_nolit_encodeBlockAsm10B: - LEAL -8(SI), SI - LEAL 8(R9), R9 - CMPL SI, $0x08 - JAE matchlen_loopback_match_nolit_encodeBlockAsm10B - matchlen_match4_match_nolit_encodeBlockAsm10B: CMPL SI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm10B @@ -4935,15 +5111,43 @@ emit_literal_done_repeat_emit_encodeBlockAsm8B: // matchLen XORL R11, R11 + +matchlen_loopback_16_repeat_extend_encodeBlockAsm8B: + CMPL R8, $0x10 + JB matchlen_match8_repeat_extend_encodeBlockAsm8B + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (BX)(R11*1), R10 + JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm8B + XORQ 8(BX)(R11*1), R12 + JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm8B + LEAL -16(R8), R8 + LEAL 16(R11), R11 + JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm8B + +matchlen_bsf_16repeat_extend_encodeBlockAsm8B: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP repeat_extend_forward_end_encodeBlockAsm8B + +matchlen_match8_repeat_extend_encodeBlockAsm8B: CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm8B + MOVQ (R9)(R11*1), R10 + XORQ (BX)(R11*1), R10 + JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm8B + LEAL -8(R8), R8 + LEAL 8(R11), R11 + JMP matchlen_match4_repeat_extend_encodeBlockAsm8B -matchlen_loopback_repeat_extend_encodeBlockAsm8B: - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_repeat_extend_encodeBlockAsm8B - +matchlen_bsf_8_repeat_extend_encodeBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -4955,12 +5159,6 @@ matchlen_loopback_repeat_extend_encodeBlockAsm8B: LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeBlockAsm8B -matchlen_loop_repeat_extend_encodeBlockAsm8B: - LEAL -8(R8), R8 - LEAL 8(R11), R11 - CMPL R8, $0x08 - JAE matchlen_loopback_repeat_extend_encodeBlockAsm8B - matchlen_match4_repeat_extend_encodeBlockAsm8B: CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm8B @@ -5351,15 +5549,43 @@ match_nolit_loop_encodeBlockAsm8B: // matchLen XORL R9, R9 + +matchlen_loopback_16_match_nolit_encodeBlockAsm8B: + CMPL SI, $0x10 + JB matchlen_match8_match_nolit_encodeBlockAsm8B + MOVQ (DI)(R9*1), R8 + MOVQ 8(DI)(R9*1), R10 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm8B + XORQ 8(BX)(R9*1), R10 + JNZ matchlen_bsf_16match_nolit_encodeBlockAsm8B + LEAL -16(SI), SI + LEAL 16(R9), R9 + JMP matchlen_loopback_16_match_nolit_encodeBlockAsm8B + +matchlen_bsf_16match_nolit_encodeBlockAsm8B: +#ifdef GOAMD64_v3 + TZCNTQ R10, R10 + +#else + BSFQ R10, R10 + +#endif + SARQ $0x03, R10 + LEAL 8(R9)(R10*1), R9 + JMP match_nolit_end_encodeBlockAsm8B + +matchlen_match8_match_nolit_encodeBlockAsm8B: CMPL SI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm8B + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm8B + LEAL -8(SI), SI + LEAL 8(R9), R9 + JMP matchlen_match4_match_nolit_encodeBlockAsm8B -matchlen_loopback_match_nolit_encodeBlockAsm8B: - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 - TESTQ R8, R8 - JZ matchlen_loop_match_nolit_encodeBlockAsm8B - +matchlen_bsf_8_match_nolit_encodeBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R8, R8 @@ -5371,12 +5597,6 @@ matchlen_loopback_match_nolit_encodeBlockAsm8B: LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeBlockAsm8B -matchlen_loop_match_nolit_encodeBlockAsm8B: - LEAL -8(SI), SI - LEAL 8(R9), R9 - CMPL SI, $0x08 - JAE matchlen_loopback_match_nolit_encodeBlockAsm8B - matchlen_match4_match_nolit_encodeBlockAsm8B: CMPL SI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm8B @@ -5854,15 +6074,43 @@ match_dst_size_check_encodeBetterBlockAsm: // matchLen XORL R11, R11 + +matchlen_loopback_16_match_nolit_encodeBetterBlockAsm: + CMPL DI, $0x10 + JB matchlen_match8_match_nolit_encodeBetterBlockAsm + MOVQ (R8)(R11*1), R10 + MOVQ 8(R8)(R11*1), R12 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm + XORQ 8(R9)(R11*1), R12 + JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm + LEAL -16(DI), DI + LEAL 16(R11), R11 + JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm + +matchlen_bsf_16match_nolit_encodeBetterBlockAsm: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP match_nolit_end_encodeBetterBlockAsm + +matchlen_match8_match_nolit_encodeBetterBlockAsm: CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm + LEAL -8(DI), DI + LEAL 8(R11), R11 + JMP matchlen_match4_match_nolit_encodeBetterBlockAsm -matchlen_loopback_match_nolit_encodeBetterBlockAsm: - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_match_nolit_encodeBetterBlockAsm - +matchlen_bsf_8_match_nolit_encodeBetterBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -5874,12 +6122,6 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm: LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeBetterBlockAsm -matchlen_loop_match_nolit_encodeBetterBlockAsm: - LEAL -8(DI), DI - LEAL 8(R11), R11 - CMPL DI, $0x08 - JAE matchlen_loopback_match_nolit_encodeBetterBlockAsm - matchlen_match4_match_nolit_encodeBetterBlockAsm: CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm @@ -6926,15 +7168,43 @@ match_dst_size_check_encodeBetterBlockAsm4MB: // matchLen XORL R11, R11 + +matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB: + CMPL DI, $0x10 + JB matchlen_match8_match_nolit_encodeBetterBlockAsm4MB + MOVQ (R8)(R11*1), R10 + MOVQ 8(R8)(R11*1), R12 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB + XORQ 8(R9)(R11*1), R12 + JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB + LEAL -16(DI), DI + LEAL 16(R11), R11 + JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB + +matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP match_nolit_end_encodeBetterBlockAsm4MB + +matchlen_match8_match_nolit_encodeBetterBlockAsm4MB: CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm4MB + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB + LEAL -8(DI), DI + LEAL 8(R11), R11 + JMP matchlen_match4_match_nolit_encodeBetterBlockAsm4MB -matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB: - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_match_nolit_encodeBetterBlockAsm4MB - +matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -6946,12 +7216,6 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB: LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeBetterBlockAsm4MB -matchlen_loop_match_nolit_encodeBetterBlockAsm4MB: - LEAL -8(DI), DI - LEAL 8(R11), R11 - CMPL DI, $0x08 - JAE matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB - matchlen_match4_match_nolit_encodeBetterBlockAsm4MB: CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm4MB @@ -7924,15 +8188,43 @@ match_dst_size_check_encodeBetterBlockAsm12B: // matchLen XORL R11, R11 + +matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B: + CMPL DI, $0x10 + JB matchlen_match8_match_nolit_encodeBetterBlockAsm12B + MOVQ (R8)(R11*1), R10 + MOVQ 8(R8)(R11*1), R12 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B + XORQ 8(R9)(R11*1), R12 + JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B + LEAL -16(DI), DI + LEAL 16(R11), R11 + JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B + +matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP match_nolit_end_encodeBetterBlockAsm12B + +matchlen_match8_match_nolit_encodeBetterBlockAsm12B: CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm12B + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B + LEAL -8(DI), DI + LEAL 8(R11), R11 + JMP matchlen_match4_match_nolit_encodeBetterBlockAsm12B -matchlen_loopback_match_nolit_encodeBetterBlockAsm12B: - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_match_nolit_encodeBetterBlockAsm12B - +matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -7944,12 +8236,6 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm12B: LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeBetterBlockAsm12B -matchlen_loop_match_nolit_encodeBetterBlockAsm12B: - LEAL -8(DI), DI - LEAL 8(R11), R11 - CMPL DI, $0x08 - JAE matchlen_loopback_match_nolit_encodeBetterBlockAsm12B - matchlen_match4_match_nolit_encodeBetterBlockAsm12B: CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm12B @@ -8775,15 +9061,43 @@ match_dst_size_check_encodeBetterBlockAsm10B: // matchLen XORL R11, R11 + +matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B: + CMPL DI, $0x10 + JB matchlen_match8_match_nolit_encodeBetterBlockAsm10B + MOVQ (R8)(R11*1), R10 + MOVQ 8(R8)(R11*1), R12 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B + XORQ 8(R9)(R11*1), R12 + JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B + LEAL -16(DI), DI + LEAL 16(R11), R11 + JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B + +matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP match_nolit_end_encodeBetterBlockAsm10B + +matchlen_match8_match_nolit_encodeBetterBlockAsm10B: CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm10B + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B + LEAL -8(DI), DI + LEAL 8(R11), R11 + JMP matchlen_match4_match_nolit_encodeBetterBlockAsm10B -matchlen_loopback_match_nolit_encodeBetterBlockAsm10B: - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_match_nolit_encodeBetterBlockAsm10B - +matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -8795,12 +9109,6 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm10B: LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeBetterBlockAsm10B -matchlen_loop_match_nolit_encodeBetterBlockAsm10B: - LEAL -8(DI), DI - LEAL 8(R11), R11 - CMPL DI, $0x08 - JAE matchlen_loopback_match_nolit_encodeBetterBlockAsm10B - matchlen_match4_match_nolit_encodeBetterBlockAsm10B: CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm10B @@ -9626,15 +9934,43 @@ match_dst_size_check_encodeBetterBlockAsm8B: // matchLen XORL R11, R11 + +matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B: + CMPL DI, $0x10 + JB matchlen_match8_match_nolit_encodeBetterBlockAsm8B + MOVQ (R8)(R11*1), R10 + MOVQ 8(R8)(R11*1), R12 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B + XORQ 8(R9)(R11*1), R12 + JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B + LEAL -16(DI), DI + LEAL 16(R11), R11 + JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B + +matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP match_nolit_end_encodeBetterBlockAsm8B + +matchlen_match8_match_nolit_encodeBetterBlockAsm8B: CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm8B + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B + LEAL -8(DI), DI + LEAL 8(R11), R11 + JMP matchlen_match4_match_nolit_encodeBetterBlockAsm8B -matchlen_loopback_match_nolit_encodeBetterBlockAsm8B: - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_match_nolit_encodeBetterBlockAsm8B - +matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -9646,12 +9982,6 @@ matchlen_loopback_match_nolit_encodeBetterBlockAsm8B: LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeBetterBlockAsm8B -matchlen_loop_match_nolit_encodeBetterBlockAsm8B: - LEAL -8(DI), DI - LEAL 8(R11), R11 - CMPL DI, $0x08 - JAE matchlen_loopback_match_nolit_encodeBetterBlockAsm8B - matchlen_match4_match_nolit_encodeBetterBlockAsm8B: CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm8B @@ -10575,15 +10905,43 @@ emit_literal_done_repeat_emit_encodeSnappyBlockAsm: // matchLen XORL R10, R10 + +matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm: + CMPL DI, $0x10 + JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm + XORQ 8(BX)(R10*1), R11 + JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm + LEAL -16(DI), DI + LEAL 16(R10), R10 + JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm + +matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm: +#ifdef GOAMD64_v3 + TZCNTQ R11, R11 + +#else + BSFQ R11, R11 + +#endif + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 + JMP repeat_extend_forward_end_encodeSnappyBlockAsm + +matchlen_match8_repeat_extend_encodeSnappyBlockAsm: CMPL DI, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm + MOVQ (R8)(R10*1), R9 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm + LEAL -8(DI), DI + LEAL 8(R10), R10 + JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm -matchlen_loopback_repeat_extend_encodeSnappyBlockAsm: - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 - TESTQ R9, R9 - JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm - +matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R9, R9 @@ -10595,12 +10953,6 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm: LEAL (R10)(R9*1), R10 JMP repeat_extend_forward_end_encodeSnappyBlockAsm -matchlen_loop_repeat_extend_encodeSnappyBlockAsm: - LEAL -8(DI), DI - LEAL 8(R10), R10 - CMPL DI, $0x08 - JAE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm - matchlen_match4_repeat_extend_encodeSnappyBlockAsm: CMPL DI, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm @@ -10897,15 +11249,43 @@ match_nolit_loop_encodeSnappyBlockAsm: // matchLen XORL R9, R9 + +matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm: + CMPL SI, $0x10 + JB matchlen_match8_match_nolit_encodeSnappyBlockAsm + MOVQ (DI)(R9*1), R8 + MOVQ 8(DI)(R9*1), R10 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm + XORQ 8(BX)(R9*1), R10 + JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm + LEAL -16(SI), SI + LEAL 16(R9), R9 + JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm + +matchlen_bsf_16match_nolit_encodeSnappyBlockAsm: +#ifdef GOAMD64_v3 + TZCNTQ R10, R10 + +#else + BSFQ R10, R10 + +#endif + SARQ $0x03, R10 + LEAL 8(R9)(R10*1), R9 + JMP match_nolit_end_encodeSnappyBlockAsm + +matchlen_match8_match_nolit_encodeSnappyBlockAsm: CMPL SI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm + LEAL -8(SI), SI + LEAL 8(R9), R9 + JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm -matchlen_loopback_match_nolit_encodeSnappyBlockAsm: - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 - TESTQ R8, R8 - JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm - +matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R8, R8 @@ -10917,12 +11297,6 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm: LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeSnappyBlockAsm -matchlen_loop_match_nolit_encodeSnappyBlockAsm: - LEAL -8(SI), SI - LEAL 8(R9), R9 - CMPL SI, $0x08 - JAE matchlen_loopback_match_nolit_encodeSnappyBlockAsm - matchlen_match4_match_nolit_encodeSnappyBlockAsm: CMPL SI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm @@ -11437,15 +11811,43 @@ emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K: // matchLen XORL R10, R10 + +matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K: + CMPL DI, $0x10 + JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K + XORQ 8(BX)(R10*1), R11 + JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K + LEAL -16(DI), DI + LEAL 16(R10), R10 + JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K + +matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K: +#ifdef GOAMD64_v3 + TZCNTQ R11, R11 + +#else + BSFQ R11, R11 + +#endif + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 + JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K + +matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K: CMPL DI, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K + MOVQ (R8)(R10*1), R9 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K + LEAL -8(DI), DI + LEAL 8(R10), R10 + JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K -matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K: - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 - TESTQ R9, R9 - JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm64K - +matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 TZCNTQ R9, R9 @@ -11457,12 +11859,6 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K: LEAL (R10)(R9*1), R10 JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K -matchlen_loop_repeat_extend_encodeSnappyBlockAsm64K: - LEAL -8(DI), DI - LEAL 8(R10), R10 - CMPL DI, $0x08 - JAE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K - matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K: CMPL DI, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K @@ -11719,15 +12115,43 @@ match_nolit_loop_encodeSnappyBlockAsm64K: // matchLen XORL R9, R9 + +matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K: + CMPL SI, $0x10 + JB matchlen_match8_match_nolit_encodeSnappyBlockAsm64K + MOVQ (DI)(R9*1), R8 + MOVQ 8(DI)(R9*1), R10 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K + XORQ 8(BX)(R9*1), R10 + JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K + LEAL -16(SI), SI + LEAL 16(R9), R9 + JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K + +matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K: +#ifdef GOAMD64_v3 + TZCNTQ R10, R10 + +#else + BSFQ R10, R10 + +#endif + SARQ $0x03, R10 + LEAL 8(R9)(R10*1), R9 + JMP match_nolit_end_encodeSnappyBlockAsm64K + +matchlen_match8_match_nolit_encodeSnappyBlockAsm64K: CMPL SI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm64K + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K + LEAL -8(SI), SI + LEAL 8(R9), R9 + JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm64K -matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K: - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 - TESTQ R8, R8 - JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm64K - +matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 TZCNTQ R8, R8 @@ -11739,12 +12163,6 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K: LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeSnappyBlockAsm64K -matchlen_loop_match_nolit_encodeSnappyBlockAsm64K: - LEAL -8(SI), SI - LEAL 8(R9), R9 - CMPL SI, $0x08 - JAE matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K - matchlen_match4_match_nolit_encodeSnappyBlockAsm64K: CMPL SI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm64K @@ -12219,15 +12637,43 @@ emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B: // matchLen XORL R10, R10 + +matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B: + CMPL DI, $0x10 + JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B + XORQ 8(BX)(R10*1), R11 + JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B + LEAL -16(DI), DI + LEAL 16(R10), R10 + JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B + +matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B: +#ifdef GOAMD64_v3 + TZCNTQ R11, R11 + +#else + BSFQ R11, R11 + +#endif + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 + JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B + +matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B: CMPL DI, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B + MOVQ (R8)(R10*1), R9 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B + LEAL -8(DI), DI + LEAL 8(R10), R10 + JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B -matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B: - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 - TESTQ R9, R9 - JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B - +matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R9, R9 @@ -12239,12 +12685,6 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B: LEAL (R10)(R9*1), R10 JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B -matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B: - LEAL -8(DI), DI - LEAL 8(R10), R10 - CMPL DI, $0x08 - JAE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B - matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B: CMPL DI, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B @@ -12501,15 +12941,43 @@ match_nolit_loop_encodeSnappyBlockAsm12B: // matchLen XORL R9, R9 + +matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B: + CMPL SI, $0x10 + JB matchlen_match8_match_nolit_encodeSnappyBlockAsm12B + MOVQ (DI)(R9*1), R8 + MOVQ 8(DI)(R9*1), R10 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B + XORQ 8(BX)(R9*1), R10 + JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B + LEAL -16(SI), SI + LEAL 16(R9), R9 + JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B + +matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B: +#ifdef GOAMD64_v3 + TZCNTQ R10, R10 + +#else + BSFQ R10, R10 + +#endif + SARQ $0x03, R10 + LEAL 8(R9)(R10*1), R9 + JMP match_nolit_end_encodeSnappyBlockAsm12B + +matchlen_match8_match_nolit_encodeSnappyBlockAsm12B: CMPL SI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm12B + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B + LEAL -8(SI), SI + LEAL 8(R9), R9 + JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm12B -matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B: - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 - TESTQ R8, R8 - JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm12B - +matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R8, R8 @@ -12521,12 +12989,6 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B: LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeSnappyBlockAsm12B -matchlen_loop_match_nolit_encodeSnappyBlockAsm12B: - LEAL -8(SI), SI - LEAL 8(R9), R9 - CMPL SI, $0x08 - JAE matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B - matchlen_match4_match_nolit_encodeSnappyBlockAsm12B: CMPL SI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm12B @@ -13001,15 +13463,43 @@ emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B: // matchLen XORL R10, R10 + +matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B: + CMPL DI, $0x10 + JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B + XORQ 8(BX)(R10*1), R11 + JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B + LEAL -16(DI), DI + LEAL 16(R10), R10 + JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B + +matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B: +#ifdef GOAMD64_v3 + TZCNTQ R11, R11 + +#else + BSFQ R11, R11 + +#endif + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 + JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B + +matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B: CMPL DI, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B + MOVQ (R8)(R10*1), R9 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B + LEAL -8(DI), DI + LEAL 8(R10), R10 + JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B -matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B: - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 - TESTQ R9, R9 - JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B - +matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R9, R9 @@ -13021,12 +13511,6 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B: LEAL (R10)(R9*1), R10 JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B -matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B: - LEAL -8(DI), DI - LEAL 8(R10), R10 - CMPL DI, $0x08 - JAE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B - matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B: CMPL DI, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B @@ -13283,15 +13767,43 @@ match_nolit_loop_encodeSnappyBlockAsm10B: // matchLen XORL R9, R9 + +matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B: + CMPL SI, $0x10 + JB matchlen_match8_match_nolit_encodeSnappyBlockAsm10B + MOVQ (DI)(R9*1), R8 + MOVQ 8(DI)(R9*1), R10 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B + XORQ 8(BX)(R9*1), R10 + JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B + LEAL -16(SI), SI + LEAL 16(R9), R9 + JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B + +matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B: +#ifdef GOAMD64_v3 + TZCNTQ R10, R10 + +#else + BSFQ R10, R10 + +#endif + SARQ $0x03, R10 + LEAL 8(R9)(R10*1), R9 + JMP match_nolit_end_encodeSnappyBlockAsm10B + +matchlen_match8_match_nolit_encodeSnappyBlockAsm10B: CMPL SI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm10B + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B + LEAL -8(SI), SI + LEAL 8(R9), R9 + JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm10B -matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B: - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 - TESTQ R8, R8 - JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm10B - +matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R8, R8 @@ -13303,12 +13815,6 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B: LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeSnappyBlockAsm10B -matchlen_loop_match_nolit_encodeSnappyBlockAsm10B: - LEAL -8(SI), SI - LEAL 8(R9), R9 - CMPL SI, $0x08 - JAE matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B - matchlen_match4_match_nolit_encodeSnappyBlockAsm10B: CMPL SI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm10B @@ -13783,15 +14289,43 @@ emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B: // matchLen XORL R10, R10 + +matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B: + CMPL DI, $0x10 + JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B + XORQ 8(BX)(R10*1), R11 + JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B + LEAL -16(DI), DI + LEAL 16(R10), R10 + JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B + +matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B: +#ifdef GOAMD64_v3 + TZCNTQ R11, R11 + +#else + BSFQ R11, R11 + +#endif + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 + JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B + +matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B: CMPL DI, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B + MOVQ (R8)(R10*1), R9 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B + LEAL -8(DI), DI + LEAL 8(R10), R10 + JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B -matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B: - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 - TESTQ R9, R9 - JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B - +matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R9, R9 @@ -13803,12 +14337,6 @@ matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B: LEAL (R10)(R9*1), R10 JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B -matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B: - LEAL -8(DI), DI - LEAL 8(R10), R10 - CMPL DI, $0x08 - JAE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B - matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B: CMPL DI, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B @@ -14063,15 +14591,43 @@ match_nolit_loop_encodeSnappyBlockAsm8B: // matchLen XORL R9, R9 + +matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B: + CMPL SI, $0x10 + JB matchlen_match8_match_nolit_encodeSnappyBlockAsm8B + MOVQ (DI)(R9*1), R8 + MOVQ 8(DI)(R9*1), R10 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B + XORQ 8(BX)(R9*1), R10 + JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B + LEAL -16(SI), SI + LEAL 16(R9), R9 + JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B + +matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B: +#ifdef GOAMD64_v3 + TZCNTQ R10, R10 + +#else + BSFQ R10, R10 + +#endif + SARQ $0x03, R10 + LEAL 8(R9)(R10*1), R9 + JMP match_nolit_end_encodeSnappyBlockAsm8B + +matchlen_match8_match_nolit_encodeSnappyBlockAsm8B: CMPL SI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm8B + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B + LEAL -8(SI), SI + LEAL 8(R9), R9 + JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm8B -matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B: - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 - TESTQ R8, R8 - JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm8B - +matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R8, R8 @@ -14083,12 +14639,6 @@ matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B: LEAL (R9)(R8*1), R9 JMP match_nolit_end_encodeSnappyBlockAsm8B -matchlen_loop_match_nolit_encodeSnappyBlockAsm8B: - LEAL -8(SI), SI - LEAL 8(R9), R9 - CMPL SI, $0x08 - JAE matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B - matchlen_match4_match_nolit_encodeSnappyBlockAsm8B: CMPL SI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm8B @@ -14473,15 +15023,43 @@ match_dst_size_check_encodeSnappyBetterBlockAsm: // matchLen XORL R11, R11 + +matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm: + CMPL DI, $0x10 + JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm + MOVQ (R8)(R11*1), R10 + MOVQ 8(R8)(R11*1), R12 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm + XORQ 8(R9)(R11*1), R12 + JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm + LEAL -16(DI), DI + LEAL 16(R11), R11 + JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm + +matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP match_nolit_end_encodeSnappyBetterBlockAsm + +matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm: CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm + LEAL -8(DI), DI + LEAL 8(R11), R11 + JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm -matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm: - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm - +matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -14493,12 +15071,6 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm: LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeSnappyBetterBlockAsm -matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm: - LEAL -8(DI), DI - LEAL 8(R11), R11 - CMPL DI, $0x08 - JAE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm - matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm: CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm @@ -15096,15 +15668,43 @@ match_dst_size_check_encodeSnappyBetterBlockAsm64K: // matchLen XORL R11, R11 + +matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K: + CMPL DI, $0x10 + JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K + MOVQ (R8)(R11*1), R10 + MOVQ 8(R8)(R11*1), R12 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K + XORQ 8(R9)(R11*1), R12 + JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K + LEAL -16(DI), DI + LEAL 16(R11), R11 + JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K + +matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP match_nolit_end_encodeSnappyBetterBlockAsm64K + +matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K: CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K + LEAL -8(DI), DI + LEAL 8(R11), R11 + JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K -matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K: - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm64K - +matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -15116,12 +15716,6 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K: LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeSnappyBetterBlockAsm64K -matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm64K: - LEAL -8(DI), DI - LEAL 8(R11), R11 - CMPL DI, $0x08 - JAE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K - matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K: CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K @@ -15654,15 +16248,43 @@ match_dst_size_check_encodeSnappyBetterBlockAsm12B: // matchLen XORL R11, R11 + +matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B: + CMPL DI, $0x10 + JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B + MOVQ (R8)(R11*1), R10 + MOVQ 8(R8)(R11*1), R12 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B + XORQ 8(R9)(R11*1), R12 + JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B + LEAL -16(DI), DI + LEAL 16(R11), R11 + JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B + +matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP match_nolit_end_encodeSnappyBetterBlockAsm12B + +matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B: CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B + LEAL -8(DI), DI + LEAL 8(R11), R11 + JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B -matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B: - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm12B - +matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -15674,12 +16296,6 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B: LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeSnappyBetterBlockAsm12B -matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm12B: - LEAL -8(DI), DI - LEAL 8(R11), R11 - CMPL DI, $0x08 - JAE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B - matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B: CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B @@ -16212,15 +16828,43 @@ match_dst_size_check_encodeSnappyBetterBlockAsm10B: // matchLen XORL R11, R11 + +matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B: + CMPL DI, $0x10 + JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B + MOVQ (R8)(R11*1), R10 + MOVQ 8(R8)(R11*1), R12 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B + XORQ 8(R9)(R11*1), R12 + JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B + LEAL -16(DI), DI + LEAL 16(R11), R11 + JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B + +matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP match_nolit_end_encodeSnappyBetterBlockAsm10B + +matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B: CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B + LEAL -8(DI), DI + LEAL 8(R11), R11 + JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B -matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B: - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm10B - +matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -16232,12 +16876,6 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B: LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeSnappyBetterBlockAsm10B -matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm10B: - LEAL -8(DI), DI - LEAL 8(R11), R11 - CMPL DI, $0x08 - JAE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B - matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B: CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B @@ -16770,15 +17408,43 @@ match_dst_size_check_encodeSnappyBetterBlockAsm8B: // matchLen XORL R11, R11 + +matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B: + CMPL DI, $0x10 + JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B + MOVQ (R8)(R11*1), R10 + MOVQ 8(R8)(R11*1), R12 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B + XORQ 8(R9)(R11*1), R12 + JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B + LEAL -16(DI), DI + LEAL 16(R11), R11 + JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B + +matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B: +#ifdef GOAMD64_v3 + TZCNTQ R12, R12 + +#else + BSFQ R12, R12 + +#endif + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 + JMP match_nolit_end_encodeSnappyBetterBlockAsm8B + +matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B: CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B + MOVQ (R8)(R11*1), R10 + XORQ (R9)(R11*1), R10 + JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B + LEAL -8(DI), DI + LEAL 8(R11), R11 + JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B -matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B: - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 - TESTQ R10, R10 - JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm8B - +matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B: #ifdef GOAMD64_v3 TZCNTQ R10, R10 @@ -16790,12 +17456,6 @@ matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B: LEAL (R11)(R10*1), R11 JMP match_nolit_end_encodeSnappyBetterBlockAsm8B -matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm8B: - LEAL -8(DI), DI - LEAL 8(R11), R11 - CMPL DI, $0x08 - JAE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B - matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B: CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B @@ -17343,15 +18003,43 @@ emit_literal_done_repeat_emit_calcBlockSize: // matchLen XORL R10, R10 + +matchlen_loopback_16_repeat_extend_calcBlockSize: + CMPL DI, $0x10 + JB matchlen_match8_repeat_extend_calcBlockSize + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_calcBlockSize + XORQ 8(BX)(R10*1), R11 + JNZ matchlen_bsf_16repeat_extend_calcBlockSize + LEAL -16(DI), DI + LEAL 16(R10), R10 + JMP matchlen_loopback_16_repeat_extend_calcBlockSize + +matchlen_bsf_16repeat_extend_calcBlockSize: +#ifdef GOAMD64_v3 + TZCNTQ R11, R11 + +#else + BSFQ R11, R11 + +#endif + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 + JMP repeat_extend_forward_end_calcBlockSize + +matchlen_match8_repeat_extend_calcBlockSize: CMPL DI, $0x08 JB matchlen_match4_repeat_extend_calcBlockSize + MOVQ (R8)(R10*1), R9 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_calcBlockSize + LEAL -8(DI), DI + LEAL 8(R10), R10 + JMP matchlen_match4_repeat_extend_calcBlockSize -matchlen_loopback_repeat_extend_calcBlockSize: - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 - TESTQ R9, R9 - JZ matchlen_loop_repeat_extend_calcBlockSize - +matchlen_bsf_8_repeat_extend_calcBlockSize: #ifdef GOAMD64_v3 TZCNTQ R9, R9 @@ -17363,12 +18051,6 @@ matchlen_loopback_repeat_extend_calcBlockSize: LEAL (R10)(R9*1), R10 JMP repeat_extend_forward_end_calcBlockSize -matchlen_loop_repeat_extend_calcBlockSize: - LEAL -8(DI), DI - LEAL 8(R10), R10 - CMPL DI, $0x08 - JAE matchlen_loopback_repeat_extend_calcBlockSize - matchlen_match4_repeat_extend_calcBlockSize: CMPL DI, $0x04 JB matchlen_match2_repeat_extend_calcBlockSize @@ -17554,15 +18236,43 @@ match_nolit_loop_calcBlockSize: // matchLen XORL R9, R9 + +matchlen_loopback_16_match_nolit_calcBlockSize: + CMPL SI, $0x10 + JB matchlen_match8_match_nolit_calcBlockSize + MOVQ (DI)(R9*1), R8 + MOVQ 8(DI)(R9*1), R10 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_calcBlockSize + XORQ 8(BX)(R9*1), R10 + JNZ matchlen_bsf_16match_nolit_calcBlockSize + LEAL -16(SI), SI + LEAL 16(R9), R9 + JMP matchlen_loopback_16_match_nolit_calcBlockSize + +matchlen_bsf_16match_nolit_calcBlockSize: +#ifdef GOAMD64_v3 + TZCNTQ R10, R10 + +#else + BSFQ R10, R10 + +#endif + SARQ $0x03, R10 + LEAL 8(R9)(R10*1), R9 + JMP match_nolit_end_calcBlockSize + +matchlen_match8_match_nolit_calcBlockSize: CMPL SI, $0x08 JB matchlen_match4_match_nolit_calcBlockSize + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_calcBlockSize + LEAL -8(SI), SI + LEAL 8(R9), R9 + JMP matchlen_match4_match_nolit_calcBlockSize -matchlen_loopback_match_nolit_calcBlockSize: - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 - TESTQ R8, R8 - JZ matchlen_loop_match_nolit_calcBlockSize - +matchlen_bsf_8_match_nolit_calcBlockSize: #ifdef GOAMD64_v3 TZCNTQ R8, R8 @@ -17574,12 +18284,6 @@ matchlen_loopback_match_nolit_calcBlockSize: LEAL (R9)(R8*1), R9 JMP match_nolit_end_calcBlockSize -matchlen_loop_match_nolit_calcBlockSize: - LEAL -8(SI), SI - LEAL 8(R9), R9 - CMPL SI, $0x08 - JAE matchlen_loopback_match_nolit_calcBlockSize - matchlen_match4_match_nolit_calcBlockSize: CMPL SI, $0x04 JB matchlen_match2_match_nolit_calcBlockSize @@ -17872,15 +18576,43 @@ emit_literal_done_repeat_emit_calcBlockSizeSmall: // matchLen XORL R10, R10 + +matchlen_loopback_16_repeat_extend_calcBlockSizeSmall: + CMPL DI, $0x10 + JB matchlen_match8_repeat_extend_calcBlockSizeSmall + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_calcBlockSizeSmall + XORQ 8(BX)(R10*1), R11 + JNZ matchlen_bsf_16repeat_extend_calcBlockSizeSmall + LEAL -16(DI), DI + LEAL 16(R10), R10 + JMP matchlen_loopback_16_repeat_extend_calcBlockSizeSmall + +matchlen_bsf_16repeat_extend_calcBlockSizeSmall: +#ifdef GOAMD64_v3 + TZCNTQ R11, R11 + +#else + BSFQ R11, R11 + +#endif + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 + JMP repeat_extend_forward_end_calcBlockSizeSmall + +matchlen_match8_repeat_extend_calcBlockSizeSmall: CMPL DI, $0x08 JB matchlen_match4_repeat_extend_calcBlockSizeSmall + MOVQ (R8)(R10*1), R9 + XORQ (BX)(R10*1), R9 + JNZ matchlen_bsf_8_repeat_extend_calcBlockSizeSmall + LEAL -8(DI), DI + LEAL 8(R10), R10 + JMP matchlen_match4_repeat_extend_calcBlockSizeSmall -matchlen_loopback_repeat_extend_calcBlockSizeSmall: - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 - TESTQ R9, R9 - JZ matchlen_loop_repeat_extend_calcBlockSizeSmall - +matchlen_bsf_8_repeat_extend_calcBlockSizeSmall: #ifdef GOAMD64_v3 TZCNTQ R9, R9 @@ -17892,12 +18624,6 @@ matchlen_loopback_repeat_extend_calcBlockSizeSmall: LEAL (R10)(R9*1), R10 JMP repeat_extend_forward_end_calcBlockSizeSmall -matchlen_loop_repeat_extend_calcBlockSizeSmall: - LEAL -8(DI), DI - LEAL 8(R10), R10 - CMPL DI, $0x08 - JAE matchlen_loopback_repeat_extend_calcBlockSizeSmall - matchlen_match4_repeat_extend_calcBlockSizeSmall: CMPL DI, $0x04 JB matchlen_match2_repeat_extend_calcBlockSizeSmall @@ -18053,15 +18779,43 @@ match_nolit_loop_calcBlockSizeSmall: // matchLen XORL R9, R9 + +matchlen_loopback_16_match_nolit_calcBlockSizeSmall: + CMPL SI, $0x10 + JB matchlen_match8_match_nolit_calcBlockSizeSmall + MOVQ (DI)(R9*1), R8 + MOVQ 8(DI)(R9*1), R10 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_calcBlockSizeSmall + XORQ 8(BX)(R9*1), R10 + JNZ matchlen_bsf_16match_nolit_calcBlockSizeSmall + LEAL -16(SI), SI + LEAL 16(R9), R9 + JMP matchlen_loopback_16_match_nolit_calcBlockSizeSmall + +matchlen_bsf_16match_nolit_calcBlockSizeSmall: +#ifdef GOAMD64_v3 + TZCNTQ R10, R10 + +#else + BSFQ R10, R10 + +#endif + SARQ $0x03, R10 + LEAL 8(R9)(R10*1), R9 + JMP match_nolit_end_calcBlockSizeSmall + +matchlen_match8_match_nolit_calcBlockSizeSmall: CMPL SI, $0x08 JB matchlen_match4_match_nolit_calcBlockSizeSmall + MOVQ (DI)(R9*1), R8 + XORQ (BX)(R9*1), R8 + JNZ matchlen_bsf_8_match_nolit_calcBlockSizeSmall + LEAL -8(SI), SI + LEAL 8(R9), R9 + JMP matchlen_match4_match_nolit_calcBlockSizeSmall -matchlen_loopback_match_nolit_calcBlockSizeSmall: - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 - TESTQ R8, R8 - JZ matchlen_loop_match_nolit_calcBlockSizeSmall - +matchlen_bsf_8_match_nolit_calcBlockSizeSmall: #ifdef GOAMD64_v3 TZCNTQ R8, R8 @@ -18073,12 +18827,6 @@ matchlen_loopback_match_nolit_calcBlockSizeSmall: LEAL (R9)(R8*1), R9 JMP match_nolit_end_calcBlockSizeSmall -matchlen_loop_match_nolit_calcBlockSizeSmall: - LEAL -8(SI), SI - LEAL 8(R9), R9 - CMPL SI, $0x08 - JAE matchlen_loopback_match_nolit_calcBlockSizeSmall - matchlen_match4_match_nolit_calcBlockSizeSmall: CMPL SI, $0x04 JB matchlen_match2_match_nolit_calcBlockSizeSmall @@ -18840,15 +19588,43 @@ TEXT ·matchLen(SB), NOSPLIT, $0-56 // matchLen XORL SI, SI + +matchlen_loopback_16_standalone: + CMPL DX, $0x10 + JB matchlen_match8_standalone + MOVQ (AX)(SI*1), BX + MOVQ 8(AX)(SI*1), DI + XORQ (CX)(SI*1), BX + JNZ matchlen_bsf_8_standalone + XORQ 8(CX)(SI*1), DI + JNZ matchlen_bsf_16standalone + LEAL -16(DX), DX + LEAL 16(SI), SI + JMP matchlen_loopback_16_standalone + +matchlen_bsf_16standalone: +#ifdef GOAMD64_v3 + TZCNTQ DI, DI + +#else + BSFQ DI, DI + +#endif + SARQ $0x03, DI + LEAL 8(SI)(DI*1), SI + JMP gen_match_len_end + +matchlen_match8_standalone: CMPL DX, $0x08 JB matchlen_match4_standalone + MOVQ (AX)(SI*1), BX + XORQ (CX)(SI*1), BX + JNZ matchlen_bsf_8_standalone + LEAL -8(DX), DX + LEAL 8(SI), SI + JMP matchlen_match4_standalone -matchlen_loopback_standalone: - MOVQ (AX)(SI*1), BX - XORQ (CX)(SI*1), BX - TESTQ BX, BX - JZ matchlen_loop_standalone - +matchlen_bsf_8_standalone: #ifdef GOAMD64_v3 TZCNTQ BX, BX @@ -18860,12 +19636,6 @@ matchlen_loopback_standalone: LEAL (SI)(BX*1), SI JMP gen_match_len_end -matchlen_loop_standalone: - LEAL -8(DX), DX - LEAL 8(SI), SI - CMPL DX, $0x08 - JAE matchlen_loopback_standalone - matchlen_match4_standalone: CMPL DX, $0x04 JB matchlen_match2_standalone diff --git a/vendor/github.com/klauspost/compress/s2/index.go b/vendor/github.com/klauspost/compress/s2/index.go index dd9ecfe71..18a4f7acd 100644 --- a/vendor/github.com/klauspost/compress/s2/index.go +++ b/vendor/github.com/klauspost/compress/s2/index.go @@ -511,24 +511,22 @@ func IndexStream(r io.Reader) ([]byte, error) { // JSON returns the index as JSON text. func (i *Index) JSON() []byte { + type offset struct { + CompressedOffset int64 `json:"compressed"` + UncompressedOffset int64 `json:"uncompressed"` + } x := struct { - TotalUncompressed int64 `json:"total_uncompressed"` // Total Uncompressed size if known. Will be -1 if unknown. - TotalCompressed int64 `json:"total_compressed"` // Total Compressed size if known. Will be -1 if unknown. - Offsets []struct { - CompressedOffset int64 `json:"compressed"` - UncompressedOffset int64 `json:"uncompressed"` - } `json:"offsets"` - EstBlockUncomp int64 `json:"est_block_uncompressed"` + TotalUncompressed int64 `json:"total_uncompressed"` // Total Uncompressed size if known. Will be -1 if unknown. + TotalCompressed int64 `json:"total_compressed"` // Total Compressed size if known. Will be -1 if unknown. + Offsets []offset `json:"offsets"` + EstBlockUncomp int64 `json:"est_block_uncompressed"` }{ TotalUncompressed: i.TotalUncompressed, TotalCompressed: i.TotalCompressed, EstBlockUncomp: i.estBlockUncomp, } for _, v := range i.info { - x.Offsets = append(x.Offsets, struct { - CompressedOffset int64 `json:"compressed"` - UncompressedOffset int64 `json:"uncompressed"` - }{CompressedOffset: v.compressedOffset, UncompressedOffset: v.uncompressedOffset}) + x.Offsets = append(x.Offsets, offset{CompressedOffset: v.compressedOffset, UncompressedOffset: v.uncompressedOffset}) } b, _ := json.MarshalIndent(x, "", " ") return b |