diff options
author | 2023-10-31 11:12:22 +0000 | |
---|---|---|
committer | 2023-10-31 11:12:22 +0000 | |
commit | ce71a5a7902963538fc54583588850563f6746cc (patch) | |
tree | 3e869eba6d25d2db5fe81184ffee595e451b3147 /vendor/github.com/klauspost/compress/flate | |
parent | [bugfix] Relax `Mention` parsing, allowing either href or name (#2320) (diff) | |
download | gotosocial-ce71a5a7902963538fc54583588850563f6746cc.tar.xz |
[feature] add per-uri dereferencer locks (#2291)
Diffstat (limited to 'vendor/github.com/klauspost/compress/flate')
8 files changed, 612 insertions, 55 deletions
diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go index 5faea0b2b..de912e187 100644 --- a/vendor/github.com/klauspost/compress/flate/deflate.go +++ b/vendor/github.com/klauspost/compress/flate/deflate.go @@ -7,6 +7,7 @@ package flate import ( "encoding/binary" + "errors" "fmt" "io" "math" @@ -833,6 +834,12 @@ func (d *compressor) init(w io.Writer, level int) (err error) { d.initDeflate() d.fill = (*compressor).fillDeflate d.step = (*compressor).deflateLazy + case -level >= MinCustomWindowSize && -level <= MaxCustomWindowSize: + d.w.logNewTablePenalty = 7 + d.fast = &fastEncL5Window{maxOffset: int32(-level), cur: maxStoreBlockSize} + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillBlock + d.step = (*compressor).storeFast default: return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level) } @@ -929,6 +936,28 @@ func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) { return zw, err } +// MinCustomWindowSize is the minimum window size that can be sent to NewWriterWindow. +const MinCustomWindowSize = 32 + +// MaxCustomWindowSize is the maximum custom window that can be sent to NewWriterWindow. +const MaxCustomWindowSize = windowSize + +// NewWriterWindow returns a new Writer compressing data with a custom window size. +// windowSize must be from MinCustomWindowSize to MaxCustomWindowSize. +func NewWriterWindow(w io.Writer, windowSize int) (*Writer, error) { + if windowSize < MinCustomWindowSize { + return nil, errors.New("flate: requested window size less than MinWindowSize") + } + if windowSize > MaxCustomWindowSize { + return nil, errors.New("flate: requested window size bigger than MaxCustomWindowSize") + } + var dw Writer + if err := dw.d.init(w, -windowSize); err != nil { + return nil, err + } + return &dw, nil +} + // A Writer takes data written to it and writes the compressed // form of that data to an underlying writer (see NewWriter). type Writer struct { diff --git a/vendor/github.com/klauspost/compress/flate/fast_encoder.go b/vendor/github.com/klauspost/compress/flate/fast_encoder.go index 24caf5f70..c8124b5c4 100644 --- a/vendor/github.com/klauspost/compress/flate/fast_encoder.go +++ b/vendor/github.com/klauspost/compress/flate/fast_encoder.go @@ -8,7 +8,6 @@ package flate import ( "encoding/binary" "fmt" - "math/bits" ) type fastEnc interface { @@ -192,25 +191,3 @@ func (e *fastGen) Reset() { } e.hist = e.hist[:0] } - -// matchLen returns the maximum length. -// 'a' must be the shortest of the two. -func matchLen(a, b []byte) int { - var checked int - - for len(a) >= 8 { - if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 { - return checked + (bits.TrailingZeros64(diff) >> 3) - } - checked += 8 - a = a[8:] - b = b[8:] - } - b = b[:len(a)] - for i := range a { - if a[i] != b[i] { - return i + checked - } - } - return len(a) + checked -} diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go index 414c0bea9..2f410d64f 100644 --- a/vendor/github.com/klauspost/compress/flate/inflate.go +++ b/vendor/github.com/klauspost/compress/flate/inflate.go @@ -120,8 +120,9 @@ func (h *huffmanDecoder) init(lengths []int) bool { const sanity = false if h.chunks == nil { - h.chunks = &[huffmanNumChunks]uint16{} + h.chunks = new([huffmanNumChunks]uint16) } + if h.maxRead != 0 { *h = huffmanDecoder{chunks: h.chunks, links: h.links} } @@ -175,6 +176,7 @@ func (h *huffmanDecoder) init(lengths []int) bool { } h.maxRead = min + chunks := h.chunks[:] for i := range chunks { chunks[i] = 0 @@ -202,8 +204,7 @@ func (h *huffmanDecoder) init(lengths []int) bool { if cap(h.links[off]) < numLinks { h.links[off] = make([]uint16, numLinks) } else { - links := h.links[off][:0] - h.links[off] = links[:numLinks] + h.links[off] = h.links[off][:numLinks] } } } else { @@ -277,7 +278,7 @@ func (h *huffmanDecoder) init(lengths []int) bool { return true } -// The actual read interface needed by NewReader. +// Reader is the actual read interface needed by NewReader. // If the passed in io.Reader does not also have ReadByte, // the NewReader will introduce its own buffering. type Reader interface { @@ -285,6 +286,18 @@ type Reader interface { io.ByteReader } +type step uint8 + +const ( + copyData step = iota + 1 + nextBlock + huffmanBytesBuffer + huffmanBytesReader + huffmanBufioReader + huffmanStringsReader + huffmanGenericReader +) + // Decompress state. type decompressor struct { // Input source. @@ -303,7 +316,7 @@ type decompressor struct { // Next step in the decompression, // and decompression state. - step func(*decompressor) + step step stepState int err error toRead []byte @@ -342,7 +355,7 @@ func (f *decompressor) nextBlock() { // compressed, fixed Huffman tables f.hl = &fixedHuffmanDecoder f.hd = nil - f.huffmanBlockDecoder()() + f.huffmanBlockDecoder() if debugDecode { fmt.Println("predefinied huffman block") } @@ -353,7 +366,7 @@ func (f *decompressor) nextBlock() { } f.hl = &f.h1 f.hd = &f.h2 - f.huffmanBlockDecoder()() + f.huffmanBlockDecoder() if debugDecode { fmt.Println("dynamic huffman block") } @@ -379,14 +392,16 @@ func (f *decompressor) Read(b []byte) (int, error) { if f.err != nil { return 0, f.err } - f.step(f) + + f.doStep() + if f.err != nil && len(f.toRead) == 0 { f.toRead = f.dict.readFlush() // Flush what's left in case of error } } } -// Support the io.WriteTo interface for io.Copy and friends. +// WriteTo implements the io.WriteTo interface for io.Copy and friends. func (f *decompressor) WriteTo(w io.Writer) (int64, error) { total := int64(0) flushed := false @@ -410,7 +425,7 @@ func (f *decompressor) WriteTo(w io.Writer) (int64, error) { return total, f.err } if f.err == nil { - f.step(f) + f.doStep() } if len(f.toRead) == 0 && f.err != nil && !flushed { f.toRead = f.dict.readFlush() // Flush what's left in case of error @@ -631,7 +646,7 @@ func (f *decompressor) copyData() { if f.dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = f.dict.readFlush() - f.step = (*decompressor).copyData + f.step = copyData return } f.finishBlock() @@ -644,7 +659,28 @@ func (f *decompressor) finishBlock() { } f.err = io.EOF } - f.step = (*decompressor).nextBlock + f.step = nextBlock +} + +func (f *decompressor) doStep() { + switch f.step { + case copyData: + f.copyData() + case nextBlock: + f.nextBlock() + case huffmanBytesBuffer: + f.huffmanBytesBuffer() + case huffmanBytesReader: + f.huffmanBytesReader() + case huffmanBufioReader: + f.huffmanBufioReader() + case huffmanStringsReader: + f.huffmanStringsReader() + case huffmanGenericReader: + f.huffmanGenericReader() + default: + panic("BUG: unexpected step state") + } } // noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF. @@ -747,7 +783,7 @@ func (f *decompressor) Reset(r io.Reader, dict []byte) error { h1: f.h1, h2: f.h2, dict: f.dict, - step: (*decompressor).nextBlock, + step: nextBlock, } f.dict.init(maxMatchOffset, dict) return nil @@ -768,7 +804,7 @@ func NewReader(r io.Reader) io.ReadCloser { f.r = makeReader(r) f.bits = new([maxNumLit + maxNumDist]int) f.codebits = new([numCodes]int) - f.step = (*decompressor).nextBlock + f.step = nextBlock f.dict.init(maxMatchOffset, nil) return &f } @@ -787,7 +823,7 @@ func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { f.r = makeReader(r) f.bits = new([maxNumLit + maxNumDist]int) f.codebits = new([numCodes]int) - f.step = (*decompressor).nextBlock + f.step = nextBlock f.dict.init(maxMatchOffset, dict) return &f } diff --git a/vendor/github.com/klauspost/compress/flate/inflate_gen.go b/vendor/github.com/klauspost/compress/flate/inflate_gen.go index 61342b6b8..2b2f993f7 100644 --- a/vendor/github.com/klauspost/compress/flate/inflate_gen.go +++ b/vendor/github.com/klauspost/compress/flate/inflate_gen.go @@ -85,7 +85,7 @@ readLiteral: dict.writeByte(byte(v)) if dict.availWrite() == 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBytesBuffer + f.step = huffmanBytesBuffer f.stepState = stateInit f.b, f.nb = fb, fnb return @@ -251,7 +251,7 @@ copyHistory: if dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBytesBuffer // We need to continue this work + f.step = huffmanBytesBuffer // We need to continue this work f.stepState = stateDict f.b, f.nb = fb, fnb return @@ -336,7 +336,7 @@ readLiteral: dict.writeByte(byte(v)) if dict.availWrite() == 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBytesReader + f.step = huffmanBytesReader f.stepState = stateInit f.b, f.nb = fb, fnb return @@ -502,7 +502,7 @@ copyHistory: if dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBytesReader // We need to continue this work + f.step = huffmanBytesReader // We need to continue this work f.stepState = stateDict f.b, f.nb = fb, fnb return @@ -587,7 +587,7 @@ readLiteral: dict.writeByte(byte(v)) if dict.availWrite() == 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBufioReader + f.step = huffmanBufioReader f.stepState = stateInit f.b, f.nb = fb, fnb return @@ -753,7 +753,7 @@ copyHistory: if dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBufioReader // We need to continue this work + f.step = huffmanBufioReader // We need to continue this work f.stepState = stateDict f.b, f.nb = fb, fnb return @@ -838,7 +838,7 @@ readLiteral: dict.writeByte(byte(v)) if dict.availWrite() == 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanStringsReader + f.step = huffmanStringsReader f.stepState = stateInit f.b, f.nb = fb, fnb return @@ -1004,7 +1004,7 @@ copyHistory: if dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanStringsReader // We need to continue this work + f.step = huffmanStringsReader // We need to continue this work f.stepState = stateDict f.b, f.nb = fb, fnb return @@ -1089,7 +1089,7 @@ readLiteral: dict.writeByte(byte(v)) if dict.availWrite() == 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanGenericReader + f.step = huffmanGenericReader f.stepState = stateInit f.b, f.nb = fb, fnb return @@ -1255,7 +1255,7 @@ copyHistory: if dict.availWrite() == 0 || f.copyLen > 0 { f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanGenericReader // We need to continue this work + f.step = huffmanGenericReader // We need to continue this work f.stepState = stateDict f.b, f.nb = fb, fnb return @@ -1265,19 +1265,19 @@ copyHistory: // Not reached } -func (f *decompressor) huffmanBlockDecoder() func() { +func (f *decompressor) huffmanBlockDecoder() { switch f.r.(type) { case *bytes.Buffer: - return f.huffmanBytesBuffer + f.huffmanBytesBuffer() case *bytes.Reader: - return f.huffmanBytesReader + f.huffmanBytesReader() case *bufio.Reader: - return f.huffmanBufioReader + f.huffmanBufioReader() case *strings.Reader: - return f.huffmanStringsReader + f.huffmanStringsReader() case Reader: - return f.huffmanGenericReader + f.huffmanGenericReader() default: - return f.huffmanGenericReader + f.huffmanGenericReader() } } diff --git a/vendor/github.com/klauspost/compress/flate/level5.go b/vendor/github.com/klauspost/compress/flate/level5.go index 83ef50ba4..1f61ec182 100644 --- a/vendor/github.com/klauspost/compress/flate/level5.go +++ b/vendor/github.com/klauspost/compress/flate/level5.go @@ -308,3 +308,401 @@ emitRemainder: emitLiteral(dst, src[nextEmit:]) } } + +// fastEncL5Window is a level 5 encoder, +// but with a custom window size. +type fastEncL5Window struct { + hist []byte + cur int32 + maxOffset int32 + table [tableSize]tableEntry + bTable [tableSize]tableEntryPrev +} + +func (e *fastEncL5Window) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + hashShortBytes = 4 + ) + maxMatchOffset := e.maxOffset + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.bTable[:] { + e.bTable[i] = tableEntryPrev{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + for i := range e.bTable[:] { + v := e.bTable[i] + if v.Cur.offset <= minOff { + v.Cur.offset = 0 + v.Prev.offset = 0 + } else { + v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset + if v.Prev.offset <= minOff { + v.Prev.offset = 0 + } else { + v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset + } + } + e.bTable[i] = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load6432(src, s) + for { + const skipLog = 6 + const doEvery = 1 + + nextS := s + var l int32 + var t int32 + for { + nextHashS := hashLen(cv, tableBits, hashShortBytes) + nextHashL := hash7(cv, tableBits) + + s = nextS + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + // Fetch a short+long candidate + sCandidate := e.table[nextHashS] + lCandidate := e.bTable[nextHashL] + next := load6432(src, nextS) + entry := tableEntry{offset: s + e.cur} + e.table[nextHashS] = entry + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = entry, eLong.Cur + + nextHashS = hashLen(next, tableBits, hashShortBytes) + nextHashL = hash7(next, tableBits) + + t = lCandidate.Cur.offset - e.cur + if s-t < maxMatchOffset { + if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) { + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + t2 := lCandidate.Prev.offset - e.cur + if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { + l = e.matchlen(s+4, t+4, src) + 4 + ml1 := e.matchlen(s+4, t2+4, src) + 4 + if ml1 > l { + t = t2 + l = ml1 + break + } + } + break + } + t = lCandidate.Prev.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + break + } + } + + t = sCandidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) { + // Found a 4 match... + l = e.matchlen(s+4, t+4, src) + 4 + lCandidate = e.bTable[nextHashL] + // Store the next match + + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + // If the next long is a candidate, use that... + t2 := lCandidate.Cur.offset - e.cur + if nextS-t2 < maxMatchOffset { + if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + break + } + } + // If the previous long is a candidate, use that... + t2 = lCandidate.Prev.offset - e.cur + if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + break + } + } + } + break + } + cv = next + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + if l == 0 { + // Extend the 4-byte match as long as possible. + l = e.matchlenLong(s+4, t+4, src) + 4 + } else if l == maxMatchLength { + l += e.matchlenLong(s+l, t+l, src) + } + + // Try to locate a better match by checking the end of best match... + if sAt := s + l; l < 30 && sAt < sLimit { + // Allow some bytes at the beginning to mismatch. + // Sweet spot is 2/3 bytes depending on input. + // 3 is only a little better when it is but sometimes a lot worse. + // The skipped bytes are tested in Extend backwards, + // and still picked up as part of the match if they do. + const skipBeginning = 2 + eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset + t2 := eLong - e.cur - l + skipBeginning + s2 := s + skipBeginning + off := s2 - t2 + if t2 >= 0 && off < maxMatchOffset && off > 0 { + if l2 := e.matchlenLong(s2, t2, src); l2 > l { + t = t2 + l = l2 + s = s2 + } + } + } + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + if debugDeflate { + if t >= s { + panic(fmt.Sprintln("s-t", s, t)) + } + if (s - t) > maxMatchOffset { + panic(fmt.Sprintln("mmo", s-t)) + } + if l < baseMatchLength { + panic("bml") + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + goto emitRemainder + } + + // Store every 3rd hash in-between. + if true { + const hashEvery = 3 + i := s - l + 1 + if i < s-1 { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + e.table[hashLen(cv, tableBits, hashShortBytes)] = t + eLong := &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + + // Do an long at i+1 + cv >>= 8 + t = tableEntry{offset: t.offset + 1} + eLong = &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + + // We only have enough bits for a short entry at i+2 + cv >>= 8 + t = tableEntry{offset: t.offset + 1} + e.table[hashLen(cv, tableBits, hashShortBytes)] = t + + // Skip one - otherwise we risk hitting 's' + i += 4 + for ; i < s-1; i += hashEvery { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} + eLong := &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2 + } + } + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. + x := load6432(src, s-1) + o := e.cur + s - 1 + prevHashS := hashLen(x, tableBits, hashShortBytes) + prevHashL := hash7(x, tableBits) + e.table[prevHashS] = tableEntry{offset: o} + eLong := &e.bTable[prevHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur + cv = x >> 8 + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} + +// Reset the encoding table. +func (e *fastEncL5Window) Reset() { + // We keep the same allocs, since we are compressing the same block sizes. + if cap(e.hist) < allocHistory { + e.hist = make([]byte, 0, allocHistory) + } + + // We offset current position so everything will be out of reach. + // If we are above the buffer reset it will be cleared anyway since len(hist) == 0. + if e.cur <= int32(bufferReset) { + e.cur += e.maxOffset + int32(len(e.hist)) + } + e.hist = e.hist[:0] +} + +func (e *fastEncL5Window) addBlock(src []byte) int32 { + // check if we have space already + maxMatchOffset := e.maxOffset + + if len(e.hist)+len(src) > cap(e.hist) { + if cap(e.hist) == 0 { + e.hist = make([]byte, 0, allocHistory) + } else { + if cap(e.hist) < int(maxMatchOffset*2) { + panic("unexpected buffer size") + } + // Move down + offset := int32(len(e.hist)) - maxMatchOffset + copy(e.hist[0:maxMatchOffset], e.hist[offset:]) + e.cur += offset + e.hist = e.hist[:maxMatchOffset] + } + } + s := int32(len(e.hist)) + e.hist = append(e.hist, src...) + return s +} + +// matchlen will return the match length between offsets and t in src. +// The maximum length returned is maxMatchLength - 4. +// It is assumed that s > t, that t >=0 and s < len(src). +func (e *fastEncL5Window) matchlen(s, t int32, src []byte) int32 { + if debugDecode { + if t >= s { + panic(fmt.Sprint("t >=s:", t, s)) + } + if int(s) >= len(src) { + panic(fmt.Sprint("s >= len(src):", s, len(src))) + } + if t < 0 { + panic(fmt.Sprint("t < 0:", t)) + } + if s-t > e.maxOffset { + panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) + } + } + s1 := int(s) + maxMatchLength - 4 + if s1 > len(src) { + s1 = len(src) + } + + // Extend the match to be as long as possible. + return int32(matchLen(src[s:s1], src[t:])) +} + +// matchlenLong will return the match length between offsets and t in src. +// It is assumed that s > t, that t >=0 and s < len(src). +func (e *fastEncL5Window) matchlenLong(s, t int32, src []byte) int32 { + if debugDeflate { + if t >= s { + panic(fmt.Sprint("t >=s:", t, s)) + } + if int(s) >= len(src) { + panic(fmt.Sprint("s >= len(src):", s, len(src))) + } + if t < 0 { + panic(fmt.Sprint("t < 0:", t)) + } + if s-t > e.maxOffset { + panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) + } + } + // Extend the match to be as long as possible. + return int32(matchLen(src[s:], src[t:])) +} diff --git a/vendor/github.com/klauspost/compress/flate/matchlen_amd64.go b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.go new file mode 100644 index 000000000..4bd388584 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.go @@ -0,0 +1,16 @@ +//go:build amd64 && !appengine && !noasm && gc +// +build amd64,!appengine,!noasm,gc + +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. + +package flate + +// matchLen returns how many bytes match in a and b +// +// It assumes that: +// +// len(a) <= len(b) and len(a) > 0 +// +//go:noescape +func matchLen(a []byte, b []byte) int diff --git a/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s new file mode 100644 index 000000000..9a7655c0f --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/matchlen_amd64.s @@ -0,0 +1,68 @@ +// Copied from S2 implementation. + +//go:build !appengine && !noasm && gc && !noasm + +#include "textflag.h" + +// func matchLen(a []byte, b []byte) int +// Requires: BMI +TEXT ·matchLen(SB), NOSPLIT, $0-56 + MOVQ a_base+0(FP), AX + MOVQ b_base+24(FP), CX + MOVQ a_len+8(FP), DX + + // matchLen + XORL SI, SI + CMPL DX, $0x08 + JB matchlen_match4_standalone + +matchlen_loopback_standalone: + MOVQ (AX)(SI*1), BX + XORQ (CX)(SI*1), BX + TESTQ BX, BX + JZ matchlen_loop_standalone + +#ifdef GOAMD64_v3 + TZCNTQ BX, BX +#else + BSFQ BX, BX +#endif + SARQ $0x03, BX + LEAL (SI)(BX*1), SI + JMP gen_match_len_end + +matchlen_loop_standalone: + LEAL -8(DX), DX + LEAL 8(SI), SI + CMPL DX, $0x08 + JAE matchlen_loopback_standalone + +matchlen_match4_standalone: + CMPL DX, $0x04 + JB matchlen_match2_standalone + MOVL (AX)(SI*1), BX + CMPL (CX)(SI*1), BX + JNE matchlen_match2_standalone + LEAL -4(DX), DX + LEAL 4(SI), SI + +matchlen_match2_standalone: + CMPL DX, $0x02 + JB matchlen_match1_standalone + MOVW (AX)(SI*1), BX + CMPW (CX)(SI*1), BX + JNE matchlen_match1_standalone + LEAL -2(DX), DX + LEAL 2(SI), SI + +matchlen_match1_standalone: + CMPL DX, $0x01 + JB gen_match_len_end + MOVB (AX)(SI*1), BL + CMPB (CX)(SI*1), BL + JNE gen_match_len_end + INCL SI + +gen_match_len_end: + MOVQ SI, ret+48(FP) + RET diff --git a/vendor/github.com/klauspost/compress/flate/matchlen_generic.go b/vendor/github.com/klauspost/compress/flate/matchlen_generic.go new file mode 100644 index 000000000..ad5cd814b --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/matchlen_generic.go @@ -0,0 +1,33 @@ +//go:build !amd64 || appengine || !gc || noasm +// +build !amd64 appengine !gc noasm + +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. + +package flate + +import ( + "encoding/binary" + "math/bits" +) + +// matchLen returns the maximum common prefix length of a and b. +// a must be the shortest of the two. +func matchLen(a, b []byte) (n int) { + for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] { + diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b) + if diff != 0 { + return n + bits.TrailingZeros64(diff)>>3 + } + n += 8 + } + + for i := range a { + if a[i] != b[i] { + break + } + n++ + } + return n + +} |