path: root/vendor/github.com/klauspost/compress/s2
Diffstat (limited to 'vendor/github.com/klauspost/compress/s2')
-rw-r--r--  vendor/github.com/klauspost/compress/s2/.gitignore               15
-rw-r--r--  vendor/github.com/klauspost/compress/s2/LICENSE                  28
-rw-r--r--  vendor/github.com/klauspost/compress/s2/README.md               937
-rw-r--r--  vendor/github.com/klauspost/compress/s2/decode.go               762
-rw-r--r--  vendor/github.com/klauspost/compress/s2/decode_amd64.s          568
-rw-r--r--  vendor/github.com/klauspost/compress/s2/decode_arm64.s          574
-rw-r--r--  vendor/github.com/klauspost/compress/s2/decode_asm.go            17
-rw-r--r--  vendor/github.com/klauspost/compress/s2/decode_other.go         267
-rw-r--r--  vendor/github.com/klauspost/compress/s2/encode.go              1347
-rw-r--r--  vendor/github.com/klauspost/compress/s2/encode_all.go           456
-rw-r--r--  vendor/github.com/klauspost/compress/s2/encode_amd64.go         142
-rw-r--r--  vendor/github.com/klauspost/compress/s2/encode_best.go          604
-rw-r--r--  vendor/github.com/klauspost/compress/s2/encode_better.go        431
-rw-r--r--  vendor/github.com/klauspost/compress/s2/encode_go.go            298
-rw-r--r--  vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go    189
-rw-r--r--  vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s   16701
-rw-r--r--  vendor/github.com/klauspost/compress/s2/index.go                525
-rw-r--r--  vendor/github.com/klauspost/compress/s2/s2.go                   143
18 files changed, 0 insertions, 24004 deletions
diff --git a/vendor/github.com/klauspost/compress/s2/.gitignore b/vendor/github.com/klauspost/compress/s2/.gitignore
deleted file mode 100644
index 3a89c6e3e..000000000
--- a/vendor/github.com/klauspost/compress/s2/.gitignore
+++ /dev/null
@@ -1,15 +0,0 @@
-testdata/bench
-
-# These explicitly listed benchmark data files are for an obsolete version of
-# snappy_test.go.
-testdata/alice29.txt
-testdata/asyoulik.txt
-testdata/fireworks.jpeg
-testdata/geo.protodata
-testdata/html
-testdata/html_x_4
-testdata/kppkn.gtb
-testdata/lcet10.txt
-testdata/paper-100k.pdf
-testdata/plrabn12.txt
-testdata/urls.10K
diff --git a/vendor/github.com/klauspost/compress/s2/LICENSE b/vendor/github.com/klauspost/compress/s2/LICENSE
deleted file mode 100644
index 1d2d645bd..000000000
--- a/vendor/github.com/klauspost/compress/s2/LICENSE
+++ /dev/null
@@ -1,28 +0,0 @@
-Copyright (c) 2011 The Snappy-Go Authors. All rights reserved.
-Copyright (c) 2019 Klaus Post. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
- * Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the
-distribution.
- * Neither the name of Google Inc. nor the names of its
-contributors may be used to endorse or promote products derived from
-this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/github.com/klauspost/compress/s2/README.md b/vendor/github.com/klauspost/compress/s2/README.md
deleted file mode 100644
index 119793456..000000000
--- a/vendor/github.com/klauspost/compress/s2/README.md
+++ /dev/null
@@ -1,937 +0,0 @@
-# S2 Compression
-
-S2 is an extension of [Snappy](https://github.com/google/snappy).
-
-S2 is aimed at high throughput, which is why it features concurrent compression for bigger payloads.
-
-Decoding is compatible with Snappy compressed content, but content compressed with S2 cannot be decompressed by Snappy.
-This means that S2 can seamlessly replace Snappy without converting compressed content.
-
-S2 can produce Snappy compatible output, faster and better than Snappy.
-If you want the full benefit of the changes, you should use s2 without Snappy compatibility.
-
-S2 is designed to have high throughput on content that cannot be compressed.
-This is important, so you don't have to worry about spending CPU cycles on already compressed data.
-
-## Benefits over Snappy
-
-* Better compression
-* Adjustable compression (3 levels)
-* Concurrent stream compression
-* Faster decompression, even for Snappy compatible content
-* Ability to quickly skip forward in compressed stream
-* Random seeking with indexes
-* Compatible with reading Snappy compressed content
-* Smaller block size overhead on incompressible blocks
-* Block concatenation
-* Uncompressed stream mode
-* Automatic stream size padding
-* Snappy compatible block compression
-
-## Drawbacks over Snappy
-
-* Not optimized for 32 bit systems
-* Streams use slightly more memory due to larger blocks and concurrency (configurable)
-
-# Usage
-
-Installation: `go get -u github.com/klauspost/compress/s2`
-
-Full package documentation:
-
-[![godoc][1]][2]
-
-[1]: https://godoc.org/github.com/klauspost/compress?status.svg
-[2]: https://godoc.org/github.com/klauspost/compress/s2
-
-## Compression
-
-```Go
-func EncodeStream(src io.Reader, dst io.Writer) error {
- enc := s2.NewWriter(dst)
- _, err := io.Copy(enc, src)
- if err != nil {
- enc.Close()
- return err
- }
- // Blocks until compression is done.
- return enc.Close()
-}
-```
-
-You should always call `enc.Close()`, otherwise you will leak resources and your encode will be incomplete.
-
-For the best throughput, you should attempt to reuse the `Writer` using the `Reset()` method.
-
-The Writer in S2 is always buffered, therefore `NewBufferedWriter` in Snappy can be replaced with `NewWriter` in S2.
-It is possible to flush any buffered data using the `Flush()` method.
-This will block until all data sent to the encoder has been written to the output.
-
-S2 also supports the `io.ReaderFrom` interface, which will consume all input from a reader.
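-
-Here is a minimal sketch of reusing one `Writer` for several streams via `Reset()` and `io.ReaderFrom`
-(the function name and error handling are illustrative only):
-
-```Go
-// encodeMany compresses srcs[i] into dsts[i], reusing a single Writer.
-func encodeMany(dsts []io.Writer, srcs []io.Reader) error {
-	var enc *s2.Writer
-	for i := range srcs {
-		if enc == nil {
-			enc = s2.NewWriter(dsts[i])
-		} else {
-			enc.Reset(dsts[i]) // reuse internal buffers for the next stream
-		}
-		// The Writer implements io.ReaderFrom, so it can drain the reader directly.
-		if _, err := enc.ReadFrom(srcs[i]); err != nil {
-			enc.Close()
-			return err
-		}
-		// Finish this stream.
-		if err := enc.Close(); err != nil {
-			return err
-		}
-	}
-	return nil
-}
-```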
-
-Finally, if you have a single block of data you would like to have encoded as a stream,
-a slightly more efficient method is `EncodeBuffer`.
-This will take ownership of the buffer until the stream is closed.
-
-```Go
-func EncodeStream(src []byte, dst io.Writer) error {
- enc := s2.NewWriter(dst)
- // The encoder owns the buffer until Flush or Close is called.
- err := enc.EncodeBuffer(src)
- if err != nil {
- enc.Close()
- return err
- }
- // Blocks until compression is done.
- return enc.Close()
-}
-```
-
-Each call to `EncodeBuffer` will result in discrete blocks being created without buffering,
-so it should only be used a single time per stream.
-If you need to write several blocks, you should use the regular io.Writer interface.
-
-
-## Decompression
-
-```Go
-func DecodeStream(src io.Reader, dst io.Writer) error {
- dec := s2.NewReader(src)
- _, err := io.Copy(dst, dec)
- return err
-}
-```
-
-Similar to the Writer, a Reader can be reused using the `Reset` method.
-
-For the best possible throughput, there is an `EncodeBuffer(buf []byte)` function available.
-However, it requires that the provided buffer isn't used after it is handed over to S2 and until the stream is flushed or closed.
-
-For smaller data blocks, there is also a non-streaming interface: `Encode()`, `EncodeBetter()` and `Decode()`.
-Do however note that these functions (similar to Snappy) do not provide validation of data,
-so data corruption may be undetected. Stream encoding provides CRC checks of data.
-
-It is possible to efficiently skip forward in a compressed stream using the `Skip()` method.
-For big skips the decompressor is able to skip blocks without decompressing them.
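-
-A short sketch combining `Reset` and `Skip` (the helper name is illustrative):
-
-```Go
-// decodeFrom reuses dec if non-nil, skips skipBytes of decompressed output,
-// and copies the rest to dst.
-func decodeFrom(src io.Reader, dst io.Writer, skipBytes int64, dec *s2.Reader) error {
-	if dec == nil {
-		dec = s2.NewReader(src)
-	} else {
-		dec.Reset(src) // reuse the existing decoder and its buffers
-	}
-	// Skip discards decompressed output, skipping whole blocks without
-	// decompressing them when possible.
-	if err := dec.Skip(skipBytes); err != nil {
-		return err
-	}
-	_, err := io.Copy(dst, dec)
-	return err
-}
-```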
-
-## Single Blocks
-
-Similar to Snappy, S2 offers single block compression.
-Blocks do not offer the same flexibility and safety as streams,
-but may be preferable for very small payloads, less than 100K.
-
-Using a simple `dst := s2.Encode(nil, src)` will compress `src` and return the compressed result.
-It is possible to provide a destination buffer.
-If the buffer has a capacity of at least `s2.MaxEncodedLen(len(src))` it will be used.
-If not, a new one will be allocated.
-
-Alternatively `EncodeBetter`/`EncodeBest` can also be used for better, but slightly slower compression.
-
-Similarly, to decompress a block you can use `dst, err := s2.Decode(nil, src)`.
-Again an optional destination buffer can be supplied.
-The `s2.DecodedLen(src)` can be used to get the minimum capacity needed.
-If that is not satisfied a new buffer will be allocated.
-
-Block functions always operate on a single goroutine since they should only be used for small payloads.
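-
-A minimal single block round trip, sizing buffers with `MaxEncodedLen` and `DecodedLen` (illustrative only):
-
-```Go
-// roundTrip compresses and decompresses a single block, reusing destination buffers.
-func roundTrip(src []byte) ([]byte, error) {
-	// Encode uses the destination when it has sufficient capacity.
-	comp := s2.Encode(make([]byte, 0, s2.MaxEncodedLen(len(src))), src)
-
-	n, err := s2.DecodedLen(comp)
-	if err != nil {
-		return nil, err
-	}
-	// Decode uses the destination when cap(dst) >= DecodedLen(src).
-	return s2.Decode(make([]byte, 0, n), comp)
-}
-```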
-
-# Commandline tools
-
-Some very simple commandline tools are provided: `s2c` for compression and `s2d` for decompression.
-
-Binaries can be downloaded on the [Releases Page](https://github.com/klauspost/compress/releases).
-
-Installing from source requires Go to be installed. To install the tools, use:
-
-`go install github.com/klauspost/compress/s2/cmd/s2c@latest && go install github.com/klauspost/compress/s2/cmd/s2d@latest`
-
-To build binaries to the current folder use:
-
-`go build github.com/klauspost/compress/s2/cmd/s2c && go build github.com/klauspost/compress/s2/cmd/s2d`
-
-
-## s2c
-
-```
-Usage: s2c [options] file1 file2
-
-Compresses all files supplied as input separately.
-Output files are written as 'filename.ext.s2' or 'filename.ext.snappy'.
-By default output files will be overwritten.
-Use - as the only file name to read from stdin and write to stdout.
-
-Wildcards are accepted: testdir/*.txt will compress all files in testdir ending with .txt
-Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/b.txt
-
-File names beginning with 'http://' and 'https://' will be downloaded and compressed.
-Only http response code 200 is accepted.
-
-Options:
- -bench int
- Run benchmark n times. No output will be written
- -blocksize string
- Max block size. Examples: 64K, 256K, 1M, 4M. Must be power of two and <= 4MB (default "4M")
- -c Write all output to stdout. Multiple input files will be concatenated
- -cpu int
- Compress using this amount of threads (default 32)
- -faster
- Compress faster, but with a minor compression loss
- -help
- Display help
- -index
- Add seek index (default true)
- -o string
- Write output to another file. Single input file only
- -pad string
- Pad size to a multiple of this value, Examples: 500, 64K, 256K, 1M, 4M, etc (default "1")
- -q Don't write any output to terminal, except errors
- -rm
- Delete source file(s) after successful compression
- -safe
- Do not overwrite output files
- -slower
- Compress more, but a lot slower
- -snappy
- Generate Snappy compatible output stream
- -verify
- Verify written files
-
-```
-
-## s2d
-
-```
-Usage: s2d [options] file1 file2
-
-Decompresses all files supplied as input. Input files must end with '.s2' or '.snappy'.
-Output file names have the extension removed. By default output files will be overwritten.
-Use - as the only file name to read from stdin and write to stdout.
-
-Wildcards are accepted: testdir/*.txt will compress all files in testdir ending with .txt
-Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/b.txt
-
-File names beginning with 'http://' and 'https://' will be downloaded and decompressed.
-Extensions on downloaded files are ignored. Only http response code 200 is accepted.
-
-Options:
- -bench int
- Run benchmark n times. No output will be written
- -c Write all output to stdout. Multiple input files will be concatenated
- -help
- Display help
- -o string
- Write output to another file. Single input file only
- -offset string
- Start at offset. Examples: 92, 64K, 256K, 1M, 4M. Requires Index
- -q Don't write any output to terminal, except errors
- -rm
- Delete source file(s) after successful decompression
- -safe
- Do not overwrite output files
- -tail string
- Return last of compressed file. Examples: 92, 64K, 256K, 1M, 4M. Requires Index
- -verify
- Verify files, but do not write output
-```
-
-## s2sx: self-extracting archives
-
-s2sx allows creating self-extracting archives with no dependencies.
-
-By default, executables are created for the same platforms as the host os,
-but this can be overridden with `-os` and `-arch` parameters.
-
-Extracted files have 0666 permissions, except when the untar option is used.
-
-```
-Usage: s2sx [options] file1 file2
-
-Compresses all files supplied as input separately.
-If files have '.s2' extension they are assumed to be compressed already.
-Output files are written as 'filename.s2sx' and with '.exe' for windows targets.
-If output is big, an additional file with ".more" is written. This must be included as well.
-By default output files will be overwritten.
-
-Wildcards are accepted: testdir/*.txt will compress all files in testdir ending with .txt
-Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/b.txt
-
-Options:
- -arch string
- Destination architecture (default "amd64")
- -c Write all output to stdout. Multiple input files will be concatenated
- -cpu int
- Compress using this amount of threads (default 32)
- -help
- Display help
- -max string
- Maximum executable size. Rest will be written to another file. (default "1G")
- -os string
- Destination operating system (default "windows")
- -q Don't write any output to terminal, except errors
- -rm
- Delete source file(s) after successful compression
- -safe
- Do not overwrite output files
- -untar
- Untar on destination
-```
-
-Available platforms are:
-
- * darwin-amd64
- * darwin-arm64
- * linux-amd64
- * linux-arm
- * linux-arm64
- * linux-mips64
- * linux-ppc64le
- * windows-386
- * windows-amd64
-
-By default, there is a size limit of 1GB for the output executable.
-
-When this is exceeded, the remaining file content is written to a file called
-output+`.more`. This file must be included and placed alongside the executable
-for a successful extraction.
-
-This file *must* have the same base name as the executable, so if the executable is renamed,
-the `.more` file must be renamed to match.
-
-This functionality is disabled with stdin/stdout.
-
-### Self-extracting TAR files
-
-If you wrap a TAR file you can specify `-untar` to make it untar on the destination host.
-
-Files are extracted to the current folder with the path specified in the tar file.
-
-Note that tar files are not validated before they are wrapped.
-
-For security reasons, files that would be placed outside the destination root folder are not allowed.
-
-# Performance
-
-This section will focus on comparisons to Snappy.
-This package is solely aimed at replacing Snappy as a high speed compression package.
-If you are mainly looking for better compression [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd)
-gives better compression, but typically at speeds slightly below "better" mode in this package.
-
-Compression is increased compared to Snappy, mostly around 5-20%, and the throughput is typically 25-40% higher (single threaded) than the Snappy Go implementation.
-
-Streams are concurrently compressed. The stream will be distributed among all available CPU cores for the best possible throughput.
-
-A "better" compression mode is also available. This allows to trade a bit of speed for a minor compression gain.
-The content compressed in this mode is fully compatible with the standard decoder.
-
-Snappy vs S2 **compression** speed on a 16 core (32 thread) computer, using all threads and a single thread (1 CPU):
-
-| File | S2 speed | S2 Throughput | S2 % smaller | S2 "better" | "better" throughput | "better" % smaller |
-|-----------------------------------------------------------------------------------------------------|----------|---------------|--------------|-------------|---------------------|--------------------|
-| [rawstudio-mint14.tar](https://files.klauspost.com/compress/rawstudio-mint14.7z) | 12.70x | 10556 MB/s | 7.35% | 4.15x | 3455 MB/s | 12.79% |
-| (1 CPU) | 1.14x | 948 MB/s | - | 0.42x | 349 MB/s | - |
-| [github-june-2days-2019.json](https://files.klauspost.com/compress/github-june-2days-2019.json.zst) | 17.13x | 14484 MB/s | 31.60% | 10.09x | 8533 MB/s | 37.71% |
-| (1 CPU) | 1.33x | 1127 MB/s | - | 0.70x | 589 MB/s | - |
-| [github-ranks-backup.bin](https://files.klauspost.com/compress/github-ranks-backup.bin.zst) | 15.14x | 12000 MB/s | -5.79% | 6.59x | 5223 MB/s | 5.80% |
-| (1 CPU) | 1.11x | 877 MB/s | - | 0.47x | 370 MB/s | - |
-| [consensus.db.10gb](https://files.klauspost.com/compress/consensus.db.10gb.zst) | 14.62x | 12116 MB/s | 15.90% | 5.35x | 4430 MB/s | 16.08% |
-| (1 CPU) | 1.38x | 1146 MB/s | - | 0.38x | 312 MB/s | - |
-| [adresser.json](https://files.klauspost.com/compress/adresser.json.zst) | 8.83x | 17579 MB/s | 43.86% | 6.54x | 13011 MB/s | 47.23% |
-| (1 CPU) | 1.14x | 2259 MB/s | - | 0.74x | 1475 MB/s | - |
-| [gob-stream](https://files.klauspost.com/compress/gob-stream.7z) | 16.72x | 14019 MB/s | 24.02% | 10.11x | 8477 MB/s | 30.48% |
-| (1 CPU) | 1.24x | 1043 MB/s | - | 0.70x | 586 MB/s | - |
-| [10gb.tar](http://mattmahoney.net/dc/10gb.html) | 13.33x | 9254 MB/s | 1.84% | 6.75x | 4686 MB/s | 6.72% |
-| (1 CPU) | 0.97x | 672 MB/s | - | 0.53x | 366 MB/s | - |
-| sharnd.out.2gb | 2.11x | 12639 MB/s | 0.01% | 1.98x | 11833 MB/s | 0.01% |
-| (1 CPU) | 0.93x | 5594 MB/s | - | 1.34x | 8030 MB/s | - |
-| [enwik9](http://mattmahoney.net/dc/textdata.html) | 19.34x | 8220 MB/s | 3.98% | 7.87x | 3345 MB/s | 15.82% |
-| (1 CPU) | 1.06x | 452 MB/s | - | 0.50x | 213 MB/s | - |
-| [silesia.tar](http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip) | 10.48x | 6124 MB/s | 5.67% | 3.76x | 2197 MB/s | 12.60% |
-| (1 CPU) | 0.97x | 568 MB/s | - | 0.46x | 271 MB/s | - |
-| [enwik10](https://encode.su/threads/3315-enwik10-benchmark-results) | 21.07x | 9020 MB/s | 6.36% | 6.91x | 2959 MB/s | 16.95% |
-| (1 CPU) | 1.07x | 460 MB/s | - | 0.51x | 220 MB/s | - |
-
-### Legend
-
-* `S2 speed`: Speed of S2 compared to Snappy, using 16 cores and 1 core.
-* `S2 throughput`: Throughput of S2 in MB/s.
-* `S2 % smaller`: How much smaller the S2 output is than the Snappy output, in percent.
-* `S2 "better"`: Speed of S2 "better" compression mode compared to Snappy, using 16 cores and 1 core.
-* `"better" throughput`: Throughput of S2 "better" mode in MB/s.
-* `"better" % smaller`: How much smaller the S2 "better" output is than the Snappy output, in percent.
-
-There is a good speedup across the board when using a single thread and a significant speedup when using multiple threads.
-
-Machine generated data gets by far the biggest compression boost, with the output being up to 45% smaller than the Snappy output.
-
-The "better" compression mode sees a good improvement in all cases, but usually at a performance cost.
-
-Incompressible content (`sharnd.out.2gb`, 2GB random data) sees the smallest speedup.
-This is likely dominated by synchronization overhead, which is confirmed by the fact that single threaded performance is higher (see above).
-
-## Decompression
-
-S2 attempts to create content that is also fast to decompress, except in "better" mode where the smallest representation is used.
-
-S2 vs Snappy **decompression** speed. Both operating on a single core:
-
-| File | S2 Throughput | vs. Snappy | Better Throughput | vs. Snappy |
-|-----------------------------------------------------------------------------------------------------|---------------|------------|-------------------|------------|
-| [rawstudio-mint14.tar](https://files.klauspost.com/compress/rawstudio-mint14.7z) | 2117 MB/s | 1.14x | 1738 MB/s | 0.94x |
-| [github-june-2days-2019.json](https://files.klauspost.com/compress/github-june-2days-2019.json.zst) | 2401 MB/s | 1.25x | 2307 MB/s | 1.20x |
-| [github-ranks-backup.bin](https://files.klauspost.com/compress/github-ranks-backup.bin.zst) | 2075 MB/s | 0.98x | 1764 MB/s | 0.83x |
-| [consensus.db.10gb](https://files.klauspost.com/compress/consensus.db.10gb.zst) | 2967 MB/s | 1.05x | 2885 MB/s | 1.02x |
-| [adresser.json](https://files.klauspost.com/compress/adresser.json.zst) | 4141 MB/s | 1.07x | 4184 MB/s | 1.08x |
-| [gob-stream](https://files.klauspost.com/compress/gob-stream.7z) | 2264 MB/s | 1.12x | 2185 MB/s | 1.08x |
-| [10gb.tar](http://mattmahoney.net/dc/10gb.html) | 1525 MB/s | 1.03x | 1347 MB/s | 0.91x |
-| sharnd.out.2gb | 3813 MB/s | 0.79x | 3900 MB/s | 0.81x |
-| [enwik9](http://mattmahoney.net/dc/textdata.html) | 1246 MB/s | 1.29x | 967 MB/s | 1.00x |
-| [silesia.tar](http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip) | 1433 MB/s | 1.12x | 1203 MB/s | 0.94x |
-| [enwik10](https://encode.su/threads/3315-enwik10-benchmark-results) | 1284 MB/s | 1.32x | 1010 MB/s | 1.04x |
-
-### Legend
-
-* `S2 Throughput`: Decompression speed of S2 encoded content.
-* `Better Throughput`: Decompression speed of S2 "better" encoded content.
-* `vs. Snappy`: Relative decompression speed compared to Snappy.
-
-
-While the decompression code hasn't changed, there is a significant speedup in decompression speed.
-S2 prefers longer matches and will typically only find matches that are 6 bytes or longer.
-While this reduces compression a bit, it improves decompression speed.
-
-The "better" compression mode will actively look for shorter matches, which is why it has a decompression speed quite similar to Snappy.
-
-Decompression is also very fast without assembly. Single goroutine decompression speed, no assembly:
-
-| File                           | Speedup | S2 Throughput |
-|--------------------------------|---------|---------------|
-| consensus.db.10gb.s2 | 1.84x | 2289.8 MB/s |
-| 10gb.tar.s2 | 1.30x | 867.07 MB/s |
-| rawstudio-mint14.tar.s2 | 1.66x | 1329.65 MB/s |
-| github-june-2days-2019.json.s2 | 2.36x | 1831.59 MB/s |
-| github-ranks-backup.bin.s2 | 1.73x | 1390.7 MB/s |
-| enwik9.s2 | 1.67x | 681.53 MB/s |
-| adresser.json.s2 | 3.41x | 4230.53 MB/s |
-| silesia.tar.s2                 | 1.52x | 811.58 MB/s |
-
-Even though S2 typically compresses better than Snappy, decompression speed is always better.
-
-## Block compression
-
-
-When compressing blocks, no concurrent compression is performed, just as with Snappy.
-This is because blocks are for smaller payloads and generally will not benefit from concurrent compression.
-
-An important change is that incompressible blocks will be at most 10 bytes bigger than the input.
-In rare, worst case scenarios Snappy blocks could be significantly bigger than the input.
-
-### Mixed content blocks
-
-The most reliable benchmark is a wide dataset.
-For this we use [`webdevdata.org-2015-01-07-subset`](https://files.klauspost.com/compress/webdevdata.org-2015-01-07-4GB-subset.7z),
-53927 files, total input size: 4,014,735,833 bytes. Single goroutine used.
-
-| * | Input | Output | Reduction | MB/s |
-|-------------------|------------|------------|-----------|--------|
-| S2 | 4014735833 | 1059723369 | 73.60% | **934.34** |
-| S2 Better | 4014735833 | 969670507 | 75.85% | 532.70 |
-| S2 Best | 4014735833 | 906625668 | **77.85%** | 46.84 |
-| Snappy | 4014735833 | 1128706759 | 71.89% | 762.59 |
-| S2, Snappy Output | 4014735833 | 1093821420 | 72.75% | 908.60 |
-| LZ4 | 4014735833 | 1079259294 | 73.12% | 526.94 |
-
-S2 delivers both the best single threaded throughput with regular mode and the best compression rate with "best".
-"Better" mode provides the same compression speed as LZ4 with better compression ratio.
-
-When producing Snappy compatible output, S2 still delivers better throughput (150MB/s more) and better compression.
-
-As can be seen from the other benchmarks, decompression should also be easier on the S2 generated output.
-
-Though they cannot be compared directly due to different decompression speeds, here are the speed/size comparisons for
-other Go compressors:
-
-| * | Input | Output | Reduction | MB/s |
-|-------------------|------------|------------|-----------|--------|
-| Zstd Fastest (Go) | 4014735833 | 794608518 | 80.21% | 236.04 |
-| Zstd Best (Go) | 4014735833 | 704603356 | 82.45% | 35.63 |
-| Deflate (Go) l1 | 4014735833 | 871294239 | 78.30% | 214.04 |
-| Deflate (Go) l9 | 4014735833 | 730389060 | 81.81% | 41.17 |
-
-### Standard block compression
-
-Benchmarking single block performance is subject to a lot more variation since it only tests a limited number of file patterns.
-So individual benchmarks should only be seen as a guideline and the overall picture is more important.
-
-These micro-benchmarks are with data in cache and trained branch predictors. For a more realistic benchmark see the mixed content above.
-
-Block compression. Parallel benchmark running on 16 cores, 16 goroutines.
-
-AMD64 assembly is used for both S2 and Snappy.
-
-| Absolute Perf | Snappy size | S2 Size | Snappy Speed | S2 Speed | Snappy dec | S2 dec |
-|-----------------------|-------------|---------|--------------|-------------|-------------|-------------|
-| html | 22843 | 21111 | 16246 MB/s | 17438 MB/s | 40972 MB/s | 49263 MB/s |
-| urls.10K | 335492 | 287326 | 7943 MB/s | 9693 MB/s | 22523 MB/s | 26484 MB/s |
-| fireworks.jpeg | 123034 | 123100 | 349544 MB/s | 273889 MB/s | 718321 MB/s | 827552 MB/s |
-| fireworks.jpeg (200B) | 146 | 155 | 8869 MB/s | 17773 MB/s | 33691 MB/s | 52421 MB/s |
-| paper-100k.pdf | 85304 | 84459 | 167546 MB/s | 101263 MB/s | 326905 MB/s | 291944 MB/s |
-| html_x_4 | 92234 | 21113 | 15194 MB/s | 50670 MB/s | 30843 MB/s | 32217 MB/s |
-| alice29.txt | 88034 | 85975 | 5936 MB/s | 6139 MB/s | 12882 MB/s | 20044 MB/s |
-| asyoulik.txt | 77503 | 79650 | 5517 MB/s | 6366 MB/s | 12735 MB/s | 22806 MB/s |
-| lcet10.txt | 234661 | 220670 | 6235 MB/s | 6067 MB/s | 14519 MB/s | 18697 MB/s |
-| plrabn12.txt | 319267 | 317985 | 5159 MB/s | 5726 MB/s | 11923 MB/s | 19901 MB/s |
-| geo.protodata | 23335 | 18690 | 21220 MB/s | 26529 MB/s | 56271 MB/s | 62540 MB/s |
-| kppkn.gtb | 69526 | 65312 | 9732 MB/s | 8559 MB/s | 18491 MB/s | 18969 MB/s |
-| alice29.txt (128B) | 80 | 82 | 6691 MB/s | 15489 MB/s | 31883 MB/s | 38874 MB/s |
-| alice29.txt (1000B) | 774 | 774 | 12204 MB/s | 13000 MB/s | 48056 MB/s | 52341 MB/s |
-| alice29.txt (10000B) | 6648 | 6933 | 10044 MB/s | 12806 MB/s | 32378 MB/s | 46322 MB/s |
-| alice29.txt (20000B) | 12686 | 13574 | 7733 MB/s | 11210 MB/s | 30566 MB/s | 58969 MB/s |
-
-
-| Relative Perf | Snappy size | S2 size improved | S2 Speed | S2 Dec Speed |
-|-----------------------|-------------|------------------|----------|--------------|
-| html | 22.31% | 7.58% | 1.07x | 1.20x |
-| urls.10K | 47.78% | 14.36% | 1.22x | 1.18x |
-| fireworks.jpeg | 99.95% | -0.05% | 0.78x | 1.15x |
-| fireworks.jpeg (200B) | 73.00% | -6.16% | 2.00x | 1.56x |
-| paper-100k.pdf | 83.30% | 0.99% | 0.60x | 0.89x |
-| html_x_4 | 22.52% | 77.11% | 3.33x | 1.04x |
-| alice29.txt | 57.88% | 2.34% | 1.03x | 1.56x |
-| asyoulik.txt | 61.91% | -2.77% | 1.15x | 1.79x |
-| lcet10.txt | 54.99% | 5.96% | 0.97x | 1.29x |
-| plrabn12.txt | 66.26% | 0.40% | 1.11x | 1.67x |
-| geo.protodata | 19.68% | 19.91% | 1.25x | 1.11x |
-| kppkn.gtb | 37.72% | 6.06% | 0.88x | 1.03x |
-| alice29.txt (128B) | 62.50% | -2.50% | 2.31x | 1.22x |
-| alice29.txt (1000B) | 77.40% | 0.00% | 1.07x | 1.09x |
-| alice29.txt (10000B) | 66.48% | -4.29% | 1.27x | 1.43x |
-| alice29.txt (20000B) | 63.43% | -7.00% | 1.45x | 1.93x |
-
-Speed is generally at or above Snappy. Small blocks get a significant speedup, although at the expense of size.
-
-Decompression speed is better than Snappy, except in one case.
-
-Since payloads are very small, the variance in terms of size is rather big, so they should only be seen as a general guideline.
-
-Size is on average around Snappy, but varies with content type.
-In cases where compression is worse, it is usually compensated by a speed boost.
-
-
-### Better compression
-
-Benchmarking single block performance is subject to a lot more variation since it only tests a limited number of file patterns.
-So individual benchmarks should only be seen as a guideline and the overall picture is more important.
-
-| Absolute Perf | Snappy size | Better Size | Snappy Speed | Better Speed | Snappy dec | Better dec |
-|-----------------------|-------------|-------------|--------------|--------------|-------------|-------------|
-| html | 22843 | 19833 | 16246 MB/s | 7731 MB/s | 40972 MB/s | 40292 MB/s |
-| urls.10K | 335492 | 253529 | 7943 MB/s | 3980 MB/s | 22523 MB/s | 20981 MB/s |
-| fireworks.jpeg | 123034 | 123100 | 349544 MB/s | 9760 MB/s | 718321 MB/s | 823698 MB/s |
-| fireworks.jpeg (200B) | 146 | 142 | 8869 MB/s | 594 MB/s | 33691 MB/s | 30101 MB/s |
-| paper-100k.pdf | 85304 | 82915 | 167546 MB/s | 7470 MB/s | 326905 MB/s | 198869 MB/s |
-| html_x_4 | 92234 | 19841 | 15194 MB/s | 23403 MB/s | 30843 MB/s | 30937 MB/s |
-| alice29.txt | 88034 | 73218 | 5936 MB/s | 2945 MB/s | 12882 MB/s | 16611 MB/s |
-| asyoulik.txt | 77503 | 66844 | 5517 MB/s | 2739 MB/s | 12735 MB/s | 14975 MB/s |
-| lcet10.txt | 234661 | 190589 | 6235 MB/s | 3099 MB/s | 14519 MB/s | 16634 MB/s |
-| plrabn12.txt | 319267 | 270828 | 5159 MB/s | 2600 MB/s | 11923 MB/s | 13382 MB/s |
-| geo.protodata | 23335 | 18278 | 21220 MB/s | 11208 MB/s | 56271 MB/s | 57961 MB/s |
-| kppkn.gtb | 69526 | 61851 | 9732 MB/s | 4556 MB/s | 18491 MB/s | 16524 MB/s |
-| alice29.txt (128B) | 80 | 81 | 6691 MB/s | 529 MB/s | 31883 MB/s | 34225 MB/s |
-| alice29.txt (1000B) | 774 | 748 | 12204 MB/s | 1943 MB/s | 48056 MB/s | 42068 MB/s |
-| alice29.txt (10000B) | 6648 | 6234 | 10044 MB/s | 2949 MB/s | 32378 MB/s | 28813 MB/s |
-| alice29.txt (20000B) | 12686 | 11584 | 7733 MB/s | 2822 MB/s | 30566 MB/s | 27315 MB/s |
-
-
-| Relative Perf | Snappy size | Better size | Better Speed | Better dec |
-|-----------------------|-------------|-------------|--------------|------------|
-| html | 22.31% | 13.18% | 0.48x | 0.98x |
-| urls.10K | 47.78% | 24.43% | 0.50x | 0.93x |
-| fireworks.jpeg | 99.95% | -0.05% | 0.03x | 1.15x |
-| fireworks.jpeg (200B) | 73.00% | 2.74% | 0.07x | 0.89x |
-| paper-100k.pdf | 83.30% | 2.80% | 0.07x | 0.61x |
-| html_x_4 | 22.52% | 78.49% | 0.04x | 1.00x |
-| alice29.txt | 57.88% | 16.83% | 1.54x | 1.29x |
-| asyoulik.txt | 61.91% | 13.75% | 0.50x | 1.18x |
-| lcet10.txt | 54.99% | 18.78% | 0.50x | 1.15x |
-| plrabn12.txt | 66.26% | 15.17% | 0.50x | 1.12x |
-| geo.protodata | 19.68% | 21.67% | 0.50x | 1.03x |
-| kppkn.gtb | 37.72% | 11.04% | 0.53x | 0.89x |
-| alice29.txt (128B) | 62.50% | -1.25% | 0.47x | 1.07x |
-| alice29.txt (1000B) | 77.40% | 3.36% | 0.08x | 0.88x |
-| alice29.txt (10000B) | 66.48% | 6.23% | 0.16x | 0.89x |
-| alice29.txt (20000B) | 63.43% | 8.69% | 0.29x | 0.89x |
-
-Except for the mostly incompressible JPEG image, compression is better, usually by
-double digits in terms of percentage reduction over Snappy.
-
-The PDF sample shows a significant slowdown compared to Snappy, as this mode tries harder
-to compress the data. Very small blocks are also not favorable for better compression, so throughput is way down.
-
-This mode aims to provide better compression at the expense of performance and achieves that
-without a huge performance penalty, except on very small blocks.
-
-Decompression speed suffers a little compared to the regular S2 mode,
-but still manages to be close to Snappy in spite of increased compression.
-
-# Best compression mode
-
-S2 offers a "best" compression mode.
-
-This will compress as much as possible with little regard to CPU usage.
-
-It is mainly intended for offline compression, where decompression speed should still
-be high and the output remains compatible with other S2 compressed data.
-
-Some examples compared on 16 core CPU, amd64 assembly used:
-
-```
-* enwik10
-Default... 10000000000 -> 4761467548 [47.61%]; 1.098s, 8685.6MB/s
-Better... 10000000000 -> 4219438251 [42.19%]; 1.925s, 4954.2MB/s
-Best... 10000000000 -> 3627364337 [36.27%]; 43.051s, 221.5MB/s
-
-* github-june-2days-2019.json
-Default... 6273951764 -> 1043196283 [16.63%]; 431ms, 13882.3MB/s
-Better... 6273951764 -> 949146808 [15.13%]; 547ms, 10938.4MB/s
-Best... 6273951764 -> 832855506 [13.27%]; 9.455s, 632.8MB/s
-
-* nyc-taxi-data-10M.csv
-Default... 3325605752 -> 1095998837 [32.96%]; 324ms, 9788.7MB/s
-Better... 3325605752 -> 954776589 [28.71%]; 491ms, 6459.4MB/s
-Best... 3325605752 -> 779098746 [23.43%]; 8.29s, 382.6MB/s
-
-* 10gb.tar
-Default... 10065157632 -> 5916578242 [58.78%]; 1.028s, 9337.4MB/s
-Better... 10065157632 -> 5649207485 [56.13%]; 1.597s, 6010.6MB/s
-Best... 10065157632 -> 5208719802 [51.75%]; 32.78s, 292.8MB/s
-
-* consensus.db.10gb
-Default... 10737418240 -> 4562648848 [42.49%]; 882ms, 11610.0MB/s
-Better... 10737418240 -> 4542428129 [42.30%]; 1.533s, 6679.7MB/s
-Best... 10737418240 -> 4244773384 [39.53%]; 42.96s, 238.4MB/s
-```
-
-Decompression speed should be around the same as using the 'better' compression mode.
-
-# Snappy Compatibility
-
-S2 now offers full compatibility with Snappy.
-
-This means that the efficient encoders of S2 can be used to generate fully Snappy compatible output.
-
-There is a [snappy](https://github.com/klauspost/compress/tree/master/snappy) package that can be used by
-simply changing imports from `github.com/golang/snappy` to `github.com/klauspost/compress/snappy`.
-This uses "better" mode for all operations.
-If you would like more control, you can use the s2 package as described below:
-
-## Blocks
-
-Snappy compatible blocks can be generated with the S2 encoder.
-Compression and speed are typically a bit better, and `MaxEncodedLen` is also smaller, reducing memory usage. Replace:
-
-| Snappy | S2 replacement |
-|----------------------------|-------------------------|
-| snappy.Encode(...) | s2.EncodeSnappy(...) |
-| snappy.MaxEncodedLen(...) | s2.MaxEncodedLen(...) |
-
-`s2.EncodeSnappy` can be replaced with `s2.EncodeSnappyBetter` or `s2.EncodeSnappyBest` to get more efficiently compressed snappy compatible output.
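-
-A drop-in sketch for block compression with Snappy compatible output (the function name is illustrative):
-
-```Go
-// compressSnappyCompatible returns src compressed as a Snappy compatible block.
-func compressSnappyCompatible(src []byte) []byte {
-	dst := make([]byte, 0, s2.MaxEncodedLen(len(src)))
-	// Swap in EncodeSnappyBetter or EncodeSnappyBest for smaller output.
-	return s2.EncodeSnappy(dst, src)
-}
-```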
-
-`s2.ConcatBlocks` is compatible with snappy blocks.
-
-Comparison of [`webdevdata.org-2015-01-07-subset`](https://files.klauspost.com/compress/webdevdata.org-2015-01-07-4GB-subset.7z),
-53927 files, total input size: 4,014,735,833 bytes. amd64, single goroutine used:
-
-| Encoder | Size | MB/s | Reduction |
-|-----------------------|------------|------------|-----------|
-| snappy.Encode | 1128706759 | 725.59 | 71.89% |
-| s2.EncodeSnappy | 1093823291 | **899.16** | 72.75% |
-| s2.EncodeSnappyBetter | 1001158548 | 578.49 | 75.06% |
-| s2.EncodeSnappyBest | 944507998 | 66.00 | **76.47%**|
-
-## Streams
-
-For streams, replace `enc = snappy.NewBufferedWriter(w)` with `enc = s2.NewWriter(w, s2.WriterSnappyCompat())`.
-All other options are available, but note that the block size limit is different for snappy.
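-
-A stream-level sketch mirroring the `EncodeStream` example above, but producing a Snappy compatible stream:
-
-```Go
-func EncodeSnappyStream(src io.Reader, dst io.Writer) error {
-	enc := s2.NewWriter(dst, s2.WriterSnappyCompat())
-	if _, err := io.Copy(enc, src); err != nil {
-		enc.Close()
-		return err
-	}
-	// Blocks until compression is done.
-	return enc.Close()
-}
-```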
-
-Comparison of different streams, AMD Ryzen 3950x, 16 cores. Size and throughput:
-
-| File | snappy.NewWriter | S2 Snappy | S2 Snappy, Better | S2 Snappy, Best |
-|-----------------------------|--------------------------|---------------------------|--------------------------|-------------------------|
-| nyc-taxi-data-10M.csv | 1316042016 - 539.47MB/s | 1307003093 - 10132.73MB/s | 1174534014 - 5002.44MB/s | 1115904679 - 177.97MB/s |
-| enwik10 (xml) | 5088294643 - 451.13MB/s | 5175840939 - 9440.69MB/s | 4560784526 - 4487.21MB/s | 4340299103 - 158.92MB/s |
-| 10gb.tar (mixed) | 6056946612 - 729.73MB/s | 6208571995 - 9978.05MB/s | 5741646126 - 4919.98MB/s | 5548973895 - 180.44MB/s |
-| github-june-2days-2019.json | 1525176492 - 933.00MB/s | 1476519054 - 13150.12MB/s | 1400547532 - 5803.40MB/s | 1321887137 - 204.29MB/s |
-| consensus.db.10gb (db) | 5412897703 - 1102.14MB/s | 5354073487 - 13562.91MB/s | 5335069899 - 5294.73MB/s | 5201000954 - 175.72MB/s |
-
-# Decompression
-
-All decompression functions map directly to equivalent s2 functions.
-
-| Snappy | S2 replacement |
-|------------------------|--------------------|
-| snappy.Decode(...) | s2.Decode(...) |
-| snappy.DecodedLen(...) | s2.DecodedLen(...) |
-| snappy.NewReader(...) | s2.NewReader(...) |
-
-Features like [quick forward skipping without decompression](https://pkg.go.dev/github.com/klauspost/compress/s2#Reader.Skip)
-are also available for Snappy streams.
-
-If you know you are only decompressing snappy streams, setting [`ReaderMaxBlockSize(64<<10)`](https://pkg.go.dev/github.com/klauspost/compress/s2#ReaderMaxBlockSize)
-on your Reader will reduce memory consumption.
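-
-For example (a sketch, function name illustrative):
-
-```Go
-// DecodeSnappyStream decompresses a Snappy stream with reduced memory usage.
-func DecodeSnappyStream(src io.Reader, dst io.Writer) error {
-	dec := s2.NewReader(src, s2.ReaderMaxBlockSize(64<<10)) // 64KB max block
-	_, err := io.Copy(dst, dec)
-	return err
-}
-```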
-
-# Concatenating blocks and streams
-
-Concatenating streams will concatenate the output of both without recompressing them.
-While this is inefficient in terms of compression it might be usable in certain scenarios.
-The 10 byte 'stream identifier' of the second stream can optionally be stripped, but it is not a requirement.
-
-Blocks can be concatenated using the `ConcatBlocks` function.
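-
-A sketch, assuming the variadic `ConcatBlocks(dst, blocks...)` form:
-
-```Go
-// concat joins two already compressed blocks into one block that decodes
-// to the concatenation of their uncompressed content.
-func concat(a, b []byte) ([]byte, error) {
-	return s2.ConcatBlocks(nil, a, b)
-}
-```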
-
-Snappy blocks/streams can safely be concatenated with S2 blocks and streams.
-Streams with indexes (see below) will currently not work on concatenated streams.
-
-# Stream Seek Index
-
-S2 and Snappy streams can have indexes. These indexes will allow random seeking within the compressed data.
-
-The index can either be appended to the stream as a skippable block or returned for separate storage.
-
-When the index is appended to a stream it will be skipped by regular decoders,
-so the output remains compatible with other decoders.
-
-## Creating an Index
-
-To automatically add an index to a stream, add the `WriterAddIndex()` option to your writer.
-Then the index will be added to the stream when `Close()` is called.
-
-```
- // Add Index to stream...
- enc := s2.NewWriter(w, s2.WriterAddIndex())
- io.Copy(enc, r)
- enc.Close()
-```
-
-If you want to store the index separately, you can use `CloseIndex()` instead of the regular `Close()`.
-This will return the index. Note that `CloseIndex()` should only be called once, and you shouldn't call `Close()`.
-
-```
- // Get index for separate storage...
- enc := s2.NewWriter(w)
- io.Copy(enc, r)
- index, err := enc.CloseIndex()
-```
-
-The `index` can then be used without needing to read from the stream.
-This means the index can be used without needing to seek to the end of the stream
-or for manually forwarding streams. See below.
-
-Finally, an existing S2/Snappy stream can be indexed using the `s2.IndexStream(r io.Reader)` function.
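-
-A sketch of indexing an existing compressed file and storing the index separately
-(the `.s2idx` file name is illustrative; `os` is assumed imported):
-
-```Go
-// indexFile scans an existing S2/Snappy stream and writes its index next to it.
-func indexFile(name string) error {
-	f, err := os.Open(name)
-	if err != nil {
-		return err
-	}
-	defer f.Close()
-	idx, err := s2.IndexStream(f)
-	if err != nil {
-		return err
-	}
-	return os.WriteFile(name+".s2idx", idx, 0o644)
-}
-```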
-
-## Using Indexes
-
-To use indexes there is a `ReadSeeker(random bool, index []byte) (*ReadSeeker, error)` function available.
-
-Calling ReadSeeker will return an [io.ReadSeeker](https://pkg.go.dev/io#ReadSeeker) compatible version of the reader.
-
-If 'random' is specified the returned io.Seeker can be used for random seeking, otherwise only forward seeking is supported.
-Enabling random seeking requires the original input to support the [io.Seeker](https://pkg.go.dev/io#Seeker) interface.
-
-```
- dec := s2.NewReader(r)
- rs, err := dec.ReadSeeker(false, nil)
- rs.Seek(wantOffset, io.SeekStart)
-```
-
-This gets a seeker that can only seek forward. Since no index is provided, the index is read from the stream.
-This requires that an index was added and that `r` supports the [io.Seeker](https://pkg.go.dev/io#Seeker) interface.
-
-A custom index can be specified which will be used if supplied.
-When using a custom index, it will not be read from the input stream.
-
-```
- dec := s2.NewReader(r)
- rs, err := dec.ReadSeeker(false, index)
- rs.Seek(wantOffset, io.SeekStart)
-```
-
-This will read the index from `index`. Since we specify non-random (forward only) seeking, `r` does not have to be an io.Seeker.
-
-```
- dec := s2.NewReader(r)
- rs, err := dec.ReadSeeker(true, index)
- rs.Seek(wantOffset, io.SeekStart)
-```
-
-Finally, since we specify that we want to do random seeking, `r` must be an io.Seeker.
-
-The returned [ReadSeeker](https://pkg.go.dev/github.com/klauspost/compress/s2#ReadSeeker) contains a shallow reference to the existing Reader,
-meaning changes performed to one are reflected in the other.
-
-To check if a stream contains an index at the end, the `(*Index).LoadStream(rs io.ReadSeeker) error` method can be used.
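-
-For example (a sketch):
-
-```Go
-// hasIndex reports whether the seekable stream ends with a loadable index.
-func hasIndex(rs io.ReadSeeker) bool {
-	var idx s2.Index
-	return idx.LoadStream(rs) == nil
-}
-```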
-
-## Manually Forwarding Streams
-
-Indexes can also be read outside the decoder using the [Index](https://pkg.go.dev/github.com/klauspost/compress/s2#Index) type.
-This can be used for parsing indexes, either separate or in streams.
-
-In some cases it may not be possible to serve a seekable stream.
-This can for instance be an HTTP stream, where the Range request
-is sent at the start of the stream.
-
-With a little bit of extra code it is still possible to use indexes
-to forward to a specific offset with a single forward skip.
-
-It is possible to load the index manually like this:
-```
- var index s2.Index
- _, err = index.Load(idxBytes)
-```
-
-This can be used to figure out how much to offset the compressed stream:
-
-```
- compressedOffset, uncompressedOffset, err := index.Find(wantOffset)
-```
-
-The `compressedOffset` is the number of bytes that should be skipped
-from the beginning of the compressed file.
-
-The `uncompressedOffset` will then be the offset of the uncompressed bytes returned
-when decoding from that position. This will always be <= wantOffset.
-
-When creating a decoder it must be specified that it should *not* expect a stream identifier
-at the beginning of the stream. Assuming the io.Reader `r` has been forwarded to `compressedOffset`
-we create the decoder like this:
-
-```
- dec := s2.NewReader(r, s2.ReaderIgnoreStreamIdentifier())
-```
-
-We are not completely done. We still need to forward the stream past the uncompressed bytes we didn't want.
-This is done using the regular "Skip" function:
-
-```
- err = dec.Skip(wantOffset - uncompressedOffset)
-```
-
-This will ensure that we are at exactly the offset we want, and reading from `dec` will start at the requested offset.
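-
-Putting the steps together, here is a sketch of decoding from a requested uncompressed offset.
-Only the s2 calls are taken from the steps above; the `open` callback (for example an HTTP Range request) is illustrative:
-
-```Go
-// readAt returns a reader positioned at wantOffset of the uncompressed data.
-// open must return a reader starting at the given compressed offset.
-func readAt(idxBytes []byte, wantOffset int64, open func(compressedOffset int64) (io.Reader, error)) (io.Reader, error) {
-	var index s2.Index
-	if _, err := index.Load(idxBytes); err != nil {
-		return nil, err
-	}
-	compressedOffset, uncompressedOffset, err := index.Find(wantOffset)
-	if err != nil {
-		return nil, err
-	}
-	r, err := open(compressedOffset)
-	if err != nil {
-		return nil, err
-	}
-	// The stream identifier was skipped along with the compressed prefix.
-	dec := s2.NewReader(r, s2.ReaderIgnoreStreamIdentifier())
-	// Discard the uncompressed bytes before the requested offset.
-	if err := dec.Skip(wantOffset - uncompressedOffset); err != nil {
-		return nil, err
-	}
-	return dec, nil
-}
-```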
-
-## Index Format:
-
-Each block is structured as a snappy skippable block, with the chunk ID 0x99.
-
-The block can be read from the front, but contains information so it can be read from the back as well.
-
-Numbers are stored as fixed size little endian values or [zigzag encoded](https://developers.google.com/protocol-buffers/docs/encoding#signed_integers) [base 128 varints](https://developers.google.com/protocol-buffers/docs/encoding),
-with un-encoded value length of 64 bits, unless other limits are specified.
-
-| Content | Format |
-|---------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------|
-| ID, `[1]byte` | Always 0x99. |
-| Data Length, `[3]byte` | 3 byte little-endian length of the chunk in bytes, following this. |
-| Header `[6]byte` | Header, must be `[115, 50, 105, 100, 120, 0]` or in text: "s2idx\x00". |
-| UncompressedSize, Varint | Total Uncompressed size. |
-| CompressedSize, Varint | Total Compressed size if known. Should be -1 if unknown. |
-| EstBlockSize, Varint | Block Size, used for guessing uncompressed offsets. Must be >= 0. |
-| Entries, Varint | Number of Entries in index, must be < 65536 and >=0. |
-| HasUncompressedOffsets `byte` | 0 if no uncompressed offsets are present, 1 if present. Other values are invalid. |
-| UncompressedOffsets, [Entries]VarInt | Uncompressed offsets. See below how to decode. |
-| CompressedOffsets, [Entries]VarInt | Compressed offsets. See below how to decode. |
-| Block Size, `[4]byte` | Little Endian total encoded size (including header and trailer). Can be used for searching backwards to start of block. |
-| Trailer `[6]byte` | Trailer, must be `[0, 120, 100, 105, 50, 115]` or in text: "\x00xdi2s". Can be used for identifying block from end of stream. |
-
-For regular streams the uncompressed offsets are fully predictable,
-so `HasUncompressedOffsets` allows specifying that compressed blocks all have
-exactly `EstBlockSize` bytes of uncompressed content.
-
-Entries *must* be in order, starting with the lowest offset,
-and there *must* be no uncompressed offset duplicates.
-Entries *may* point to the start of a skippable block,
-but it is then not allowed to also have an entry for the next block since
-that would give an uncompressed offset duplicate.
-
-There is no requirement for all blocks to be represented in the index.
-In fact there is a maximum of 65536 block entries in an index.
-
-The writer can use any method to reduce the number of entries.
-An implicit block start at 0,0 can be assumed.
-
-### Decoding entries:
-
-```
-// Read Uncompressed entries.
-// Each assumes EstBlockSize delta from previous.
-for each entry {
- uOff = 0
- if HasUncompressedOffsets == 1 {
- uOff = ReadVarInt // Read value from stream
- }
-
- // Except for the first entry, use previous values.
- if entryNum == 0 {
- entry[entryNum].UncompressedOffset = uOff
- continue
- }
-
-    // Uncompressed uses previous offset, adds EstBlockSize plus the read value.
-    entry[entryNum].UncompressedOffset = entry[entryNum-1].UncompressedOffset + EstBlockSize + uOff
-}
-
-
-// Guess that the first block will be 50% of uncompressed size.
-// Integer truncating division must be used.
-CompressGuess := EstBlockSize / 2
-
-// Read Compressed entries.
-// Each assumes CompressGuess delta from previous.
-// CompressGuess is adjusted for each value.
-for each entry {
- cOff = ReadVarInt // Read value from stream
-
- // Except for the first entry, use previous values.
- if entryNum == 0 {
- entry[entryNum].CompressedOffset = cOff
- continue
- }
-
- // Compressed uses previous and our estimate.
- entry[entryNum].CompressedOffset = entry[entryNum-1].CompressedOffset + CompressGuess + cOff
-
- // Adjust compressed offset for next loop, integer truncating division must be used.
- CompressGuess += cOff/2
-}
-```
-
-# Format Extensions
-
-* Frame [Stream identifier](https://github.com/google/snappy/blob/master/framing_format.txt#L68) changed from `sNaPpY` to `S2sTwO`.
-* [Framed compressed blocks](https://github.com/google/snappy/blob/master/format_description.txt) can be up to 4MB (up from 64KB).
-* Compressed blocks can have an offset of `0`, which indicates to repeat the last seen offset.
-
-Repeat offsets must be encoded as a [2.2.1. Copy with 1-byte offset (01)](https://github.com/google/snappy/blob/master/format_description.txt#L89), where the offset is 0.
-
-The length is specified by reading the 3-bit length field in the tag and decoding it using this table:
-
-| Length | Actual Length |
-|--------|----------------------|
-| 0 | 4 |
-| 1 | 5 |
-| 2 | 6 |
-| 3 | 7 |
-| 4 | 8 |
-| 5 | 8 + read 1 byte |
-| 6 | 260 + read 2 bytes |
-| 7 | 65540 + read 3 bytes |
-
-This allows any repeat offset + length to be represented by 2 to 5 bytes.
-
-Lengths are stored as little endian values.
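-
-A sketch of the repeat length decoding expressed in Go (not the library's internal decoder, just the table above):
-
-```Go
-// decodeRepeatLen returns the repeat length for a 3-bit length code and the
-// number of extra little-endian bytes consumed from src.
-func decodeRepeatLen(code uint8, src []byte) (length, read int) {
-	switch {
-	case code < 5:
-		return int(code) + 4, 0 // codes 0-4 map directly to lengths 4-8
-	case code == 5:
-		return 8 + int(src[0]), 1
-	case code == 6:
-		return 260 + int(src[0]) + int(src[1])<<8, 2
-	default: // code == 7
-		return 65540 + int(src[0]) + int(src[1])<<8 + int(src[2])<<16, 3
-	}
-}
-```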
-
-The first copy of a block cannot be a repeat offset and the offset is not carried across blocks in streams.
-
-Default streaming block size is 1MB.
-
-# LICENSE
-
-This code is based on the [Snappy-Go](https://github.com/golang/snappy) implementation.
-
-Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.
diff --git a/vendor/github.com/klauspost/compress/s2/decode.go b/vendor/github.com/klauspost/compress/s2/decode.go
deleted file mode 100644
index 9e7fce885..000000000
--- a/vendor/github.com/klauspost/compress/s2/decode.go
+++ /dev/null
@@ -1,762 +0,0 @@
-// Copyright 2011 The Snappy-Go Authors. All rights reserved.
-// Copyright (c) 2019 Klaus Post. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package s2
-
-import (
- "encoding/binary"
- "errors"
- "fmt"
- "io"
- "io/ioutil"
-)
-
-var (
- // ErrCorrupt reports that the input is invalid.
- ErrCorrupt = errors.New("s2: corrupt input")
- // ErrCRC reports that the input failed CRC validation (streams only)
- ErrCRC = errors.New("s2: corrupt input, crc mismatch")
- // ErrTooLarge reports that the uncompressed length is too large.
- ErrTooLarge = errors.New("s2: decoded block is too large")
- // ErrUnsupported reports that the input isn't supported.
- ErrUnsupported = errors.New("s2: unsupported input")
-)
-
-// ErrCantSeek is returned if the stream cannot be seeked.
-type ErrCantSeek struct {
- Reason string
-}
-
-// Error returns the error as string.
-func (e ErrCantSeek) Error() string {
- return fmt.Sprintf("s2: Can't seek because %s", e.Reason)
-}
-
-// DecodedLen returns the length of the decoded block.
-func DecodedLen(src []byte) (int, error) {
- v, _, err := decodedLen(src)
- return v, err
-}
-
-// decodedLen returns the length of the decoded block and the number of bytes
-// that the length header occupied.
-func decodedLen(src []byte) (blockLen, headerLen int, err error) {
- v, n := binary.Uvarint(src)
- if n <= 0 || v > 0xffffffff {
- return 0, 0, ErrCorrupt
- }
-
- const wordSize = 32 << (^uint(0) >> 32 & 1)
- if wordSize == 32 && v > 0x7fffffff {
- return 0, 0, ErrTooLarge
- }
- return int(v), n, nil
-}
-
-const (
- decodeErrCodeCorrupt = 1
-)
-
-// Decode returns the decoded form of src. The returned slice may be a sub-
-// slice of dst if dst was large enough to hold the entire decoded block.
-// Otherwise, a newly allocated slice will be returned.
-//
-// The dst and src must not overlap. It is valid to pass a nil dst.
-func Decode(dst, src []byte) ([]byte, error) {
- dLen, s, err := decodedLen(src)
- if err != nil {
- return nil, err
- }
- if dLen <= cap(dst) {
- dst = dst[:dLen]
- } else {
- dst = make([]byte, dLen)
- }
- if s2Decode(dst, src[s:]) != 0 {
- return nil, ErrCorrupt
- }
- return dst, nil
-}
-
-// NewReader returns a new Reader that decompresses from r, using the framing
-// format described at
-// https://github.com/google/snappy/blob/master/framing_format.txt with S2 changes.
-func NewReader(r io.Reader, opts ...ReaderOption) *Reader {
- nr := Reader{
- r: r,
- maxBlock: maxBlockSize,
- }
- for _, opt := range opts {
- if err := opt(&nr); err != nil {
- nr.err = err
- return &nr
- }
- }
- nr.maxBufSize = MaxEncodedLen(nr.maxBlock) + checksumSize
- if nr.lazyBuf > 0 {
- nr.buf = make([]byte, MaxEncodedLen(nr.lazyBuf)+checksumSize)
- } else {
- nr.buf = make([]byte, MaxEncodedLen(defaultBlockSize)+checksumSize)
- }
- nr.readHeader = nr.ignoreStreamID
- nr.paramsOK = true
- return &nr
-}
-
-// ReaderOption is an option for creating a decoder.
-type ReaderOption func(*Reader) error
-
-// ReaderMaxBlockSize allows to control allocations if the stream
-// has been compressed with a smaller WriterBlockSize, or with the default 1MB.
-// Blocks must be this size or smaller to decompress,
-// otherwise the decoder will return ErrUnsupported.
-//
-// For streams compressed with Snappy this can safely be set to 64KB (64 << 10).
-//
-// Default is the maximum limit of 4MB.
-func ReaderMaxBlockSize(blockSize int) ReaderOption {
- return func(r *Reader) error {
- if blockSize > maxBlockSize || blockSize <= 0 {
- return errors.New("s2: block size too large. Must be <= 4MB and > 0")
- }
- if r.lazyBuf == 0 && blockSize < defaultBlockSize {
- r.lazyBuf = blockSize
- }
- r.maxBlock = blockSize
- return nil
- }
-}
-
-// ReaderAllocBlock allows to control upfront stream allocations
-// and not allocate for frames bigger than this initially.
-// If frames bigger than this is seen a bigger buffer will be allocated.
-//
-// Default is 1MB, which is default output size.
-func ReaderAllocBlock(blockSize int) ReaderOption {
- return func(r *Reader) error {
- if blockSize > maxBlockSize || blockSize < 1024 {
- return errors.New("s2: invalid ReaderAllocBlock. Must be <= 4MB and >= 1024")
- }
- r.lazyBuf = blockSize
- return nil
- }
-}
-
-// ReaderIgnoreStreamIdentifier will make the reader skip the expected
-// stream identifier at the beginning of the stream.
-// This can be used when serving a stream that has been forwarded to a specific point.
-func ReaderIgnoreStreamIdentifier() ReaderOption {
- return func(r *Reader) error {
- r.ignoreStreamID = true
- return nil
- }
-}
-
-// ReaderSkippableCB will register a callback for chunks with the specified ID.
-// ID must be a Reserved skippable chunks ID, 0x80-0xfd (inclusive).
-// For each chunk with the ID, the callback is called with the content.
-// Any returned non-nil error will abort decompression.
-// Only one callback per ID is supported, latest sent will be used.
-func ReaderSkippableCB(id uint8, fn func(r io.Reader) error) ReaderOption {
- return func(r *Reader) error {
- if id < 0x80 || id > 0xfd {
- return fmt.Errorf("ReaderSkippableCB: Invalid id provided, must be 0x80-0xfd (inclusive)")
- }
- r.skippableCB[id] = fn
- return nil
- }
-}
-
-// Reader is an io.Reader that can read Snappy-compressed bytes.
-type Reader struct {
- r io.Reader
- err error
- decoded []byte
- buf []byte
- skippableCB [0x80]func(r io.Reader) error
- blockStart int64 // Uncompressed offset at start of current.
- index *Index
-
- // decoded[i:j] contains decoded bytes that have not yet been passed on.
- i, j int
- // maximum block size allowed.
- maxBlock int
- // maximum expected buffer size.
- maxBufSize int
- // alloc a buffer this size if > 0.
- lazyBuf int
- readHeader bool
- paramsOK bool
- snappyFrame bool
- ignoreStreamID bool
-}
-
-// ensureBufferSize will ensure that the buffer can take at least n bytes.
-// If false is returned the buffer exceeds maximum allowed size.
-func (r *Reader) ensureBufferSize(n int) bool {
- if len(r.buf) >= n {
- return true
- }
- if n > r.maxBufSize {
- r.err = ErrCorrupt
- return false
- }
- // Realloc buffer.
- r.buf = make([]byte, n)
- return true
-}
-
-// Reset discards any buffered data, resets all state, and switches the Snappy
-// reader to read from r. This permits reusing a Reader rather than allocating
-// a new one.
-func (r *Reader) Reset(reader io.Reader) {
- if !r.paramsOK {
- return
- }
- r.index = nil
- r.r = reader
- r.err = nil
- r.i = 0
- r.j = 0
- r.readHeader = r.ignoreStreamID
-}
-
-func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) {
- if _, r.err = io.ReadFull(r.r, p); r.err != nil {
- if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) {
- r.err = ErrCorrupt
- }
- return false
- }
- return true
-}
-
-// skippable will skip n bytes.
-// If the supplied reader supports seeking that is used.
-// tmp is used as a temporary buffer for reading.
-// The supplied slice does not need to be the size of the read.
-func (r *Reader) skippable(tmp []byte, n int, allowEOF bool, id uint8) (ok bool) {
- if id < 0x80 {
- r.err = fmt.Errorf("internal error: skippable id < 0x80")
- return false
- }
- if fn := r.skippableCB[id-0x80]; fn != nil {
- rd := io.LimitReader(r.r, int64(n))
- r.err = fn(rd)
- if r.err != nil {
- return false
- }
- _, r.err = io.CopyBuffer(ioutil.Discard, rd, tmp)
- return r.err == nil
- }
- if rs, ok := r.r.(io.ReadSeeker); ok {
- _, err := rs.Seek(int64(n), io.SeekCurrent)
- if err == nil {
- return true
- }
- if err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) {
- r.err = ErrCorrupt
- return false
- }
- }
- for n > 0 {
- if n < len(tmp) {
- tmp = tmp[:n]
- }
- if _, r.err = io.ReadFull(r.r, tmp); r.err != nil {
- if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) {
- r.err = ErrCorrupt
- }
- return false
- }
- n -= len(tmp)
- }
- return true
-}
-
-// Read satisfies the io.Reader interface.
-func (r *Reader) Read(p []byte) (int, error) {
- if r.err != nil {
- return 0, r.err
- }
- for {
- if r.i < r.j {
- n := copy(p, r.decoded[r.i:r.j])
- r.i += n
- return n, nil
- }
- if !r.readFull(r.buf[:4], true) {
- return 0, r.err
- }
- chunkType := r.buf[0]
- if !r.readHeader {
- if chunkType != chunkTypeStreamIdentifier {
- r.err = ErrCorrupt
- return 0, r.err
- }
- r.readHeader = true
- }
- chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16
-
- // The chunk types are specified at
- // https://github.com/google/snappy/blob/master/framing_format.txt
- switch chunkType {
- case chunkTypeCompressedData:
- r.blockStart += int64(r.j)
- // Section 4.2. Compressed data (chunk type 0x00).
- if chunkLen < checksumSize {
- r.err = ErrCorrupt
- return 0, r.err
- }
- if !r.ensureBufferSize(chunkLen) {
- if r.err == nil {
- r.err = ErrUnsupported
- }
- return 0, r.err
- }
- buf := r.buf[:chunkLen]
- if !r.readFull(buf, false) {
- return 0, r.err
- }
- checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
- buf = buf[checksumSize:]
-
- n, err := DecodedLen(buf)
- if err != nil {
- r.err = err
- return 0, r.err
- }
- if r.snappyFrame && n > maxSnappyBlockSize {
- r.err = ErrCorrupt
- return 0, r.err
- }
-
- if n > len(r.decoded) {
- if n > r.maxBlock {
- r.err = ErrCorrupt
- return 0, r.err
- }
- r.decoded = make([]byte, n)
- }
- if _, err := Decode(r.decoded, buf); err != nil {
- r.err = err
- return 0, r.err
- }
- if crc(r.decoded[:n]) != checksum {
- r.err = ErrCRC
- return 0, r.err
- }
- r.i, r.j = 0, n
- continue
-
- case chunkTypeUncompressedData:
- r.blockStart += int64(r.j)
- // Section 4.3. Uncompressed data (chunk type 0x01).
- if chunkLen < checksumSize {
- r.err = ErrCorrupt
- return 0, r.err
- }
- if !r.ensureBufferSize(chunkLen) {
- if r.err == nil {
- r.err = ErrUnsupported
- }
- return 0, r.err
- }
- buf := r.buf[:checksumSize]
- if !r.readFull(buf, false) {
- return 0, r.err
- }
- checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
- // Read directly into r.decoded instead of via r.buf.
- n := chunkLen - checksumSize
- if r.snappyFrame && n > maxSnappyBlockSize {
- r.err = ErrCorrupt
- return 0, r.err
- }
- if n > len(r.decoded) {
- if n > r.maxBlock {
- r.err = ErrCorrupt
- return 0, r.err
- }
- r.decoded = make([]byte, n)
- }
- if !r.readFull(r.decoded[:n], false) {
- return 0, r.err
- }
- if crc(r.decoded[:n]) != checksum {
- r.err = ErrCRC
- return 0, r.err
- }
- r.i, r.j = 0, n
- continue
-
- case chunkTypeStreamIdentifier:
- // Section 4.1. Stream identifier (chunk type 0xff).
- if chunkLen != len(magicBody) {
- r.err = ErrCorrupt
- return 0, r.err
- }
- if !r.readFull(r.buf[:len(magicBody)], false) {
- return 0, r.err
- }
- if string(r.buf[:len(magicBody)]) != magicBody {
- if string(r.buf[:len(magicBody)]) != magicBodySnappy {
- r.err = ErrCorrupt
- return 0, r.err
- } else {
- r.snappyFrame = true
- }
- } else {
- r.snappyFrame = false
- }
- continue
- }
-
- if chunkType <= 0x7f {
- // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
- // fmt.Printf("ERR chunktype: 0x%x\n", chunkType)
- r.err = ErrUnsupported
- return 0, r.err
- }
- // Section 4.4 Padding (chunk type 0xfe).
- // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
- if chunkLen > maxChunkSize {
- // fmt.Printf("ERR chunkLen: 0x%x\n", chunkLen)
- r.err = ErrUnsupported
- return 0, r.err
- }
-
- // fmt.Printf("skippable: ID: 0x%x, len: 0x%x\n", chunkType, chunkLen)
- if !r.skippable(r.buf, chunkLen, false, chunkType) {
- return 0, r.err
- }
- }
-}
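Every branch of the loop above starts from the same 4-byte chunk header: a chunk-type byte followed by a 24-bit little-endian body length; compressed (0x00) and uncompressed (0x01) chunk bodies then begin with the 4-byte CRC that the crc comparison verifies. A self-contained sketch of just the header parse (helper name illustrative):

```go
// parseChunkHeader splits a Snappy/S2 frame header into its chunk type and
// the little-endian 24-bit length of the chunk body that follows it.
func parseChunkHeader(hdr [4]byte) (chunkType uint8, chunkLen int) {
	chunkType = hdr[0]
	chunkLen = int(hdr[1]) | int(hdr[2])<<8 | int(hdr[3])<<16
	return chunkType, chunkLen
}
```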
-
-// Skip will skip n bytes forward in the decompressed output.
-// For larger skips this consumes less CPU and is faster than reading output and discarding it.
-// CRC is not checked on skipped blocks.
-// io.ErrUnexpectedEOF is returned if the stream ends before all bytes have been skipped.
-// If a decoding error is encountered subsequent calls to Read will also fail.
-func (r *Reader) Skip(n int64) error {
- if n < 0 {
- return errors.New("attempted negative skip")
- }
- if r.err != nil {
- return r.err
- }
-
- for n > 0 {
- if r.i < r.j {
- // Skip in buffer.
- // decoded[i:j] contains decoded bytes that have not yet been passed on.
- left := int64(r.j - r.i)
- if left >= n {
- r.i += int(n)
- return nil
- }
- n -= int64(r.j - r.i)
- r.i = r.j
- }
-
- // Buffer empty; read blocks until we have content.
- if !r.readFull(r.buf[:4], true) {
- if r.err == io.EOF {
- r.err = io.ErrUnexpectedEOF
- }
- return r.err
- }
- chunkType := r.buf[0]
- if !r.readHeader {
- if chunkType != chunkTypeStreamIdentifier {
- r.err = ErrCorrupt
- return r.err
- }
- r.readHeader = true
- }
- chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16
-
- // The chunk types are specified at
- // https://github.com/google/snappy/blob/master/framing_format.txt
- switch chunkType {
- case chunkTypeCompressedData:
- r.blockStart += int64(r.j)
- // Section 4.2. Compressed data (chunk type 0x00).
- if chunkLen < checksumSize {
- r.err = ErrCorrupt
- return r.err
- }
- if !r.ensureBufferSize(chunkLen) {
- if r.err == nil {
- r.err = ErrUnsupported
- }
- return r.err
- }
- buf := r.buf[:chunkLen]
- if !r.readFull(buf, false) {
- return r.err
- }
- checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
- buf = buf[checksumSize:]
-
- dLen, err := DecodedLen(buf)
- if err != nil {
- r.err = err
- return r.err
- }
- if dLen > r.maxBlock {
- r.err = ErrCorrupt
- return r.err
- }
- // Check if destination is within this block
- if int64(dLen) > n {
- if len(r.decoded) < dLen {
- r.decoded = make([]byte, dLen)
- }
- if _, err := Decode(r.decoded, buf); err != nil {
- r.err = err
- return r.err
- }
- if crc(r.decoded[:dLen]) != checksum {
- r.err = ErrCorrupt
- return r.err
- }
- } else {
- // Skip block completely
- n -= int64(dLen)
- dLen = 0
- }
- r.i, r.j = 0, dLen
- continue
- case chunkTypeUncompressedData:
- r.blockStart += int64(r.j)
- // Section 4.3. Uncompressed data (chunk type 0x01).
- if chunkLen < checksumSize {
- r.err = ErrCorrupt
- return r.err
- }
- if !r.ensureBufferSize(chunkLen) {
- if r.err == nil {
- r.err = ErrUnsupported
- }
- return r.err
- }
- buf := r.buf[:checksumSize]
- if !r.readFull(buf, false) {
- return r.err
- }
- checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
- // Read directly into r.decoded instead of via r.buf.
- n2 := chunkLen - checksumSize
- if n2 > len(r.decoded) {
- if n2 > r.maxBlock {
- r.err = ErrCorrupt
- return r.err
- }
- r.decoded = make([]byte, n2)
- }
- if !r.readFull(r.decoded[:n2], false) {
- return r.err
- }
- if int64(n2) < n {
- if crc(r.decoded[:n2]) != checksum {
- r.err = ErrCorrupt
- return r.err
- }
- }
- r.i, r.j = 0, n2
- continue
- case chunkTypeStreamIdentifier:
- // Section 4.1. Stream identifier (chunk type 0xff).
- if chunkLen != len(magicBody) {
- r.err = ErrCorrupt
- return r.err
- }
- if !r.readFull(r.buf[:len(magicBody)], false) {
- return r.err
- }
- if string(r.buf[:len(magicBody)]) != magicBody {
- if string(r.buf[:len(magicBody)]) != magicBodySnappy {
- r.err = ErrCorrupt
- return r.err
- }
- }
-
- continue
- }
-
- if chunkType <= 0x7f {
- // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
- r.err = ErrUnsupported
- return r.err
- }
- if chunkLen > maxChunkSize {
- r.err = ErrUnsupported
- return r.err
- }
- // Section 4.4 Padding (chunk type 0xfe).
- // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
- if !r.skippable(r.buf, chunkLen, false, chunkType) {
- return r.err
- }
- }
- return nil
-}
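A usage sketch for Skip, jumping past a prefix of the decompressed output before reading the remainder. NewReader is assumed and the helper name is illustrative.

```go
import (
	"io"

	"github.com/klauspost/compress/s2"
)

// readAfter skips the first n decompressed bytes of a framed stream and
// returns everything after them. Fully skipped blocks are not CRC-checked,
// per the Skip documentation above.
func readAfter(compressed io.Reader, n int64) ([]byte, error) {
	dec := s2.NewReader(compressed)
	if err := dec.Skip(n); err != nil {
		return nil, err
	}
	return io.ReadAll(dec)
}
```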
-
-// ReadSeeker provides random or forward seeking in compressed content.
-// See Reader.ReadSeeker
-type ReadSeeker struct {
- *Reader
-}
-
-// ReadSeeker will return an io.ReadSeeker compatible version of the reader.
-// If 'random' is specified the returned io.Seeker can be used for
-// random seeking, otherwise only forward seeking is supported.
-// Enabling random seeking requires the original input to support
-// the io.Seeker interface.
-// A custom index can be specified which will be used if supplied.
-// When using a custom index, it will not be read from the input stream.
-// The returned ReadSeeker contains a shallow reference to the existing Reader,
-// meaning changes performed to one are reflected in the other.
-func (r *Reader) ReadSeeker(random bool, index []byte) (*ReadSeeker, error) {
- // Read index if provided.
- if len(index) != 0 {
- if r.index == nil {
- r.index = &Index{}
- }
- if _, err := r.index.Load(index); err != nil {
- return nil, ErrCantSeek{Reason: "loading index returned: " + err.Error()}
- }
- }
-
- // Check if input is seekable
- rs, ok := r.r.(io.ReadSeeker)
- if !ok {
- if !random {
- return &ReadSeeker{Reader: r}, nil
- }
- return nil, ErrCantSeek{Reason: "input stream isn't seekable"}
- }
-
- if r.index != nil {
- // Seekable and index, ok...
- return &ReadSeeker{Reader: r}, nil
- }
-
- // Load from stream.
- r.index = &Index{}
-
- // Read current position.
- pos, err := rs.Seek(0, io.SeekCurrent)
- if err != nil {
- return nil, ErrCantSeek{Reason: "seeking input returned: " + err.Error()}
- }
- err = r.index.LoadStream(rs)
- if err != nil {
- if err == ErrUnsupported {
- return nil, ErrCantSeek{Reason: "input stream does not contain an index"}
- }
- return nil, ErrCantSeek{Reason: "reading index returned: " + err.Error()}
- }
-
- // reset position.
- _, err = rs.Seek(pos, io.SeekStart)
- if err != nil {
- return nil, ErrCantSeek{Reason: "seeking input returned: " + err.Error()}
- }
- return &ReadSeeker{Reader: r}, nil
-}
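A sketch for the forward-only case with a non-seekable input, which needs no index; seekable inputs and random=true go through the index-loading path above. NewReader is assumed and the helper name is illustrative.

```go
import (
	"io"

	"github.com/klauspost/compress/s2"
)

// forwardSeeker wraps a compressed, non-seekable stream in a decompressing
// io.ReadSeeker. With random=false and no index, seeking is limited to
// moving forward (SeekCurrent, or SeekStart at or past the current
// position), implemented internally via Skip.
func forwardSeeker(compressed io.Reader) (io.ReadSeeker, error) {
	dec := s2.NewReader(compressed)
	return dec.ReadSeeker(false, nil)
}
```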
-
-// Seek allows seeking in compressed data.
-func (r *ReadSeeker) Seek(offset int64, whence int) (int64, error) {
- if r.err != nil {
- return 0, r.err
- }
- if offset == 0 && whence == io.SeekCurrent {
- return r.blockStart + int64(r.i), nil
- }
- if !r.readHeader {
- // Make sure we read the header.
- _, r.err = r.Read([]byte{})
- }
- rs, ok := r.r.(io.ReadSeeker)
- if r.index == nil || !ok {
- if whence == io.SeekCurrent && offset >= 0 {
- err := r.Skip(offset)
- return r.blockStart + int64(r.i), err
- }
- if whence == io.SeekStart && offset >= r.blockStart+int64(r.i) {
- err := r.Skip(offset - r.blockStart - int64(r.i))
- return r.blockStart + int64(r.i), err
- }
- return 0, ErrUnsupported
-
- }
-
- switch whence {
- case io.SeekCurrent:
- offset += r.blockStart + int64(r.i)
- case io.SeekEnd:
- offset = -offset
- }
- c, u, err := r.index.Find(offset)
- if err != nil {
- return r.blockStart + int64(r.i), err
- }
-
- // Seek to next block
- _, err = rs.Seek(c, io.SeekStart)
- if err != nil {
- return 0, err
- }
-
- if offset < 0 {
- offset = r.index.TotalUncompressed + offset
- }
-
- r.i = r.j // Remove rest of current block.
- if u < offset {
- // Forward inside block
- return offset, r.Skip(offset - u)
- }
- return offset, nil
-}
-
-// ReadByte satisfies the io.ByteReader interface.
-func (r *Reader) ReadByte() (byte, error) {
- if r.err != nil {
- return 0, r.err
- }
- if r.i < r.j {
- c := r.decoded[r.i]
- r.i++
- return c, nil
- }
- var tmp [1]byte
- for i := 0; i < 10; i++ {
- n, err := r.Read(tmp[:])
- if err != nil {
- return 0, err
- }
- if n == 1 {
- return tmp[0], nil
- }
- }
- return 0, io.ErrNoProgress
-}
-
-// SkippableCB will register a callback for chunks with the specified ID.
-// ID must be a reserved skippable chunk ID, 0x80-0xfe (inclusive).
-// For each chunk with the ID, the callback is called with the content.
-// Any returned non-nil error will abort decompression.
-// Only one callback per ID is supported; the latest registered will be used.
-// Sending a nil function will disable previous callbacks.
-func (r *Reader) SkippableCB(id uint8, fn func(r io.Reader) error) error {
- if id < 0x80 || id > chunkTypePadding {
- return fmt.Errorf("ReaderSkippableCB: Invalid id provided, must be 0x80-0xfe (inclusive)")
- }
- r.skippableCB[id-0x80] = fn
- return nil
-}
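A sketch of registering a skippable-chunk callback. The chunk ID 0x80 is an arbitrary application-defined value within the allowed range, and the helper name is illustrative.

```go
import (
	"io"

	"github.com/klauspost/compress/s2"
)

// onMetadata registers a callback that receives the payload of every
// skippable chunk with the (application-chosen) ID 0x80 encountered while
// the stream is decompressed through dec.
func onMetadata(dec *s2.Reader, store func([]byte)) error {
	return dec.SkippableCB(0x80, func(r io.Reader) error {
		payload, err := io.ReadAll(r)
		if err != nil {
			return err
		}
		store(payload)
		return nil
	})
}
```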
diff --git a/vendor/github.com/klauspost/compress/s2/decode_amd64.s b/vendor/github.com/klauspost/compress/s2/decode_amd64.s
deleted file mode 100644
index 9b105e03c..000000000
--- a/vendor/github.com/klauspost/compress/s2/decode_amd64.s
+++ /dev/null
@@ -1,568 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Copyright (c) 2019 Klaus Post. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !appengine
-// +build gc
-// +build !noasm
-
-#include "textflag.h"
-
-#define R_TMP0 AX
-#define R_TMP1 BX
-#define R_LEN CX
-#define R_OFF DX
-#define R_SRC SI
-#define R_DST DI
-#define R_DBASE R8
-#define R_DLEN R9
-#define R_DEND R10
-#define R_SBASE R11
-#define R_SLEN R12
-#define R_SEND R13
-#define R_TMP2 R14
-#define R_TMP3 R15
-
-// The asm code generally follows the pure Go code in decode_other.go, except
-// where marked with a "!!!".
-
-// func decode(dst, src []byte) int
-//
-// All local variables fit into registers. The non-zero stack size is only to
-// spill registers and push args when issuing a CALL. The register allocation:
-// - R_TMP0 scratch
-// - R_TMP1 scratch
-// - R_LEN length or x (shared)
-// - R_OFF offset
-// - R_SRC &src[s]
-// - R_DST &dst[d]
-// + R_DBASE dst_base
-// + R_DLEN dst_len
-// + R_DEND dst_base + dst_len
-// + R_SBASE src_base
-// + R_SLEN src_len
-// + R_SEND src_base + src_len
-// - R_TMP2 used by doCopy
-// - R_TMP3 used by doCopy
-//
-// The registers R_DBASE-R_SEND (marked with a "+") are set at the start of the
-// function, and after a CALL returns, and are not otherwise modified.
-//
-// The d variable is implicitly R_DST - R_DBASE, and len(dst)-d is R_DEND - R_DST.
-// The s variable is implicitly R_SRC - R_SBASE, and len(src)-s is R_SEND - R_SRC.
-TEXT ·s2Decode(SB), NOSPLIT, $48-56
- // Initialize R_SRC, R_DST and R_DBASE-R_SEND.
- MOVQ dst_base+0(FP), R_DBASE
- MOVQ dst_len+8(FP), R_DLEN
- MOVQ R_DBASE, R_DST
- MOVQ R_DBASE, R_DEND
- ADDQ R_DLEN, R_DEND
- MOVQ src_base+24(FP), R_SBASE
- MOVQ src_len+32(FP), R_SLEN
- MOVQ R_SBASE, R_SRC
- MOVQ R_SBASE, R_SEND
- ADDQ R_SLEN, R_SEND
- XORQ R_OFF, R_OFF
-
-loop:
- // for s < len(src)
- CMPQ R_SRC, R_SEND
- JEQ end
-
- // R_LEN = uint32(src[s])
- //
- // switch src[s] & 0x03
- MOVBLZX (R_SRC), R_LEN
- MOVL R_LEN, R_TMP1
- ANDL $3, R_TMP1
- CMPL R_TMP1, $1
- JAE tagCopy
-
- // ----------------------------------------
- // The code below handles literal tags.
-
- // case tagLiteral:
- // x := uint32(src[s] >> 2)
- // switch
- SHRL $2, R_LEN
- CMPL R_LEN, $60
- JAE tagLit60Plus
-
- // case x < 60:
- // s++
- INCQ R_SRC
-
-doLit:
- // This is the end of the inner "switch", when we have a literal tag.
- //
- // We assume that R_LEN == x and x fits in a uint32, where x is the variable
- // used in the pure Go decode_other.go code.
-
- // length = int(x) + 1
- //
- // Unlike the pure Go code, we don't need to check if length <= 0 because
- // R_LEN can hold 64 bits, so the increment cannot overflow.
- INCQ R_LEN
-
- // Prepare to check if copying length bytes will run past the end of dst or
- // src.
- //
- // R_TMP0 = len(dst) - d
- // R_TMP1 = len(src) - s
- MOVQ R_DEND, R_TMP0
- SUBQ R_DST, R_TMP0
- MOVQ R_SEND, R_TMP1
- SUBQ R_SRC, R_TMP1
-
- // !!! Try a faster technique for short (16 or fewer bytes) copies.
- //
- // if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
- // goto callMemmove // Fall back on calling runtime·memmove.
- // }
- //
- // The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
- // against 21 instead of 16, because it cannot assume that all of its input
- // is contiguous in memory and so it needs to leave enough source bytes to
- // read the next tag without refilling buffers, but Go's Decode assumes
- // contiguousness (the src argument is a []byte).
- CMPQ R_LEN, $16
- JGT callMemmove
- CMPQ R_TMP0, $16
- JLT callMemmove
- CMPQ R_TMP1, $16
- JLT callMemmove
-
- // !!! Implement the copy from src to dst as a 16-byte load and store.
- // (Decode's documentation says that dst and src must not overlap.)
- //
- // This always copies 16 bytes, instead of only length bytes, but that's
- // OK. If the input is a valid Snappy encoding then subsequent iterations
- // will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
- // non-nil error), so the overrun will be ignored.
- //
- // Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
- // 16-byte loads and stores. This technique probably wouldn't be as
- // effective on architectures that are fussier about alignment.
- MOVOU 0(R_SRC), X0
- MOVOU X0, 0(R_DST)
-
- // d += length
- // s += length
- ADDQ R_LEN, R_DST
- ADDQ R_LEN, R_SRC
- JMP loop
-
-callMemmove:
- // if length > len(dst)-d || length > len(src)-s { etc }
- CMPQ R_LEN, R_TMP0
- JGT errCorrupt
- CMPQ R_LEN, R_TMP1
- JGT errCorrupt
-
- // copy(dst[d:], src[s:s+length])
- //
- // This means calling runtime·memmove(&dst[d], &src[s], length), so we push
- // R_DST, R_SRC and R_LEN as arguments. Coincidentally, we also need to spill those
- // three registers to the stack, to save local variables across the CALL.
- MOVQ R_DST, 0(SP)
- MOVQ R_SRC, 8(SP)
- MOVQ R_LEN, 16(SP)
- MOVQ R_DST, 24(SP)
- MOVQ R_SRC, 32(SP)
- MOVQ R_LEN, 40(SP)
- MOVQ R_OFF, 48(SP)
- CALL runtime·memmove(SB)
-
- // Restore local variables: unspill registers from the stack and
- // re-calculate R_DBASE-R_SEND.
- MOVQ 24(SP), R_DST
- MOVQ 32(SP), R_SRC
- MOVQ 40(SP), R_LEN
- MOVQ 48(SP), R_OFF
- MOVQ dst_base+0(FP), R_DBASE
- MOVQ dst_len+8(FP), R_DLEN
- MOVQ R_DBASE, R_DEND
- ADDQ R_DLEN, R_DEND
- MOVQ src_base+24(FP), R_SBASE
- MOVQ src_len+32(FP), R_SLEN
- MOVQ R_SBASE, R_SEND
- ADDQ R_SLEN, R_SEND
-
- // d += length
- // s += length
- ADDQ R_LEN, R_DST
- ADDQ R_LEN, R_SRC
- JMP loop
-
-tagLit60Plus:
- // !!! This fragment does the
- //
- // s += x - 58; if uint(s) > uint(len(src)) { etc }
- //
- // checks. In the asm version, we code it once instead of once per switch case.
- ADDQ R_LEN, R_SRC
- SUBQ $58, R_SRC
- CMPQ R_SRC, R_SEND
- JA errCorrupt
-
- // case x == 60:
- CMPL R_LEN, $61
- JEQ tagLit61
- JA tagLit62Plus
-
- // x = uint32(src[s-1])
- MOVBLZX -1(R_SRC), R_LEN
- JMP doLit
-
-tagLit61:
- // case x == 61:
- // x = uint32(src[s-2]) | uint32(src[s-1])<<8
- MOVWLZX -2(R_SRC), R_LEN
- JMP doLit
-
-tagLit62Plus:
- CMPL R_LEN, $62
- JA tagLit63
-
- // case x == 62:
- // x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
- // We read one byte, safe to read one back, since we are just reading tag.
- // x = binary.LittleEndian.Uint32(src[s-1:]) >> 8
- MOVL -4(R_SRC), R_LEN
- SHRL $8, R_LEN
- JMP doLit
-
-tagLit63:
- // case x == 63:
- // x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
- MOVL -4(R_SRC), R_LEN
- JMP doLit
-
-// The code above handles literal tags.
-// ----------------------------------------
-// The code below handles copy tags.
-
-tagCopy4:
- // case tagCopy4:
- // s += 5
- ADDQ $5, R_SRC
-
- // if uint(s) > uint(len(src)) { etc }
- CMPQ R_SRC, R_SEND
- JA errCorrupt
-
- // length = 1 + int(src[s-5])>>2
- SHRQ $2, R_LEN
- INCQ R_LEN
-
- // offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
- MOVLQZX -4(R_SRC), R_OFF
- JMP doCopy
-
-tagCopy2:
- // case tagCopy2:
- // s += 3
- ADDQ $3, R_SRC
-
- // if uint(s) > uint(len(src)) { etc }
- CMPQ R_SRC, R_SEND
- JA errCorrupt
-
- // length = 1 + int(src[s-3])>>2
- SHRQ $2, R_LEN
- INCQ R_LEN
-
- // offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
- MOVWQZX -2(R_SRC), R_OFF
- JMP doCopy
-
-tagCopy:
- // We have a copy tag. We assume that:
- // - R_TMP1 == src[s] & 0x03
- // - R_LEN == src[s]
- CMPQ R_TMP1, $2
- JEQ tagCopy2
- JA tagCopy4
-
- // case tagCopy1:
- // s += 2
- ADDQ $2, R_SRC
-
- // if uint(s) > uint(len(src)) { etc }
- CMPQ R_SRC, R_SEND
- JA errCorrupt
-
- // offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
- // length = 4 + int(src[s-2])>>2&0x7
- MOVBQZX -1(R_SRC), R_TMP1
- MOVQ R_LEN, R_TMP0
- SHRQ $2, R_LEN
- ANDQ $0xe0, R_TMP0
- ANDQ $7, R_LEN
- SHLQ $3, R_TMP0
- ADDQ $4, R_LEN
- ORQ R_TMP1, R_TMP0
-
- // check if repeat code, ZF set by ORQ.
- JZ repeatCode
-
- // This is a regular copy, transfer our temporary value to R_OFF (length)
- MOVQ R_TMP0, R_OFF
- JMP doCopy
-
-// This is a repeat code.
-repeatCode:
- // If length < 9, reuse last offset, with the length already calculated.
- CMPQ R_LEN, $9
- JL doCopyRepeat
-
- // Read additional bytes for length.
- JE repeatLen1
-
- // Rare, so the extra branch shouldn't hurt too much.
- CMPQ R_LEN, $10
- JE repeatLen2
- JMP repeatLen3
-
-// Read repeat lengths.
-repeatLen1:
- // s ++
- ADDQ $1, R_SRC
-
- // if uint(s) > uint(len(src)) { etc }
- CMPQ R_SRC, R_SEND
- JA errCorrupt
-
- // length = src[s-1] + 8
- MOVBQZX -1(R_SRC), R_LEN
- ADDL $8, R_LEN
- JMP doCopyRepeat
-
-repeatLen2:
- // s +=2
- ADDQ $2, R_SRC
-
- // if uint(s) > uint(len(src)) { etc }
- CMPQ R_SRC, R_SEND
- JA errCorrupt
-
- // length = uint32(src[s-2]) | (uint32(src[s-1])<<8) + (1 << 8)
- MOVWQZX -2(R_SRC), R_LEN
- ADDL $260, R_LEN
- JMP doCopyRepeat
-
-repeatLen3:
- // s +=3
- ADDQ $3, R_SRC
-
- // if uint(s) > uint(len(src)) { etc }
- CMPQ R_SRC, R_SEND
- JA errCorrupt
-
- // length = uint32(src[s-3]) | (uint32(src[s-2])<<8) | (uint32(src[s-1])<<16) + (1 << 16)
- // Read one byte further back (just part of the tag, shifted out)
- MOVL -4(R_SRC), R_LEN
- SHRL $8, R_LEN
- ADDL $65540, R_LEN
- JMP doCopyRepeat
-
-doCopy:
- // This is the end of the outer "switch", when we have a copy tag.
- //
- // We assume that:
- // - R_LEN == length && R_LEN > 0
- // - R_OFF == offset
-
- // if d < offset { etc }
- MOVQ R_DST, R_TMP1
- SUBQ R_DBASE, R_TMP1
- CMPQ R_TMP1, R_OFF
- JLT errCorrupt
-
- // Repeat values can skip the test above, since any offset > 0 will be in dst.
-doCopyRepeat:
- // if offset <= 0 { etc }
- CMPQ R_OFF, $0
- JLE errCorrupt
-
- // if length > len(dst)-d { etc }
- MOVQ R_DEND, R_TMP1
- SUBQ R_DST, R_TMP1
- CMPQ R_LEN, R_TMP1
- JGT errCorrupt
-
- // forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
- //
- // Set:
- // - R_TMP2 = len(dst)-d
- // - R_TMP3 = &dst[d-offset]
- MOVQ R_DEND, R_TMP2
- SUBQ R_DST, R_TMP2
- MOVQ R_DST, R_TMP3
- SUBQ R_OFF, R_TMP3
-
- // !!! Try a faster technique for short (16 or fewer bytes) forward copies.
- //
- // First, try using two 8-byte load/stores, similar to the doLit technique
- // above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
- // still OK if offset >= 8. Note that this has to be two 8-byte load/stores
- // and not one 16-byte load/store, and the first store has to be before the
- // second load, due to the overlap if offset is in the range [8, 16).
- //
- // if length > 16 || offset < 8 || len(dst)-d < 16 {
- // goto slowForwardCopy
- // }
- // copy 16 bytes
- // d += length
- CMPQ R_LEN, $16
- JGT slowForwardCopy
- CMPQ R_OFF, $8
- JLT slowForwardCopy
- CMPQ R_TMP2, $16
- JLT slowForwardCopy
- MOVQ 0(R_TMP3), R_TMP0
- MOVQ R_TMP0, 0(R_DST)
- MOVQ 8(R_TMP3), R_TMP1
- MOVQ R_TMP1, 8(R_DST)
- ADDQ R_LEN, R_DST
- JMP loop
-
-slowForwardCopy:
- // !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
- // can still try 8-byte load stores, provided we can overrun up to 10 extra
- // bytes. As above, the overrun will be fixed up by subsequent iterations
- // of the outermost loop.
- //
- // The C++ snappy code calls this technique IncrementalCopyFastPath. Its
- // commentary says:
- //
- // ----
- //
- // The main part of this loop is a simple copy of eight bytes at a time
- // until we've copied (at least) the requested amount of bytes. However,
- // if d and d-offset are less than eight bytes apart (indicating a
- // repeating pattern of length < 8), we first need to expand the pattern in
- // order to get the correct results. For instance, if the buffer looks like
- // this, with the eight-byte <d-offset> and <d> patterns marked as
- // intervals:
- //
- // abxxxxxxxxxxxx
- // [------] d-offset
- // [------] d
- //
- // a single eight-byte copy from <d-offset> to <d> will repeat the pattern
- // once, after which we can move <d> two bytes without moving <d-offset>:
- //
- // ababxxxxxxxxxx
- // [------] d-offset
- // [------] d
- //
- // and repeat the exercise until the two no longer overlap.
- //
- // This allows us to do very well in the special case of one single byte
- // repeated many times, without taking a big hit for more general cases.
- //
- // The worst case of extra writing past the end of the match occurs when
- // offset == 1 and length == 1; the last copy will read from byte positions
- // [0..7] and write to [4..11], whereas it was only supposed to write to
- // position 1. Thus, ten excess bytes.
- //
- // ----
- //
- // That "10 byte overrun" worst case is confirmed by Go's
- // TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
- // and finishSlowForwardCopy algorithm.
- //
- // if length > len(dst)-d-10 {
- // goto verySlowForwardCopy
- // }
- SUBQ $10, R_TMP2
- CMPQ R_LEN, R_TMP2
- JGT verySlowForwardCopy
-
- // We want to keep the offset, so we use R_TMP2 from here.
- MOVQ R_OFF, R_TMP2
-
-makeOffsetAtLeast8:
- // !!! As above, expand the pattern so that offset >= 8 and we can use
- // 8-byte load/stores.
- //
- // for offset < 8 {
- // copy 8 bytes from dst[d-offset:] to dst[d:]
- // length -= offset
- // d += offset
- // offset += offset
- // // The two previous lines together means that d-offset, and therefore
- // // R_TMP3, is unchanged.
- // }
- CMPQ R_TMP2, $8
- JGE fixUpSlowForwardCopy
- MOVQ (R_TMP3), R_TMP1
- MOVQ R_TMP1, (R_DST)
- SUBQ R_TMP2, R_LEN
- ADDQ R_TMP2, R_DST
- ADDQ R_TMP2, R_TMP2
- JMP makeOffsetAtLeast8
-
-fixUpSlowForwardCopy:
- // !!! Add length (which might be negative now) to d (implied by R_DST being
- // &dst[d]) so that d ends up at the right place when we jump back to the
- // top of the loop. Before we do that, though, we save R_DST to R_TMP0 so that, if
- // length is positive, copying the remaining length bytes will write to the
- // right place.
- MOVQ R_DST, R_TMP0
- ADDQ R_LEN, R_DST
-
-finishSlowForwardCopy:
- // !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
- // length means that we overrun, but as above, that will be fixed up by
- // subsequent iterations of the outermost loop.
- CMPQ R_LEN, $0
- JLE loop
- MOVQ (R_TMP3), R_TMP1
- MOVQ R_TMP1, (R_TMP0)
- ADDQ $8, R_TMP3
- ADDQ $8, R_TMP0
- SUBQ $8, R_LEN
- JMP finishSlowForwardCopy
-
-verySlowForwardCopy:
- // verySlowForwardCopy is a simple implementation of forward copy. In C
- // parlance, this is a do/while loop instead of a while loop, since we know
- // that length > 0. In Go syntax:
- //
- // for {
- // dst[d] = dst[d - offset]
- // d++
- // length--
- // if length == 0 {
- // break
- // }
- // }
- MOVB (R_TMP3), R_TMP1
- MOVB R_TMP1, (R_DST)
- INCQ R_TMP3
- INCQ R_DST
- DECQ R_LEN
- JNZ verySlowForwardCopy
- JMP loop
-
-// The code above handles copy tags.
-// ----------------------------------------
-
-end:
- // This is the end of the "for s < len(src)".
- //
- // if d != len(dst) { etc }
- CMPQ R_DST, R_DEND
- JNE errCorrupt
-
- // return 0
- MOVQ $0, ret+48(FP)
- RET
-
-errCorrupt:
- // return decodeErrCodeCorrupt
- MOVQ $1, ret+48(FP)
- RET
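The slowForwardCopy/makeOffsetAtLeast8 path above widens a short repeating pattern until the copy distance reaches 8 bytes, then finishes with 8-byte stores that may overrun slightly. A rough Go rendering of that idea, as a sketch only: the assembly, not this helper, is what the package executes, and bounds plus overrun headroom are assumed to have been checked by the caller.

```go
// expandPattern copies length bytes from dst[d-offset:] to dst[d:] the way
// the slow forward-copy path describes: double the pattern until the copy
// distance is at least 8, then copy 8 bytes at a time, tolerating a small
// write overrun that later output overwrites. It returns the logical end index.
func expandPattern(dst []byte, d, offset, length int) int {
	src := d - offset
	for offset < 8 {
		copy(dst[d:d+8], dst[src:src+8])
		length -= offset
		d += offset
		offset += offset // d-offset (and therefore src) is unchanged
	}
	end := d + length
	for length > 0 {
		copy(dst[d:d+8], dst[src:src+8])
		d += 8
		src += 8
		length -= 8
	}
	return end
}
```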
diff --git a/vendor/github.com/klauspost/compress/s2/decode_arm64.s b/vendor/github.com/klauspost/compress/s2/decode_arm64.s
deleted file mode 100644
index 4b63d5086..000000000
--- a/vendor/github.com/klauspost/compress/s2/decode_arm64.s
+++ /dev/null
@@ -1,574 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !appengine
-// +build gc
-// +build !noasm
-
-#include "textflag.h"
-
-#define R_TMP0 R2
-#define R_TMP1 R3
-#define R_LEN R4
-#define R_OFF R5
-#define R_SRC R6
-#define R_DST R7
-#define R_DBASE R8
-#define R_DLEN R9
-#define R_DEND R10
-#define R_SBASE R11
-#define R_SLEN R12
-#define R_SEND R13
-#define R_TMP2 R14
-#define R_TMP3 R15
-
-// TEST_SRC will check if R_SRC is <= R_SEND
-#define TEST_SRC() \
- CMP R_SEND, R_SRC \
- BGT errCorrupt
-
-// MOVD R_SRC, R_TMP1
-// SUB R_SBASE, R_TMP1, R_TMP1
-// CMP R_SLEN, R_TMP1
-// BGT errCorrupt
-
-// The asm code generally follows the pure Go code in decode_other.go, except
-// where marked with a "!!!".
-
-// func decode(dst, src []byte) int
-//
-// All local variables fit into registers. The non-zero stack size is only to
-// spill registers and push args when issuing a CALL. The register allocation:
-// - R_TMP0 scratch
-// - R_TMP1 scratch
-// - R_LEN length or x
-// - R_OFF offset
-// - R_SRC &src[s]
-// - R_DST &dst[d]
-// + R_DBASE dst_base
-// + R_DLEN dst_len
-// + R_DEND dst_base + dst_len
-// + R_SBASE src_base
-// + R_SLEN src_len
-// + R_SEND src_base + src_len
-// - R_TMP2 used by doCopy
-// - R_TMP3 used by doCopy
-//
-// The registers R_DBASE-R_SEND (marked with a "+") are set at the start of the
-// function, and after a CALL returns, and are not otherwise modified.
-//
-// The d variable is implicitly R_DST - R_DBASE, and len(dst)-d is R_DEND - R_DST.
-// The s variable is implicitly R_SRC - R_SBASE, and len(src)-s is R_SEND - R_SRC.
-TEXT ·s2Decode(SB), NOSPLIT, $56-64
- // Initialize R_SRC, R_DST and R_DBASE-R_SEND.
- MOVD dst_base+0(FP), R_DBASE
- MOVD dst_len+8(FP), R_DLEN
- MOVD R_DBASE, R_DST
- MOVD R_DBASE, R_DEND
- ADD R_DLEN, R_DEND, R_DEND
- MOVD src_base+24(FP), R_SBASE
- MOVD src_len+32(FP), R_SLEN
- MOVD R_SBASE, R_SRC
- MOVD R_SBASE, R_SEND
- ADD R_SLEN, R_SEND, R_SEND
- MOVD $0, R_OFF
-
-loop:
- // for s < len(src)
- CMP R_SEND, R_SRC
- BEQ end
-
- // R_LEN = uint32(src[s])
- //
- // switch src[s] & 0x03
- MOVBU (R_SRC), R_LEN
- MOVW R_LEN, R_TMP1
- ANDW $3, R_TMP1
- MOVW $1, R1
- CMPW R1, R_TMP1
- BGE tagCopy
-
- // ----------------------------------------
- // The code below handles literal tags.
-
- // case tagLiteral:
- // x := uint32(src[s] >> 2)
- // switch
- MOVW $60, R1
- LSRW $2, R_LEN, R_LEN
- CMPW R_LEN, R1
- BLS tagLit60Plus
-
- // case x < 60:
- // s++
- ADD $1, R_SRC, R_SRC
-
-doLit:
- // This is the end of the inner "switch", when we have a literal tag.
- //
- // We assume that R_LEN == x and x fits in a uint32, where x is the variable
- // used in the pure Go decode_other.go code.
-
- // length = int(x) + 1
- //
- // Unlike the pure Go code, we don't need to check if length <= 0 because
- // R_LEN can hold 64 bits, so the increment cannot overflow.
- ADD $1, R_LEN, R_LEN
-
- // Prepare to check if copying length bytes will run past the end of dst or
- // src.
- //
- // R_TMP0 = len(dst) - d
- // R_TMP1 = len(src) - s
- MOVD R_DEND, R_TMP0
- SUB R_DST, R_TMP0, R_TMP0
- MOVD R_SEND, R_TMP1
- SUB R_SRC, R_TMP1, R_TMP1
-
- // !!! Try a faster technique for short (16 or fewer bytes) copies.
- //
- // if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
- // goto callMemmove // Fall back on calling runtime·memmove.
- // }
- //
- // The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
- // against 21 instead of 16, because it cannot assume that all of its input
- // is contiguous in memory and so it needs to leave enough source bytes to
- // read the next tag without refilling buffers, but Go's Decode assumes
- // contiguousness (the src argument is a []byte).
- CMP $16, R_LEN
- BGT callMemmove
- CMP $16, R_TMP0
- BLT callMemmove
- CMP $16, R_TMP1
- BLT callMemmove
-
- // !!! Implement the copy from src to dst as a 16-byte load and store.
- // (Decode's documentation says that dst and src must not overlap.)
- //
- // This always copies 16 bytes, instead of only length bytes, but that's
- // OK. If the input is a valid Snappy encoding then subsequent iterations
- // will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
- // non-nil error), so the overrun will be ignored.
- //
- // Note that on arm64, it is legal and cheap to issue unaligned 8-byte or
- // 16-byte loads and stores. This technique probably wouldn't be as
- // effective on architectures that are fussier about alignment.
- LDP 0(R_SRC), (R_TMP2, R_TMP3)
- STP (R_TMP2, R_TMP3), 0(R_DST)
-
- // d += length
- // s += length
- ADD R_LEN, R_DST, R_DST
- ADD R_LEN, R_SRC, R_SRC
- B loop
-
-callMemmove:
- // if length > len(dst)-d || length > len(src)-s { etc }
- CMP R_TMP0, R_LEN
- BGT errCorrupt
- CMP R_TMP1, R_LEN
- BGT errCorrupt
-
- // copy(dst[d:], src[s:s+length])
- //
- // This means calling runtime·memmove(&dst[d], &src[s], length), so we push
- // R_DST, R_SRC and R_LEN as arguments. Coincidentally, we also need to spill those
- // three registers to the stack, to save local variables across the CALL.
- MOVD R_DST, 8(RSP)
- MOVD R_SRC, 16(RSP)
- MOVD R_LEN, 24(RSP)
- MOVD R_DST, 32(RSP)
- MOVD R_SRC, 40(RSP)
- MOVD R_LEN, 48(RSP)
- MOVD R_OFF, 56(RSP)
- CALL runtime·memmove(SB)
-
- // Restore local variables: unspill registers from the stack and
- // re-calculate R_DBASE-R_SEND.
- MOVD 32(RSP), R_DST
- MOVD 40(RSP), R_SRC
- MOVD 48(RSP), R_LEN
- MOVD 56(RSP), R_OFF
- MOVD dst_base+0(FP), R_DBASE
- MOVD dst_len+8(FP), R_DLEN
- MOVD R_DBASE, R_DEND
- ADD R_DLEN, R_DEND, R_DEND
- MOVD src_base+24(FP), R_SBASE
- MOVD src_len+32(FP), R_SLEN
- MOVD R_SBASE, R_SEND
- ADD R_SLEN, R_SEND, R_SEND
-
- // d += length
- // s += length
- ADD R_LEN, R_DST, R_DST
- ADD R_LEN, R_SRC, R_SRC
- B loop
-
-tagLit60Plus:
- // !!! This fragment does the
- //
- // s += x - 58; if uint(s) > uint(len(src)) { etc }
- //
- // checks. In the asm version, we code it once instead of once per switch case.
- ADD R_LEN, R_SRC, R_SRC
- SUB $58, R_SRC, R_SRC
- TEST_SRC()
-
- // case x == 60:
- MOVW $61, R1
- CMPW R1, R_LEN
- BEQ tagLit61
- BGT tagLit62Plus
-
- // x = uint32(src[s-1])
- MOVBU -1(R_SRC), R_LEN
- B doLit
-
-tagLit61:
- // case x == 61:
- // x = uint32(src[s-2]) | uint32(src[s-1])<<8
- MOVHU -2(R_SRC), R_LEN
- B doLit
-
-tagLit62Plus:
- CMPW $62, R_LEN
- BHI tagLit63
-
- // case x == 62:
- // x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
- MOVHU -3(R_SRC), R_LEN
- MOVBU -1(R_SRC), R_TMP1
- ORR R_TMP1<<16, R_LEN
- B doLit
-
-tagLit63:
- // case x == 63:
- // x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
- MOVWU -4(R_SRC), R_LEN
- B doLit
-
- // The code above handles literal tags.
- // ----------------------------------------
- // The code below handles copy tags.
-
-tagCopy4:
- // case tagCopy4:
- // s += 5
- ADD $5, R_SRC, R_SRC
-
- // if uint(s) > uint(len(src)) { etc }
- MOVD R_SRC, R_TMP1
- SUB R_SBASE, R_TMP1, R_TMP1
- CMP R_SLEN, R_TMP1
- BGT errCorrupt
-
- // length = 1 + int(src[s-5])>>2
- MOVD $1, R1
- ADD R_LEN>>2, R1, R_LEN
-
- // offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
- MOVWU -4(R_SRC), R_OFF
- B doCopy
-
-tagCopy2:
- // case tagCopy2:
- // s += 3
- ADD $3, R_SRC, R_SRC
-
- // if uint(s) > uint(len(src)) { etc }
- TEST_SRC()
-
- // length = 1 + int(src[s-3])>>2
- MOVD $1, R1
- ADD R_LEN>>2, R1, R_LEN
-
- // offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
- MOVHU -2(R_SRC), R_OFF
- B doCopy
-
-tagCopy:
- // We have a copy tag. We assume that:
- // - R_TMP1 == src[s] & 0x03
- // - R_LEN == src[s]
- CMP $2, R_TMP1
- BEQ tagCopy2
- BGT tagCopy4
-
- // case tagCopy1:
- // s += 2
- ADD $2, R_SRC, R_SRC
-
- // if uint(s) > uint(len(src)) { etc }
- TEST_SRC()
-
- // offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
- // Calculate offset in R_TMP0 in case it is a repeat.
- MOVD R_LEN, R_TMP0
- AND $0xe0, R_TMP0
- MOVBU -1(R_SRC), R_TMP1
- ORR R_TMP0<<3, R_TMP1, R_TMP0
-
- // length = 4 + int(src[s-2])>>2&0x7
- MOVD $7, R1
- AND R_LEN>>2, R1, R_LEN
- ADD $4, R_LEN, R_LEN
-
- // check if repeat code with offset 0.
- CMP $0, R_TMP0
- BEQ repeatCode
-
- // This is a regular copy, transfer our temporary value to R_OFF (offset)
- MOVD R_TMP0, R_OFF
- B doCopy
-
- // This is a repeat code.
-repeatCode:
- // If length < 9, reuse last offset, with the length already calculated.
- CMP $9, R_LEN
- BLT doCopyRepeat
- BEQ repeatLen1
- CMP $10, R_LEN
- BEQ repeatLen2
-
-repeatLen3:
- // s +=3
- ADD $3, R_SRC, R_SRC
-
- // if uint(s) > uint(len(src)) { etc }
- TEST_SRC()
-
- // length = uint32(src[s-3]) | (uint32(src[s-2])<<8) | (uint32(src[s-1])<<16) + 65540
- MOVBU -1(R_SRC), R_TMP0
- MOVHU -3(R_SRC), R_LEN
- ORR R_TMP0<<16, R_LEN, R_LEN
- ADD $65540, R_LEN, R_LEN
- B doCopyRepeat
-
-repeatLen2:
- // s +=2
- ADD $2, R_SRC, R_SRC
-
- // if uint(s) > uint(len(src)) { etc }
- TEST_SRC()
-
- // length = uint32(src[s-2]) | (uint32(src[s-1])<<8) + 260
- MOVHU -2(R_SRC), R_LEN
- ADD $260, R_LEN, R_LEN
- B doCopyRepeat
-
-repeatLen1:
- // s +=1
- ADD $1, R_SRC, R_SRC
-
- // if uint(s) > uint(len(src)) { etc }
- TEST_SRC()
-
- // length = src[s-1] + 8
- MOVBU -1(R_SRC), R_LEN
- ADD $8, R_LEN, R_LEN
- B doCopyRepeat
-
-doCopy:
- // This is the end of the outer "switch", when we have a copy tag.
- //
- // We assume that:
- // - R_LEN == length && R_LEN > 0
- // - R_OFF == offset
-
- // if d < offset { etc }
- MOVD R_DST, R_TMP1
- SUB R_DBASE, R_TMP1, R_TMP1
- CMP R_OFF, R_TMP1
- BLT errCorrupt
-
- // Repeat values can skip the test above, since any offset > 0 will be in dst.
-doCopyRepeat:
-
- // if offset <= 0 { etc }
- CMP $0, R_OFF
- BLE errCorrupt
-
- // if length > len(dst)-d { etc }
- MOVD R_DEND, R_TMP1
- SUB R_DST, R_TMP1, R_TMP1
- CMP R_TMP1, R_LEN
- BGT errCorrupt
-
- // forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
- //
- // Set:
- // - R_TMP2 = len(dst)-d
- // - R_TMP3 = &dst[d-offset]
- MOVD R_DEND, R_TMP2
- SUB R_DST, R_TMP2, R_TMP2
- MOVD R_DST, R_TMP3
- SUB R_OFF, R_TMP3, R_TMP3
-
- // !!! Try a faster technique for short (16 or fewer bytes) forward copies.
- //
- // First, try using two 8-byte load/stores, similar to the doLit technique
- // above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
- // still OK if offset >= 8. Note that this has to be two 8-byte load/stores
- // and not one 16-byte load/store, and the first store has to be before the
- // second load, due to the overlap if offset is in the range [8, 16).
- //
- // if length > 16 || offset < 8 || len(dst)-d < 16 {
- // goto slowForwardCopy
- // }
- // copy 16 bytes
- // d += length
- CMP $16, R_LEN
- BGT slowForwardCopy
- CMP $8, R_OFF
- BLT slowForwardCopy
- CMP $16, R_TMP2
- BLT slowForwardCopy
- MOVD 0(R_TMP3), R_TMP0
- MOVD R_TMP0, 0(R_DST)
- MOVD 8(R_TMP3), R_TMP1
- MOVD R_TMP1, 8(R_DST)
- ADD R_LEN, R_DST, R_DST
- B loop
-
-slowForwardCopy:
- // !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
- // can still try 8-byte load stores, provided we can overrun up to 10 extra
- // bytes. As above, the overrun will be fixed up by subsequent iterations
- // of the outermost loop.
- //
- // The C++ snappy code calls this technique IncrementalCopyFastPath. Its
- // commentary says:
- //
- // ----
- //
- // The main part of this loop is a simple copy of eight bytes at a time
- // until we've copied (at least) the requested amount of bytes. However,
- // if d and d-offset are less than eight bytes apart (indicating a
- // repeating pattern of length < 8), we first need to expand the pattern in
- // order to get the correct results. For instance, if the buffer looks like
- // this, with the eight-byte <d-offset> and <d> patterns marked as
- // intervals:
- //
- // abxxxxxxxxxxxx
- // [------] d-offset
- // [------] d
- //
- // a single eight-byte copy from <d-offset> to <d> will repeat the pattern
- // once, after which we can move <d> two bytes without moving <d-offset>:
- //
- // ababxxxxxxxxxx
- // [------] d-offset
- // [------] d
- //
- // and repeat the exercise until the two no longer overlap.
- //
- // This allows us to do very well in the special case of one single byte
- // repeated many times, without taking a big hit for more general cases.
- //
- // The worst case of extra writing past the end of the match occurs when
- // offset == 1 and length == 1; the last copy will read from byte positions
- // [0..7] and write to [4..11], whereas it was only supposed to write to
- // position 1. Thus, ten excess bytes.
- //
- // ----
- //
- // That "10 byte overrun" worst case is confirmed by Go's
- // TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
- // and finishSlowForwardCopy algorithm.
- //
- // if length > len(dst)-d-10 {
- // goto verySlowForwardCopy
- // }
- SUB $10, R_TMP2, R_TMP2
- CMP R_TMP2, R_LEN
- BGT verySlowForwardCopy
-
- // We want to keep the offset, so we use R_TMP2 from here.
- MOVD R_OFF, R_TMP2
-
-makeOffsetAtLeast8:
- // !!! As above, expand the pattern so that offset >= 8 and we can use
- // 8-byte load/stores.
- //
- // for offset < 8 {
- // copy 8 bytes from dst[d-offset:] to dst[d:]
- // length -= offset
- // d += offset
- // offset += offset
- // // The two previous lines together means that d-offset, and therefore
- // // R_TMP3, is unchanged.
- // }
- CMP $8, R_TMP2
- BGE fixUpSlowForwardCopy
- MOVD (R_TMP3), R_TMP1
- MOVD R_TMP1, (R_DST)
- SUB R_TMP2, R_LEN, R_LEN
- ADD R_TMP2, R_DST, R_DST
- ADD R_TMP2, R_TMP2, R_TMP2
- B makeOffsetAtLeast8
-
-fixUpSlowForwardCopy:
- // !!! Add length (which might be negative now) to d (implied by R_DST being
- // &dst[d]) so that d ends up at the right place when we jump back to the
- // top of the loop. Before we do that, though, we save R_DST to R_TMP0 so that, if
- // length is positive, copying the remaining length bytes will write to the
- // right place.
- MOVD R_DST, R_TMP0
- ADD R_LEN, R_DST, R_DST
-
-finishSlowForwardCopy:
- // !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
- // length means that we overrun, but as above, that will be fixed up by
- // subsequent iterations of the outermost loop.
- MOVD $0, R1
- CMP R1, R_LEN
- BLE loop
- MOVD (R_TMP3), R_TMP1
- MOVD R_TMP1, (R_TMP0)
- ADD $8, R_TMP3, R_TMP3
- ADD $8, R_TMP0, R_TMP0
- SUB $8, R_LEN, R_LEN
- B finishSlowForwardCopy
-
-verySlowForwardCopy:
- // verySlowForwardCopy is a simple implementation of forward copy. In C
- // parlance, this is a do/while loop instead of a while loop, since we know
- // that length > 0. In Go syntax:
- //
- // for {
- // dst[d] = dst[d - offset]
- // d++
- // length--
- // if length == 0 {
- // break
- // }
- // }
- MOVB (R_TMP3), R_TMP1
- MOVB R_TMP1, (R_DST)
- ADD $1, R_TMP3, R_TMP3
- ADD $1, R_DST, R_DST
- SUB $1, R_LEN, R_LEN
- CBNZ R_LEN, verySlowForwardCopy
- B loop
-
- // The code above handles copy tags.
- // ----------------------------------------
-
-end:
- // This is the end of the "for s < len(src)".
- //
- // if d != len(dst) { etc }
- CMP R_DEND, R_DST
- BNE errCorrupt
-
- // return 0
- MOVD $0, ret+48(FP)
- RET
-
-errCorrupt:
- // return decodeErrCodeCorrupt
- MOVD $1, R_TMP0
- MOVD R_TMP0, ret+48(FP)
- RET
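Both assembly files decode the same repeat-length extensions (repeatLen1/2/3 above): the 3-bit length field from a tagCopy1 byte either encodes the length directly or selects 1, 2, or 3 extra little-endian bytes with biases 8, 260 and 65540. A self-contained sketch of that mapping, with bounds checks omitted:

```go
// repeatLength decodes the length of an S2 repeat copy (tagCopy1 with
// offset == 0). field is the 3-bit length field from the tag byte and ext
// holds the bytes following the tag. It returns the decoded length and the
// number of extra bytes consumed.
func repeatLength(field uint8, ext []byte) (length, extra int) {
	switch field {
	case 5: // one extra byte
		return int(ext[0]) + 8, 1
	case 6: // two extra bytes, little endian
		return (int(ext[0]) | int(ext[1])<<8) + 260, 2
	case 7: // three extra bytes, little endian
		return (int(ext[0]) | int(ext[1])<<8 | int(ext[2])<<16) + 65540, 3
	default: // 0-4: lengths 4..8 encoded directly in the tag
		return int(field) + 4, 0
	}
}
```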
diff --git a/vendor/github.com/klauspost/compress/s2/decode_asm.go b/vendor/github.com/klauspost/compress/s2/decode_asm.go
deleted file mode 100644
index cb3576edd..000000000
--- a/vendor/github.com/klauspost/compress/s2/decode_asm.go
+++ /dev/null
@@ -1,17 +0,0 @@
-// Copyright 2016 The Snappy-Go Authors. All rights reserved.
-// Copyright (c) 2019 Klaus Post. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build (amd64 || arm64) && !appengine && gc && !noasm
-// +build amd64 arm64
-// +build !appengine
-// +build gc
-// +build !noasm
-
-package s2
-
-// decode has the same semantics as in decode_other.go.
-//
-//go:noescape
-func s2Decode(dst, src []byte) int
diff --git a/vendor/github.com/klauspost/compress/s2/decode_other.go b/vendor/github.com/klauspost/compress/s2/decode_other.go
deleted file mode 100644
index 1074ebd21..000000000
--- a/vendor/github.com/klauspost/compress/s2/decode_other.go
+++ /dev/null
@@ -1,267 +0,0 @@
-// Copyright 2016 The Snappy-Go Authors. All rights reserved.
-// Copyright (c) 2019 Klaus Post. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build (!amd64 && !arm64) || appengine || !gc || noasm
-// +build !amd64,!arm64 appengine !gc noasm
-
-package s2
-
-import (
- "fmt"
- "strconv"
-)
-
-// decode writes the decoding of src to dst. It assumes that the varint-encoded
-// length of the decompressed bytes has already been read, and that len(dst)
-// equals that length.
-//
-// It returns 0 on success or a decodeErrCodeXxx error code on failure.
-func s2Decode(dst, src []byte) int {
- const debug = false
- if debug {
- fmt.Println("Starting decode, dst len:", len(dst))
- }
- var d, s, length int
- offset := 0
-
- // As long as we can read at least 5 bytes...
- for s < len(src)-5 {
- switch src[s] & 0x03 {
- case tagLiteral:
- x := uint32(src[s] >> 2)
- switch {
- case x < 60:
- s++
- case x == 60:
- s += 2
- x = uint32(src[s-1])
- case x == 61:
- s += 3
- x = uint32(src[s-2]) | uint32(src[s-1])<<8
- case x == 62:
- s += 4
- x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
- case x == 63:
- s += 5
- x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
- }
- length = int(x) + 1
- if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
- return decodeErrCodeCorrupt
- }
- if debug {
- fmt.Println("literals, length:", length, "d-after:", d+length)
- }
-
- copy(dst[d:], src[s:s+length])
- d += length
- s += length
- continue
-
- case tagCopy1:
- s += 2
- length = int(src[s-2]) >> 2 & 0x7
- toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
- if toffset == 0 {
- if debug {
- fmt.Print("(repeat) ")
- }
- // keep last offset
- switch length {
- case 5:
- s += 1
- length = int(uint32(src[s-1])) + 4
- case 6:
- s += 2
- length = int(uint32(src[s-2])|(uint32(src[s-1])<<8)) + (1 << 8)
- case 7:
- s += 3
- length = int(uint32(src[s-3])|(uint32(src[s-2])<<8)|(uint32(src[s-1])<<16)) + (1 << 16)
- default: // 0-> 4
- }
- } else {
- offset = toffset
- }
- length += 4
- case tagCopy2:
- s += 3
- length = 1 + int(src[s-3])>>2
- offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
-
- case tagCopy4:
- s += 5
- length = 1 + int(src[s-5])>>2
- offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
- }
-
- if offset <= 0 || d < offset || length > len(dst)-d {
- return decodeErrCodeCorrupt
- }
-
- if debug {
- fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
- }
-
- // Copy from an earlier sub-slice of dst to a later sub-slice.
- // If no overlap, use the built-in copy:
- if offset > length {
- copy(dst[d:d+length], dst[d-offset:])
- d += length
- continue
- }
-
- // Unlike the built-in copy function, this byte-by-byte copy always runs
- // forwards, even if the slices overlap. Conceptually, this is:
- //
- // d += forwardCopy(dst[d:d+length], dst[d-offset:])
- //
- // We align the slices into a and b and show the compiler they are the same size.
- // This allows the loop to run without bounds checks.
- a := dst[d : d+length]
- b := dst[d-offset:]
- b = b[:len(a)]
- for i := range a {
- a[i] = b[i]
- }
- d += length
- }
-
- // Remaining with extra checks...
- for s < len(src) {
- switch src[s] & 0x03 {
- case tagLiteral:
- x := uint32(src[s] >> 2)
- switch {
- case x < 60:
- s++
- case x == 60:
- s += 2
- if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
- return decodeErrCodeCorrupt
- }
- x = uint32(src[s-1])
- case x == 61:
- s += 3
- if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
- return decodeErrCodeCorrupt
- }
- x = uint32(src[s-2]) | uint32(src[s-1])<<8
- case x == 62:
- s += 4
- if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
- return decodeErrCodeCorrupt
- }
- x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
- case x == 63:
- s += 5
- if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
- return decodeErrCodeCorrupt
- }
- x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
- }
- length = int(x) + 1
- if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
- return decodeErrCodeCorrupt
- }
- if debug {
- fmt.Println("literals, length:", length, "d-after:", d+length)
- }
-
- copy(dst[d:], src[s:s+length])
- d += length
- s += length
- continue
-
- case tagCopy1:
- s += 2
- if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
- return decodeErrCodeCorrupt
- }
- length = int(src[s-2]) >> 2 & 0x7
- toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
- if toffset == 0 {
- if debug {
- fmt.Print("(repeat) ")
- }
- // keep last offset
- switch length {
- case 5:
- s += 1
- if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
- return decodeErrCodeCorrupt
- }
- length = int(uint32(src[s-1])) + 4
- case 6:
- s += 2
- if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
- return decodeErrCodeCorrupt
- }
- length = int(uint32(src[s-2])|(uint32(src[s-1])<<8)) + (1 << 8)
- case 7:
- s += 3
- if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
- return decodeErrCodeCorrupt
- }
- length = int(uint32(src[s-3])|(uint32(src[s-2])<<8)|(uint32(src[s-1])<<16)) + (1 << 16)
- default: // 0-> 4
- }
- } else {
- offset = toffset
- }
- length += 4
- case tagCopy2:
- s += 3
- if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
- return decodeErrCodeCorrupt
- }
- length = 1 + int(src[s-3])>>2
- offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
-
- case tagCopy4:
- s += 5
- if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
- return decodeErrCodeCorrupt
- }
- length = 1 + int(src[s-5])>>2
- offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
- }
-
- if offset <= 0 || d < offset || length > len(dst)-d {
- return decodeErrCodeCorrupt
- }
-
- if debug {
- fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
- }
-
- // Copy from an earlier sub-slice of dst to a later sub-slice.
- // If no overlap, use the built-in copy:
- if offset > length {
- copy(dst[d:d+length], dst[d-offset:])
- d += length
- continue
- }
-
- // Unlike the built-in copy function, this byte-by-byte copy always runs
- // forwards, even if the slices overlap. Conceptually, this is:
- //
- // d += forwardCopy(dst[d:d+length], dst[d-offset:])
- //
- // We align the slices into a and b and show the compiler they are the same size.
- // This allows the loop to run without bounds checks.
- a := dst[d : d+length]
- b := dst[d-offset:]
- b = b[:len(a)]
- for i := range a {
- a[i] = b[i]
- }
- d += length
- }
-
- if d != len(dst) {
- return decodeErrCodeCorrupt
- }
- return 0
-}
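The literal branch above stores the run length in the upper six bits of the tag byte, with values 60-63 selecting 1-4 trailing little-endian length bytes. A standalone sketch of just that header decode (helper name illustrative, bounds checks omitted):

```go
// literalLength decodes the length of a literal run from its tag byte and
// any extra length bytes. It returns the run length and the total number of
// header bytes consumed (the tag plus extra length bytes).
func literalLength(src []byte) (length, hdr int) {
	x := uint32(src[0] >> 2)
	switch {
	case x < 60:
		hdr = 1
	case x == 60:
		x, hdr = uint32(src[1]), 2
	case x == 61:
		x, hdr = uint32(src[1])|uint32(src[2])<<8, 3
	case x == 62:
		x, hdr = uint32(src[1])|uint32(src[2])<<8|uint32(src[3])<<16, 4
	default: // x == 63
		x, hdr = uint32(src[1])|uint32(src[2])<<8|uint32(src[3])<<16|uint32(src[4])<<24, 5
	}
	return int(x) + 1, hdr
}
```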
diff --git a/vendor/github.com/klauspost/compress/s2/encode.go b/vendor/github.com/klauspost/compress/s2/encode.go
deleted file mode 100644
index 59f992ca6..000000000
--- a/vendor/github.com/klauspost/compress/s2/encode.go
+++ /dev/null
@@ -1,1347 +0,0 @@
-// Copyright 2011 The Snappy-Go Authors. All rights reserved.
-// Copyright (c) 2019 Klaus Post. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package s2
-
-import (
- "crypto/rand"
- "encoding/binary"
- "errors"
- "fmt"
- "io"
- "math"
- "math/bits"
- "runtime"
- "sync"
-)
-
-// Encode returns the encoded form of src. The returned slice may be a sub-
-// slice of dst if dst was large enough to hold the entire encoded block.
-// Otherwise, a newly allocated slice will be returned.
-//
-// The dst and src must not overlap. It is valid to pass a nil dst.
-//
-// The blocks will require the same amount of memory to decode as encoding,
-// and do not allow for concurrent decoding.
-// Also note that blocks do not contain CRC information, so corruption may be undetected.
-//
-// If you need to encode larger amounts of data, consider using
-// the streaming interface which gives all of these features.
-func Encode(dst, src []byte) []byte {
- if n := MaxEncodedLen(len(src)); n < 0 {
- panic(ErrTooLarge)
- } else if cap(dst) < n {
- dst = make([]byte, n)
- } else {
- dst = dst[:n]
- }
-
- // The block starts with the varint-encoded length of the decompressed bytes.
- d := binary.PutUvarint(dst, uint64(len(src)))
-
- if len(src) == 0 {
- return dst[:d]
- }
- if len(src) < minNonLiteralBlockSize {
- d += emitLiteral(dst[d:], src)
- return dst[:d]
- }
- n := encodeBlock(dst[d:], src)
- if n > 0 {
- d += n
- return dst[:d]
- }
- // Not compressible
- d += emitLiteral(dst[d:], src)
- return dst[:d]
-}
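A minimal round trip through the block API above; MaxEncodedLen sizes the destination so Encode can reuse it, and Decode reverses the block. Error handling is kept short for the sketch.

```go
package main

import (
	"bytes"
	"fmt"

	"github.com/klauspost/compress/s2"
)

func main() {
	src := bytes.Repeat([]byte("hello s2 block "), 200)

	// Pre-sizing dst with MaxEncodedLen lets Encode write in place
	// instead of allocating a new buffer.
	dst := make([]byte, s2.MaxEncodedLen(len(src)))
	block := s2.Encode(dst, src)

	back, err := s2.Decode(nil, block)
	if err != nil {
		panic(err)
	}
	fmt.Println(len(src), "->", len(block), "bytes; roundtrip ok:", bytes.Equal(src, back))
}
```

As the documentation above warns, a bare block carries no CRC; the stream format adds per-chunk CRCs.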
-
-// EncodeBetter returns the encoded form of src. The returned slice may be a sub-
-// slice of dst if dst was large enough to hold the entire encoded block.
-// Otherwise, a newly allocated slice will be returned.
-//
-// EncodeBetter compresses better than Encode but typically with a
-// 10-40% speed decrease on both compression and decompression.
-//
-// The dst and src must not overlap. It is valid to pass a nil dst.
-//
-// The blocks will require the same amount of memory to decode as encoding,
-// and do not allow for concurrent decoding.
-// Also note that blocks do not contain CRC information, so corruption may be undetected.
-//
-// If you need to encode larger amounts of data, consider using
-// the streaming interface which gives all of these features.
-func EncodeBetter(dst, src []byte) []byte {
- if n := MaxEncodedLen(len(src)); n < 0 {
- panic(ErrTooLarge)
- } else if len(dst) < n {
- dst = make([]byte, n)
- }
-
- // The block starts with the varint-encoded length of the decompressed bytes.
- d := binary.PutUvarint(dst, uint64(len(src)))
-
- if len(src) == 0 {
- return dst[:d]
- }
- if len(src) < minNonLiteralBlockSize {
- d += emitLiteral(dst[d:], src)
- return dst[:d]
- }
- n := encodeBlockBetter(dst[d:], src)
- if n > 0 {
- d += n
- return dst[:d]
- }
- // Not compressible
- d += emitLiteral(dst[d:], src)
- return dst[:d]
-}
-
-// EncodeBest returns the encoded form of src. The returned slice may be a sub-
-// slice of dst if dst was large enough to hold the entire encoded block.
-// Otherwise, a newly allocated slice will be returned.
-//
-// EncodeBest compresses as well as reasonably possible but with a
-// big speed decrease.
-//
-// The dst and src must not overlap. It is valid to pass a nil dst.
-//
-// The blocks will require the same amount of memory to decode as encoding,
-// and do not allow for concurrent decoding.
-// Also note that blocks do not contain CRC information, so corruption may be undetected.
-//
-// If you need to encode larger amounts of data, consider using
-// the streaming interface which gives all of these features.
-func EncodeBest(dst, src []byte) []byte {
- if n := MaxEncodedLen(len(src)); n < 0 {
- panic(ErrTooLarge)
- } else if len(dst) < n {
- dst = make([]byte, n)
- }
-
- // The block starts with the varint-encoded length of the decompressed bytes.
- d := binary.PutUvarint(dst, uint64(len(src)))
-
- if len(src) == 0 {
- return dst[:d]
- }
- if len(src) < minNonLiteralBlockSize {
- d += emitLiteral(dst[d:], src)
- return dst[:d]
- }
- n := encodeBlockBest(dst[d:], src)
- if n > 0 {
- d += n
- return dst[:d]
- }
- // Not compressible
- d += emitLiteral(dst[d:], src)
- return dst[:d]
-}
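Encode, EncodeBetter and EncodeBest trade encoding speed for compression ratio on the same block format. A quick sketch comparing their output sizes on one input; the actual numbers depend entirely on the data.

```go
import (
	"fmt"

	"github.com/klauspost/compress/s2"
)

// compareBlockModes prints the compressed size produced by each of the
// three block encoders for the same input.
func compareBlockModes(src []byte) {
	fmt.Println("Encode:      ", len(s2.Encode(nil, src)))
	fmt.Println("EncodeBetter:", len(s2.EncodeBetter(nil, src)))
	fmt.Println("EncodeBest:  ", len(s2.EncodeBest(nil, src)))
}
```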
-
-// EncodeSnappy returns the encoded form of src. The returned slice may be a sub-
-// slice of dst if dst was large enough to hold the entire encoded block.
-// Otherwise, a newly allocated slice will be returned.
-//
-// The output is Snappy compatible and will likely decompress faster.
-//
-// The dst and src must not overlap. It is valid to pass a nil dst.
-//
-// The blocks will require the same amount of memory to decode as encoding,
-// and do not allow for concurrent decoding.
-// Also note that blocks do not contain CRC information, so corruption may be undetected.
-//
-// If you need to encode larger amounts of data, consider using
-// the streaming interface which gives all of these features.
-func EncodeSnappy(dst, src []byte) []byte {
- if n := MaxEncodedLen(len(src)); n < 0 {
- panic(ErrTooLarge)
- } else if cap(dst) < n {
- dst = make([]byte, n)
- } else {
- dst = dst[:n]
- }
-
- // The block starts with the varint-encoded length of the decompressed bytes.
- d := binary.PutUvarint(dst, uint64(len(src)))
-
- if len(src) == 0 {
- return dst[:d]
- }
- if len(src) < minNonLiteralBlockSize {
- d += emitLiteral(dst[d:], src)
- return dst[:d]
- }
-
- n := encodeBlockSnappy(dst[d:], src)
- if n > 0 {
- d += n
- return dst[:d]
- }
- // Not compressible
- d += emitLiteral(dst[d:], src)
- return dst[:d]
-}
-
-// EncodeSnappyBetter returns the encoded form of src. The returned slice may be a sub-
-// slice of dst if dst was large enough to hold the entire encoded block.
-// Otherwise, a newly allocated slice will be returned.
-//
-// The output is Snappy compatible and will likely decompress faster.
-//
-// The dst and src must not overlap. It is valid to pass a nil dst.
-//
-// The blocks will require the same amount of memory to decode as encoding,
-// and do not allow for concurrent decoding.
-// Also note that blocks do not contain CRC information, so corruption may be undetected.
-//
-// If you need to encode larger amounts of data, consider using
-// the streaming interface which gives all of these features.
-func EncodeSnappyBetter(dst, src []byte) []byte {
- if n := MaxEncodedLen(len(src)); n < 0 {
- panic(ErrTooLarge)
- } else if cap(dst) < n {
- dst = make([]byte, n)
- } else {
- dst = dst[:n]
- }
-
- // The block starts with the varint-encoded length of the decompressed bytes.
- d := binary.PutUvarint(dst, uint64(len(src)))
-
- if len(src) == 0 {
- return dst[:d]
- }
- if len(src) < minNonLiteralBlockSize {
- d += emitLiteral(dst[d:], src)
- return dst[:d]
- }
-
- n := encodeBlockBetterSnappy(dst[d:], src)
- if n > 0 {
- d += n
- return dst[:d]
- }
- // Not compressible
- d += emitLiteral(dst[d:], src)
- return dst[:d]
-}
-
-// EncodeSnappyBest returns the encoded form of src. The returned slice may be a sub-
-// slice of dst if dst was large enough to hold the entire encoded block.
-// Otherwise, a newly allocated slice will be returned.
-//
-// The output is Snappy compatible and will likely decompress faster.
-//
-// The dst and src must not overlap. It is valid to pass a nil dst.
-//
-// The blocks will require the same amount of memory to decode as to encode,
-// and the format does not allow for concurrent decoding.
-// Also note that blocks do not contain CRC information, so corruption may be undetected.
-//
-// If you need to encode larger amounts of data, consider using
-// the streaming interface which gives all of these features.
-func EncodeSnappyBest(dst, src []byte) []byte {
- if n := MaxEncodedLen(len(src)); n < 0 {
- panic(ErrTooLarge)
- } else if cap(dst) < n {
- dst = make([]byte, n)
- } else {
- dst = dst[:n]
- }
-
- // The block starts with the varint-encoded length of the decompressed bytes.
- d := binary.PutUvarint(dst, uint64(len(src)))
-
- if len(src) == 0 {
- return dst[:d]
- }
- if len(src) < minNonLiteralBlockSize {
- d += emitLiteral(dst[d:], src)
- return dst[:d]
- }
-
- n := encodeBlockBestSnappy(dst[d:], src)
- if n > 0 {
- d += n
- return dst[:d]
- }
- // Not compressible
- d += emitLiteral(dst[d:], src)
- return dst[:d]
-}
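-
-// A minimal sketch of consuming the Snappy-compatible output: a block produced
-// by EncodeSnappyBest should decode with a plain Snappy block decoder such as
-// github.com/golang/snappy (an assumption about that external package):
-//
-//	block := EncodeSnappyBest(nil, src)
-//	out, err := snappy.Decode(nil, block)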
-
-// ConcatBlocks will concatenate the supplied blocks and append them to the supplied destination.
-// If the destination is nil or too small, a new one will be allocated.
-// The blocks are not validated, so garbage in = garbage out.
-// dst may not overlap block data.
-// Any data in dst is preserved as is, so it will not be considered a block.
-func ConcatBlocks(dst []byte, blocks ...[]byte) ([]byte, error) {
- totalSize := uint64(0)
- compSize := 0
- for _, b := range blocks {
- l, hdr, err := decodedLen(b)
- if err != nil {
- return nil, err
- }
- totalSize += uint64(l)
- compSize += len(b) - hdr
- }
- if totalSize == 0 {
- dst = append(dst, 0)
- return dst, nil
- }
- if totalSize > math.MaxUint32 {
- return nil, ErrTooLarge
- }
- var tmp [binary.MaxVarintLen32]byte
- hdrSize := binary.PutUvarint(tmp[:], totalSize)
- wantSize := hdrSize + compSize
-
- if cap(dst)-len(dst) < wantSize {
- dst = append(make([]byte, 0, wantSize+len(dst)), dst...)
- }
- dst = append(dst, tmp[:hdrSize]...)
- for _, b := range blocks {
- _, hdr, err := decodedLen(b)
- if err != nil {
- return nil, err
- }
- dst = append(dst, b[hdr:]...)
- }
- return dst, nil
-}
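-
-// sketchConcatBlocks is a minimal sketch of combining two encoded blocks; the
-// concatenated block should decode to the contents of a followed by b.
-// Decode is defined in decode.go.
-func sketchConcatBlocks(a, b []byte) ([]byte, error) {
-	blockA := Encode(nil, a)
-	blockB := Encode(nil, b)
-	joined, err := ConcatBlocks(nil, blockA, blockB)
-	if err != nil {
-		return nil, err
-	}
-	return Decode(nil, joined)
-}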
-
-// inputMargin is the minimum number of extra input bytes to keep, inside
-// encodeBlock's inner loop. On some architectures, this margin lets us
-// implement a fast path for emitLiteral, where the copy of short (<= 16 byte)
-// literals can be implemented as a single load to and store from a 16-byte
-// register. That literal's actual length can be as short as 1 byte, so this
-// can copy up to 15 bytes too much, but that's OK as subsequent iterations of
-// the encoding loop will fix up the copy overrun, and this inputMargin ensures
-// that we don't overrun the dst and src buffers.
-const inputMargin = 8
-
-// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
-// will be accepted by the encoder.
-const minNonLiteralBlockSize = 32
-
-// MaxBlockSize is the maximum value where MaxEncodedLen will return a valid block size.
-// Blocks this big are highly discouraged, though.
-const MaxBlockSize = math.MaxUint32 - binary.MaxVarintLen32 - 5
-
-// MaxEncodedLen returns the maximum length of a snappy block, given its
-// uncompressed length.
-//
-// It will return a negative value if srcLen is too large to encode.
-// 32 bit platforms will have lower thresholds for rejecting big content.
-func MaxEncodedLen(srcLen int) int {
- n := uint64(srcLen)
- if n > 0xffffffff {
- // Also includes negative.
- return -1
- }
- // Size of the varint encoded block size.
- n = n + uint64((bits.Len64(n)+7)/7)
-
- // Add maximum size of encoding block as literals.
- n += uint64(literalExtraSize(int64(srcLen)))
- if n > 0xffffffff {
- return -1
- }
- return int(n)
-}
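-
-// sketchEncodeInto is a minimal sketch of sizing a reusable destination buffer
-// with MaxEncodedLen so Encode does not have to allocate.
-func sketchEncodeInto(dst, src []byte) ([]byte, error) {
-	n := MaxEncodedLen(len(src))
-	if n < 0 {
-		return nil, ErrTooLarge
-	}
-	if cap(dst) < n {
-		dst = make([]byte, n)
-	}
-	// Encode reuses dst when it is large enough for the encoded block.
-	return Encode(dst[:n], src), nil
-}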
-
-var errClosed = errors.New("s2: Writer is closed")
-
-// NewWriter returns a new Writer that compresses to w, using the
-// framing format described at
-// https://github.com/google/snappy/blob/master/framing_format.txt
-//
-// Users must call Close to guarantee all data has been forwarded to
-// the underlying io.Writer and that resources are released.
-// They may also call Flush zero or more times before calling Close.
-func NewWriter(w io.Writer, opts ...WriterOption) *Writer {
- w2 := Writer{
- blockSize: defaultBlockSize,
- concurrency: runtime.GOMAXPROCS(0),
- randSrc: rand.Reader,
- level: levelFast,
- }
- for _, opt := range opts {
- if err := opt(&w2); err != nil {
- w2.errState = err
- return &w2
- }
- }
- w2.obufLen = obufHeaderLen + MaxEncodedLen(w2.blockSize)
- w2.paramsOK = true
- w2.ibuf = make([]byte, 0, w2.blockSize)
- w2.buffers.New = func() interface{} {
- return make([]byte, w2.obufLen)
- }
- w2.Reset(w)
- return &w2
-}
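-
-// sketchCompressStream is a minimal sketch of typical stream usage; the option
-// values are illustrative only.
-func sketchCompressStream(dst io.Writer, payload []byte) error {
-	w := NewWriter(dst,
-		WriterBlockSize(1<<20), // 1MB blocks (the default)
-		WriterConcurrency(4),   // compress up to 4 blocks in parallel
-	)
-	if _, err := w.Write(payload); err != nil {
-		w.Close()
-		return err
-	}
-	// Close flushes buffered data and releases the writer goroutine.
-	return w.Close()
-}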
-
-// Writer is an io.Writer that can write Snappy-compressed bytes.
-type Writer struct {
- errMu sync.Mutex
- errState error
-
- // ibuf is a buffer for the incoming (uncompressed) bytes.
- ibuf []byte
-
- blockSize int
- obufLen int
- concurrency int
- written int64
- uncompWritten int64 // Bytes sent to compression
- output chan chan result
- buffers sync.Pool
- pad int
-
- writer io.Writer
- randSrc io.Reader
- writerWg sync.WaitGroup
- index Index
-
- // wroteStreamHeader is whether we have written the stream header.
- wroteStreamHeader bool
- paramsOK bool
- snappy bool
- flushOnWrite bool
- appendIndex bool
- level uint8
-}
-
-const (
- levelUncompressed = iota + 1
- levelFast
- levelBetter
- levelBest
-)
-
-type result struct {
- b []byte
- // Uncompressed start offset
- startOffset int64
-}
-
-// err returns the previously set error.
-// If no error has been set, it is set to the supplied err (if not nil).
-func (w *Writer) err(err error) error {
- w.errMu.Lock()
- errSet := w.errState
- if errSet == nil && err != nil {
- w.errState = err
- errSet = err
- }
- w.errMu.Unlock()
- return errSet
-}
-
-// Reset discards the writer's state and switches the Snappy writer to write to w.
-// This permits reusing a Writer rather than allocating a new one.
-func (w *Writer) Reset(writer io.Writer) {
- if !w.paramsOK {
- return
- }
- // Close previous writer, if any.
- if w.output != nil {
- close(w.output)
- w.writerWg.Wait()
- w.output = nil
- }
- w.errState = nil
- w.ibuf = w.ibuf[:0]
- w.wroteStreamHeader = false
- w.written = 0
- w.writer = writer
- w.uncompWritten = 0
- w.index.reset(w.blockSize)
-
- // If we didn't get a writer, stop here.
- if writer == nil {
- return
- }
- // If no concurrency requested, don't spin up writer goroutine.
- if w.concurrency == 1 {
- return
- }
-
- toWrite := make(chan chan result, w.concurrency)
- w.output = toWrite
- w.writerWg.Add(1)
-
- // Start a writer goroutine that will write all output in order.
- go func() {
- defer w.writerWg.Done()
-
- // Get a queued write.
- for write := range toWrite {
- // Wait for the data to be available.
- input := <-write
- in := input.b
- if len(in) > 0 {
- if w.err(nil) == nil {
- // Don't expose data from previous buffers.
- toWrite := in[:len(in):len(in)]
- // Write to output.
- n, err := writer.Write(toWrite)
- if err == nil && n != len(toWrite) {
-						err = io.ErrShortWrite
- }
- _ = w.err(err)
- w.err(w.index.add(w.written, input.startOffset))
- w.written += int64(n)
- }
- }
- if cap(in) >= w.obufLen {
- w.buffers.Put(in)
- }
- // close the incoming write request.
- // This can be used for synchronizing flushes.
- close(write)
- }
- }()
-}
-
-// Write satisfies the io.Writer interface.
-func (w *Writer) Write(p []byte) (nRet int, errRet error) {
- if err := w.err(nil); err != nil {
- return 0, err
- }
- if w.flushOnWrite {
- return w.write(p)
- }
- // If we exceed the input buffer size, start writing
- for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err(nil) == nil {
- var n int
- if len(w.ibuf) == 0 {
- // Large write, empty buffer.
- // Write directly from p to avoid copy.
- n, _ = w.write(p)
- } else {
- n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p)
- w.ibuf = w.ibuf[:len(w.ibuf)+n]
- w.write(w.ibuf)
- w.ibuf = w.ibuf[:0]
- }
- nRet += n
- p = p[n:]
- }
- if err := w.err(nil); err != nil {
- return nRet, err
- }
- // p should always be able to fit into w.ibuf now.
- n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p)
- w.ibuf = w.ibuf[:len(w.ibuf)+n]
- nRet += n
- return nRet, nil
-}
-
-// ReadFrom implements the io.ReaderFrom interface.
-// Using this is typically more efficient since it avoids a memory copy.
-// ReadFrom reads data from r until EOF or error.
-// The return value n is the number of bytes read.
-// Any error except io.EOF encountered during the read is also returned.
-func (w *Writer) ReadFrom(r io.Reader) (n int64, err error) {
- if err := w.err(nil); err != nil {
- return 0, err
- }
- if len(w.ibuf) > 0 {
- err := w.Flush()
- if err != nil {
- return 0, err
- }
- }
- if br, ok := r.(byter); ok {
- buf := br.Bytes()
- if err := w.EncodeBuffer(buf); err != nil {
- return 0, err
- }
- return int64(len(buf)), w.Flush()
- }
- for {
- inbuf := w.buffers.Get().([]byte)[:w.blockSize+obufHeaderLen]
- n2, err := io.ReadFull(r, inbuf[obufHeaderLen:])
- if err != nil {
- if err == io.ErrUnexpectedEOF {
- err = io.EOF
- }
- if err != io.EOF {
- return n, w.err(err)
- }
- }
- if n2 == 0 {
- break
- }
- n += int64(n2)
- err2 := w.writeFull(inbuf[:n2+obufHeaderLen])
- if w.err(err2) != nil {
- break
- }
-
- if err != nil {
- // We got EOF and wrote everything
- break
- }
- }
-
- return n, w.err(nil)
-}
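-
-// sketchReadFrom is a minimal sketch of compressing everything from an
-// io.Reader via ReadFrom, avoiding the extra copy through the Writer's
-// internal buffer that io.Copy with Write would incur.
-func sketchReadFrom(dst io.Writer, src io.Reader) (int64, error) {
-	w := NewWriter(dst)
-	n, err := w.ReadFrom(src)
-	if err != nil {
-		w.Close()
-		return n, err
-	}
-	return n, w.Close()
-}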
-
-// AddSkippableBlock will add a skippable block to the stream.
-// The ID must be 0x80-0xfe (inclusive).
-// Length of the skippable block must be <= 16777215 bytes.
-func (w *Writer) AddSkippableBlock(id uint8, data []byte) (err error) {
- if err := w.err(nil); err != nil {
- return err
- }
- if len(data) == 0 {
- return nil
- }
- if id < 0x80 || id > chunkTypePadding {
- return fmt.Errorf("invalid skippable block id %x", id)
- }
- if len(data) > maxChunkSize {
-		return fmt.Errorf("skippable block exceeds maximum size")
- }
- var header [4]byte
- chunkLen := 4 + len(data)
- header[0] = id
- header[1] = uint8(chunkLen >> 0)
- header[2] = uint8(chunkLen >> 8)
- header[3] = uint8(chunkLen >> 16)
- if w.concurrency == 1 {
- write := func(b []byte) error {
- n, err := w.writer.Write(b)
- if err = w.err(err); err != nil {
- return err
- }
-			if n != len(b) {
- return w.err(io.ErrShortWrite)
- }
- w.written += int64(n)
- return w.err(nil)
- }
- if !w.wroteStreamHeader {
- w.wroteStreamHeader = true
- if w.snappy {
- if err := write([]byte(magicChunkSnappy)); err != nil {
- return err
- }
- } else {
- if err := write([]byte(magicChunk)); err != nil {
- return err
- }
- }
- }
- if err := write(header[:]); err != nil {
- return err
- }
-		if err := write(data); err != nil {
-			return err
-		}
-		return nil
-	}
-
- // Create output...
- if !w.wroteStreamHeader {
- w.wroteStreamHeader = true
- hWriter := make(chan result)
- w.output <- hWriter
- if w.snappy {
- hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)}
- } else {
- hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)}
- }
- }
-
- // Copy input.
- inbuf := w.buffers.Get().([]byte)[:4]
- copy(inbuf, header[:])
- inbuf = append(inbuf, data...)
-
- output := make(chan result, 1)
- // Queue output.
- w.output <- output
- output <- result{startOffset: w.uncompWritten, b: inbuf}
-
- return nil
-}
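-
-// sketchAddMetadata is a minimal sketch of embedding application metadata as a
-// skippable block. The id 0x80 is only an example; any id in 0x80-0xfe works,
-// and decoders skip chunk types they do not recognize.
-func sketchAddMetadata(w *Writer, meta []byte) error {
-	const metadataID = 0x80
-	if err := w.AddSkippableBlock(metadataID, meta); err != nil {
-		return err
-	}
-	// Flush so the skippable block reaches the underlying writer.
-	return w.Flush()
-}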
-
-// EncodeBuffer will add a buffer to the stream.
-// This is the fastest way to encode a stream,
-// but when concurrency != 1 the input buffer must not be modified by the caller
-// until Flush or Close has been called.
-//
-// If you cannot control that, use the regular Write function.
-//
-// Note that input is not buffered.
-// This means that each write will result in discrete blocks being created.
-// For buffered writes, use the regular Write function.
-func (w *Writer) EncodeBuffer(buf []byte) (err error) {
- if err := w.err(nil); err != nil {
- return err
- }
-
- if w.flushOnWrite {
- _, err := w.write(buf)
- return err
- }
- // Flush queued data first.
- if len(w.ibuf) > 0 {
- err := w.Flush()
- if err != nil {
- return err
- }
- }
- if w.concurrency == 1 {
- _, err := w.writeSync(buf)
- return err
- }
-
- // Spawn goroutine and write block to output channel.
- if !w.wroteStreamHeader {
- w.wroteStreamHeader = true
- hWriter := make(chan result)
- w.output <- hWriter
- if w.snappy {
- hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)}
- } else {
- hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)}
- }
- }
-
- for len(buf) > 0 {
- // Cut input.
- uncompressed := buf
- if len(uncompressed) > w.blockSize {
- uncompressed = uncompressed[:w.blockSize]
- }
- buf = buf[len(uncompressed):]
- // Get an output buffer.
- obuf := w.buffers.Get().([]byte)[:len(uncompressed)+obufHeaderLen]
- output := make(chan result)
- // Queue output now, so we keep order.
- w.output <- output
- res := result{
- startOffset: w.uncompWritten,
- }
- w.uncompWritten += int64(len(uncompressed))
- go func() {
- checksum := crc(uncompressed)
-
- // Set to uncompressed.
- chunkType := uint8(chunkTypeUncompressedData)
- chunkLen := 4 + len(uncompressed)
-
- // Attempt compressing.
- n := binary.PutUvarint(obuf[obufHeaderLen:], uint64(len(uncompressed)))
- n2 := w.encodeBlock(obuf[obufHeaderLen+n:], uncompressed)
-
- // Check if we should use this, or store as uncompressed instead.
- if n2 > 0 {
- chunkType = uint8(chunkTypeCompressedData)
- chunkLen = 4 + n + n2
- obuf = obuf[:obufHeaderLen+n+n2]
- } else {
- // copy uncompressed
- copy(obuf[obufHeaderLen:], uncompressed)
- }
-
- // Fill in the per-chunk header that comes before the body.
- obuf[0] = chunkType
- obuf[1] = uint8(chunkLen >> 0)
- obuf[2] = uint8(chunkLen >> 8)
- obuf[3] = uint8(chunkLen >> 16)
- obuf[4] = uint8(checksum >> 0)
- obuf[5] = uint8(checksum >> 8)
- obuf[6] = uint8(checksum >> 16)
- obuf[7] = uint8(checksum >> 24)
-
- // Queue final output.
- res.b = obuf
- output <- res
- }()
- }
- return nil
-}
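-
-// sketchEncodeBuffers is a minimal sketch of feeding whole buffers through
-// EncodeBuffer. When concurrency != 1 each buffer must stay unchanged until
-// Flush or Close returns, which Close guarantees below.
-func sketchEncodeBuffers(dst io.Writer, bufs [][]byte) error {
-	w := NewWriter(dst)
-	for _, b := range bufs {
-		if err := w.EncodeBuffer(b); err != nil {
-			w.Close()
-			return err
-		}
-	}
-	// Close implies Flush; afterwards the buffers may be reused.
-	return w.Close()
-}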
-
-func (w *Writer) encodeBlock(obuf, uncompressed []byte) int {
- if w.snappy {
- switch w.level {
- case levelFast:
- return encodeBlockSnappy(obuf, uncompressed)
- case levelBetter:
- return encodeBlockBetterSnappy(obuf, uncompressed)
- case levelBest:
- return encodeBlockBestSnappy(obuf, uncompressed)
- }
- return 0
- }
- switch w.level {
- case levelFast:
- return encodeBlock(obuf, uncompressed)
- case levelBetter:
- return encodeBlockBetter(obuf, uncompressed)
- case levelBest:
- return encodeBlockBest(obuf, uncompressed)
- }
- return 0
-}
-
-func (w *Writer) write(p []byte) (nRet int, errRet error) {
- if err := w.err(nil); err != nil {
- return 0, err
- }
- if w.concurrency == 1 {
- return w.writeSync(p)
- }
-
- // Spawn goroutine and write block to output channel.
- for len(p) > 0 {
- if !w.wroteStreamHeader {
- w.wroteStreamHeader = true
- hWriter := make(chan result)
- w.output <- hWriter
- if w.snappy {
- hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)}
- } else {
- hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)}
- }
- }
-
- var uncompressed []byte
- if len(p) > w.blockSize {
- uncompressed, p = p[:w.blockSize], p[w.blockSize:]
- } else {
- uncompressed, p = p, nil
- }
-
- // Copy input.
- // If the block is incompressible, this is used for the result.
- inbuf := w.buffers.Get().([]byte)[:len(uncompressed)+obufHeaderLen]
- obuf := w.buffers.Get().([]byte)[:w.obufLen]
- copy(inbuf[obufHeaderLen:], uncompressed)
- uncompressed = inbuf[obufHeaderLen:]
-
- output := make(chan result)
- // Queue output now, so we keep order.
- w.output <- output
- res := result{
- startOffset: w.uncompWritten,
- }
- w.uncompWritten += int64(len(uncompressed))
-
- go func() {
- checksum := crc(uncompressed)
-
- // Set to uncompressed.
- chunkType := uint8(chunkTypeUncompressedData)
- chunkLen := 4 + len(uncompressed)
-
- // Attempt compressing.
- n := binary.PutUvarint(obuf[obufHeaderLen:], uint64(len(uncompressed)))
- n2 := w.encodeBlock(obuf[obufHeaderLen+n:], uncompressed)
-
- // Check if we should use this, or store as uncompressed instead.
- if n2 > 0 {
- chunkType = uint8(chunkTypeCompressedData)
- chunkLen = 4 + n + n2
- obuf = obuf[:obufHeaderLen+n+n2]
- } else {
- // Use input as output.
- obuf, inbuf = inbuf, obuf
- }
-
- // Fill in the per-chunk header that comes before the body.
- obuf[0] = chunkType
- obuf[1] = uint8(chunkLen >> 0)
- obuf[2] = uint8(chunkLen >> 8)
- obuf[3] = uint8(chunkLen >> 16)
- obuf[4] = uint8(checksum >> 0)
- obuf[5] = uint8(checksum >> 8)
- obuf[6] = uint8(checksum >> 16)
- obuf[7] = uint8(checksum >> 24)
-
- // Queue final output.
- res.b = obuf
- output <- res
-
- // Put unused buffer back in pool.
- w.buffers.Put(inbuf)
- }()
- nRet += len(uncompressed)
- }
- return nRet, nil
-}
-
-// writeFull is a special version of write that will always write the full buffer.
-// Data to be compressed should start at offset obufHeaderLen and fill the remainder of the buffer.
-// The data will be written as a single block.
-// The caller is not allowed to use inbuf after this function has been called.
-func (w *Writer) writeFull(inbuf []byte) (errRet error) {
- if err := w.err(nil); err != nil {
- return err
- }
-
- if w.concurrency == 1 {
- _, err := w.writeSync(inbuf[obufHeaderLen:])
- return err
- }
-
- // Spawn goroutine and write block to output channel.
- if !w.wroteStreamHeader {
- w.wroteStreamHeader = true
- hWriter := make(chan result)
- w.output <- hWriter
- if w.snappy {
- hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunkSnappy)}
- } else {
- hWriter <- result{startOffset: w.uncompWritten, b: []byte(magicChunk)}
- }
- }
-
- // Get an output buffer.
- obuf := w.buffers.Get().([]byte)[:w.obufLen]
- uncompressed := inbuf[obufHeaderLen:]
-
- output := make(chan result)
- // Queue output now, so we keep order.
- w.output <- output
- res := result{
- startOffset: w.uncompWritten,
- }
- w.uncompWritten += int64(len(uncompressed))
-
- go func() {
- checksum := crc(uncompressed)
-
- // Set to uncompressed.
- chunkType := uint8(chunkTypeUncompressedData)
- chunkLen := 4 + len(uncompressed)
-
- // Attempt compressing.
- n := binary.PutUvarint(obuf[obufHeaderLen:], uint64(len(uncompressed)))
- n2 := w.encodeBlock(obuf[obufHeaderLen+n:], uncompressed)
-
- // Check if we should use this, or store as uncompressed instead.
- if n2 > 0 {
- chunkType = uint8(chunkTypeCompressedData)
- chunkLen = 4 + n + n2
- obuf = obuf[:obufHeaderLen+n+n2]
- } else {
- // Use input as output.
- obuf, inbuf = inbuf, obuf
- }
-
- // Fill in the per-chunk header that comes before the body.
- obuf[0] = chunkType
- obuf[1] = uint8(chunkLen >> 0)
- obuf[2] = uint8(chunkLen >> 8)
- obuf[3] = uint8(chunkLen >> 16)
- obuf[4] = uint8(checksum >> 0)
- obuf[5] = uint8(checksum >> 8)
- obuf[6] = uint8(checksum >> 16)
- obuf[7] = uint8(checksum >> 24)
-
- // Queue final output.
- res.b = obuf
- output <- res
-
- // Put unused buffer back in pool.
- w.buffers.Put(inbuf)
- }()
- return nil
-}
-
-func (w *Writer) writeSync(p []byte) (nRet int, errRet error) {
- if err := w.err(nil); err != nil {
- return 0, err
- }
- if !w.wroteStreamHeader {
- w.wroteStreamHeader = true
- var n int
- var err error
- if w.snappy {
- n, err = w.writer.Write([]byte(magicChunkSnappy))
- } else {
- n, err = w.writer.Write([]byte(magicChunk))
- }
- if err != nil {
- return 0, w.err(err)
- }
- if n != len(magicChunk) {
- return 0, w.err(io.ErrShortWrite)
- }
- w.written += int64(n)
- }
-
- for len(p) > 0 {
- var uncompressed []byte
- if len(p) > w.blockSize {
- uncompressed, p = p[:w.blockSize], p[w.blockSize:]
- } else {
- uncompressed, p = p, nil
- }
-
- obuf := w.buffers.Get().([]byte)[:w.obufLen]
- checksum := crc(uncompressed)
-
- // Set to uncompressed.
- chunkType := uint8(chunkTypeUncompressedData)
- chunkLen := 4 + len(uncompressed)
-
- // Attempt compressing.
- n := binary.PutUvarint(obuf[obufHeaderLen:], uint64(len(uncompressed)))
- n2 := w.encodeBlock(obuf[obufHeaderLen+n:], uncompressed)
-
- if n2 > 0 {
- chunkType = uint8(chunkTypeCompressedData)
- chunkLen = 4 + n + n2
- obuf = obuf[:obufHeaderLen+n+n2]
- } else {
- obuf = obuf[:8]
- }
-
- // Fill in the per-chunk header that comes before the body.
- obuf[0] = chunkType
- obuf[1] = uint8(chunkLen >> 0)
- obuf[2] = uint8(chunkLen >> 8)
- obuf[3] = uint8(chunkLen >> 16)
- obuf[4] = uint8(checksum >> 0)
- obuf[5] = uint8(checksum >> 8)
- obuf[6] = uint8(checksum >> 16)
- obuf[7] = uint8(checksum >> 24)
-
- n, err := w.writer.Write(obuf)
- if err != nil {
- return 0, w.err(err)
- }
- if n != len(obuf) {
- return 0, w.err(io.ErrShortWrite)
- }
- w.err(w.index.add(w.written, w.uncompWritten))
- w.written += int64(n)
- w.uncompWritten += int64(len(uncompressed))
-
- if chunkType == chunkTypeUncompressedData {
- // Write uncompressed data.
- n, err := w.writer.Write(uncompressed)
- if err != nil {
- return 0, w.err(err)
- }
- if n != len(uncompressed) {
- return 0, w.err(io.ErrShortWrite)
- }
- w.written += int64(n)
- }
- w.buffers.Put(obuf)
- // Queue final output.
- nRet += len(uncompressed)
- }
- return nRet, nil
-}
-
-// Flush flushes the Writer to its underlying io.Writer.
-// This does not apply padding.
-func (w *Writer) Flush() error {
- if err := w.err(nil); err != nil {
- return err
- }
-
- // Queue any data still in input buffer.
- if len(w.ibuf) != 0 {
- if !w.wroteStreamHeader {
- _, err := w.writeSync(w.ibuf)
- w.ibuf = w.ibuf[:0]
- return w.err(err)
- } else {
- _, err := w.write(w.ibuf)
- w.ibuf = w.ibuf[:0]
- err = w.err(err)
- if err != nil {
- return err
- }
- }
- }
- if w.output == nil {
- return w.err(nil)
- }
-
- // Send empty buffer
- res := make(chan result)
- w.output <- res
- // Block until this has been picked up.
- res <- result{b: nil, startOffset: w.uncompWritten}
- // When it is closed, we have flushed.
- <-res
- return w.err(nil)
-}
-
-// Close calls Flush and then closes the Writer.
-// Calling Close multiple times is ok,
-// but calling CloseIndex after Close will not return the index.
-func (w *Writer) Close() error {
- _, err := w.closeIndex(w.appendIndex)
- return err
-}
-
-// CloseIndex calls Close and returns an index on first call.
-// This is not required if you are only adding an index to a stream.
-func (w *Writer) CloseIndex() ([]byte, error) {
- return w.closeIndex(true)
-}
-
-func (w *Writer) closeIndex(idx bool) ([]byte, error) {
- err := w.Flush()
- if w.output != nil {
- close(w.output)
- w.writerWg.Wait()
- w.output = nil
- }
-
- var index []byte
- if w.err(nil) == nil && w.writer != nil {
- // Create index.
- if idx {
- compSize := int64(-1)
- if w.pad <= 1 {
- compSize = w.written
- }
- index = w.index.appendTo(w.ibuf[:0], w.uncompWritten, compSize)
- // Count as written for padding.
- if w.appendIndex {
- w.written += int64(len(index))
- }
- if true {
- _, err := w.index.Load(index)
- if err != nil {
- panic(err)
- }
- }
- }
-
- if w.pad > 1 {
- tmp := w.ibuf[:0]
- if len(index) > 0 {
- // Allocate another buffer.
- tmp = w.buffers.Get().([]byte)[:0]
- defer w.buffers.Put(tmp)
- }
- add := calcSkippableFrame(w.written, int64(w.pad))
- frame, err := skippableFrame(tmp, add, w.randSrc)
- if err = w.err(err); err != nil {
- return nil, err
- }
- n, err2 := w.writer.Write(frame)
- if err2 == nil && n != len(frame) {
- err2 = io.ErrShortWrite
- }
- _ = w.err(err2)
- }
- if len(index) > 0 && w.appendIndex {
- n, err2 := w.writer.Write(index)
- if err2 == nil && n != len(index) {
- err2 = io.ErrShortWrite
- }
- _ = w.err(err2)
- }
- }
- err = w.err(errClosed)
- if err == errClosed {
- return index, nil
- }
- return nil, err
-}
-
-// calcSkippableFrame will return the total size to be added so that written
-// becomes divisible by wantMultiple.
-// Nonzero return values will always be >= skippableFrameHeader.
-// The function will panic if written < 0 or wantMultiple <= 0.
-func calcSkippableFrame(written, wantMultiple int64) int {
- if wantMultiple <= 0 {
- panic("wantMultiple <= 0")
- }
- if written < 0 {
- panic("written < 0")
- }
- leftOver := written % wantMultiple
- if leftOver == 0 {
- return 0
- }
- toAdd := wantMultiple - leftOver
- for toAdd < skippableFrameHeader {
- toAdd += wantMultiple
- }
- return int(toAdd)
-}
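-
-// A small worked example (skippableFrameHeader is 4):
-//
-//	calcSkippableFrame(1000, 1024) == 24   // 1000+24 = 1024
-//	calcSkippableFrame(1022, 1024) == 1026 // 2 bytes cannot hold the 4-byte
-//	                                       // header, so a whole extra multiple
-//	                                       // is added: 1022+1026 = 2048
-//	calcSkippableFrame(2048, 1024) == 0    // already aligned, no padding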
-
-// skippableFrame will add a skippable frame with a total size of bytes.
-// total should be >= skippableFrameHeader and < maxBlockSize + skippableFrameHeader
-func skippableFrame(dst []byte, total int, r io.Reader) ([]byte, error) {
- if total == 0 {
- return dst, nil
- }
- if total < skippableFrameHeader {
- return dst, fmt.Errorf("s2: requested skippable frame (%d) < 4", total)
- }
- if int64(total) >= maxBlockSize+skippableFrameHeader {
- return dst, fmt.Errorf("s2: requested skippable frame (%d) >= max 1<<24", total)
- }
- // Chunk type 0xfe "Section 4.4 Padding (chunk type 0xfe)"
- dst = append(dst, chunkTypePadding)
- f := uint32(total - skippableFrameHeader)
- // Add chunk length.
- dst = append(dst, uint8(f), uint8(f>>8), uint8(f>>16))
- // Add data
- start := len(dst)
- dst = append(dst, make([]byte, f)...)
- _, err := io.ReadFull(r, dst[start:])
- return dst, err
-}
-
-// WriterOption is an option for creating an encoder.
-type WriterOption func(*Writer) error
-
-// WriterConcurrency will set the concurrency,
-// meaning the maximum number of blocks to encode concurrently.
-// The value supplied must be at least 1.
-// By default this will be set to GOMAXPROCS.
-func WriterConcurrency(n int) WriterOption {
- return func(w *Writer) error {
- if n <= 0 {
- return errors.New("concurrency must be at least 1")
- }
- w.concurrency = n
- return nil
- }
-}
-
-// WriterAddIndex will append an index to the end of a stream
-// when it is closed.
-func WriterAddIndex() WriterOption {
- return func(w *Writer) error {
- w.appendIndex = true
- return nil
- }
-}
-
-// WriterBetterCompression will enable better compression.
-// EncodeBetter compresses better than Encode but typically with a
-// 10-40% speed decrease on both compression and decompression.
-func WriterBetterCompression() WriterOption {
- return func(w *Writer) error {
- w.level = levelBetter
- return nil
- }
-}
-
-// WriterBestCompression will enable best compression.
-// EncodeBest compresses better than Encode but typically with a
-// big speed decrease on compression.
-func WriterBestCompression() WriterOption {
- return func(w *Writer) error {
- w.level = levelBest
- return nil
- }
-}
-
-// WriterUncompressed will bypass compression.
-// The stream will be written as uncompressed blocks only.
-// If concurrency is > 1, CRC calculation and output will still be done asynchronously.
-func WriterUncompressed() WriterOption {
- return func(w *Writer) error {
- w.level = levelUncompressed
- return nil
- }
-}
-
-// WriterBlockSize allows overriding the default block size.
-// Blocks will be this size or smaller.
-// Minimum size is 4KB and maximum size is 4MB.
-//
-// Bigger blocks may give bigger throughput on systems with many cores,
-// and will increase compression slightly, but it will limit the possible
-// concurrency for smaller payloads for both encoding and decoding.
-// Default block size is 1MB.
-//
-// When writing Snappy compatible output using WriterSnappyCompat,
-// the maximum block size is 64KB.
-func WriterBlockSize(n int) WriterOption {
- return func(w *Writer) error {
-		if w.snappy && (n > maxSnappyBlockSize || n < minBlockSize) {
-			return errors.New("s2: block size out of range. Must be <= 64K and >= 4KB for snappy compatible output")
-		}
-		if n > maxBlockSize || n < minBlockSize {
-			return errors.New("s2: block size out of range. Must be <= 4MB and >= 4KB")
- }
- w.blockSize = n
- return nil
- }
-}
-
-// WriterPadding will add padding to all output so the size will be a multiple of n.
-// This can be used to obfuscate the exact output size or make blocks of a certain size.
-// The contents will be a skippable frame, so it will be invisible to the decoder.
-// n must be > 0 and <= 4MB.
-// The padded area will be filled with data from crypto/rand.Reader.
-// The padding will be applied whenever Close is called on the writer.
-func WriterPadding(n int) WriterOption {
- return func(w *Writer) error {
- if n <= 0 {
- return fmt.Errorf("s2: padding must be at least 1")
- }
-		// No need to waste our time.
-		if n == 1 {
-			w.pad = 0
-			return nil
-		}
-		if n > maxBlockSize {
-			return fmt.Errorf("s2: padding must be no larger than 4MB")
- }
- w.pad = n
- return nil
- }
-}
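-
-// sketchPaddedWriter is a minimal sketch of padding output to 4KB boundaries,
-// e.g. to hide the exact compressed size. The padding is written as a
-// skippable frame when Close is called.
-func sketchPaddedWriter(dst io.Writer, payload []byte) error {
-	w := NewWriter(dst, WriterPadding(4<<10))
-	if _, err := w.Write(payload); err != nil {
-		w.Close()
-		return err
-	}
-	return w.Close()
-}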
-
-// WriterPaddingSrc will get random data for padding from the supplied source.
-// By default crypto/rand is used.
-func WriterPaddingSrc(reader io.Reader) WriterOption {
- return func(w *Writer) error {
- w.randSrc = reader
- return nil
- }
-}
-
-// WriterSnappyCompat will write snappy compatible output.
-// The output can be decompressed using either snappy or s2.
-// If the block size is larger than 64KB, it is reduced to just under 64KB.
-func WriterSnappyCompat() WriterOption {
- return func(w *Writer) error {
- w.snappy = true
- if w.blockSize > 64<<10 {
- // We choose 8 bytes less than 64K, since that will make literal emits slightly more effective.
- // And allows us to skip some size checks.
- w.blockSize = (64 << 10) - 8
- }
- return nil
- }
-}
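-
-// sketchSnappyCompatible is a minimal sketch of producing a stream that a
-// plain Snappy stream reader can also consume; the block size is capped just
-// below 64KB by the option itself.
-func sketchSnappyCompatible(dst io.Writer, payload []byte) error {
-	w := NewWriter(dst, WriterSnappyCompat(), WriterBestCompression())
-	if _, err := w.Write(payload); err != nil {
-		w.Close()
-		return err
-	}
-	return w.Close()
-}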
-
-// WriterFlushOnWrite will compress blocks on each call to the Write function.
-//
-// This is quite inefficient as block sizes will depend on the write size.
-//
-// Use WriterConcurrency(1) to also make sure that output is flushed
-// when Write calls return; otherwise output will be written when compression is done.
-func WriterFlushOnWrite() WriterOption {
- return func(w *Writer) error {
- w.flushOnWrite = true
- return nil
- }
-}
diff --git a/vendor/github.com/klauspost/compress/s2/encode_all.go b/vendor/github.com/klauspost/compress/s2/encode_all.go
deleted file mode 100644
index 8b16c38a6..000000000
--- a/vendor/github.com/klauspost/compress/s2/encode_all.go
+++ /dev/null
@@ -1,456 +0,0 @@
-// Copyright 2016 The Snappy-Go Authors. All rights reserved.
-// Copyright (c) 2019 Klaus Post. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package s2
-
-import (
- "bytes"
- "encoding/binary"
- "math/bits"
-)
-
-func load32(b []byte, i int) uint32 {
- return binary.LittleEndian.Uint32(b[i:])
-}
-
-func load64(b []byte, i int) uint64 {
- return binary.LittleEndian.Uint64(b[i:])
-}
-
-// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <64.
-func hash6(u uint64, h uint8) uint32 {
- const prime6bytes = 227718039650203
- return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63))
-}
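-
-// For example, with tableBits = 14 the result always fits the 1<<14 entry
-// tables used by the encoders below, so it can index them directly:
-//
-//	idx := hash6(load64(src, s), 14) // idx < 1<<14
-//	candidate := int(table[idx])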
-
-func encodeGo(dst, src []byte) []byte {
- if n := MaxEncodedLen(len(src)); n < 0 {
- panic(ErrTooLarge)
- } else if len(dst) < n {
- dst = make([]byte, n)
- }
-
- // The block starts with the varint-encoded length of the decompressed bytes.
- d := binary.PutUvarint(dst, uint64(len(src)))
-
- if len(src) == 0 {
- return dst[:d]
- }
- if len(src) < minNonLiteralBlockSize {
- d += emitLiteral(dst[d:], src)
- return dst[:d]
- }
- n := encodeBlockGo(dst[d:], src)
- if n > 0 {
- d += n
- return dst[:d]
- }
- // Not compressible
- d += emitLiteral(dst[d:], src)
- return dst[:d]
-}
-
-// encodeBlockGo encodes a non-empty src to a guaranteed-large-enough dst. It
-// assumes that the varint-encoded length of the decompressed bytes has already
-// been written.
-//
-// It also assumes that:
-// len(dst) >= MaxEncodedLen(len(src)) &&
-// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
-func encodeBlockGo(dst, src []byte) (d int) {
- // Initialize the hash table.
- const (
- tableBits = 14
- maxTableSize = 1 << tableBits
-
- debug = false
- )
-
- var table [maxTableSize]uint32
-
- // sLimit is when to stop looking for offset/length copies. The inputMargin
- // lets us use a fast path for emitLiteral in the main loop, while we are
- // looking for copies.
- sLimit := len(src) - inputMargin
-
- // Bail if we can't compress to at least this.
- dstLimit := len(src) - len(src)>>5 - 5
-
- // nextEmit is where in src the next emitLiteral should start from.
- nextEmit := 0
-
- // The encoded form must start with a literal, as there are no previous
- // bytes to copy, so we start looking for hash matches at s == 1.
- s := 1
- cv := load64(src, s)
-
- // We search for a repeat at -1, but don't output repeats when nextEmit == 0
- repeat := 1
-
- for {
- candidate := 0
- for {
- // Next src position to check
- nextS := s + (s-nextEmit)>>6 + 4
- if nextS > sLimit {
- goto emitRemainder
- }
- hash0 := hash6(cv, tableBits)
- hash1 := hash6(cv>>8, tableBits)
- candidate = int(table[hash0])
- candidate2 := int(table[hash1])
- table[hash0] = uint32(s)
- table[hash1] = uint32(s + 1)
- hash2 := hash6(cv>>16, tableBits)
-
- // Check repeat at offset checkRep.
- const checkRep = 1
- if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
- base := s + checkRep
- // Extend back
- for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
- i--
- base--
- }
- d += emitLiteral(dst[d:], src[nextEmit:base])
-
- // Extend forward
- candidate := s - repeat + 4 + checkRep
- s += 4 + checkRep
- for s <= sLimit {
- if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
- s += bits.TrailingZeros64(diff) >> 3
- break
- }
- s += 8
- candidate += 8
- }
- if debug {
- // Validate match.
- if s <= candidate {
- panic("s <= candidate")
- }
- a := src[base:s]
- b := src[base-repeat : base-repeat+(s-base)]
- if !bytes.Equal(a, b) {
- panic("mismatch")
- }
- }
- if nextEmit > 0 {
- // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
- d += emitRepeat(dst[d:], repeat, s-base)
- } else {
- // First match, cannot be repeat.
- d += emitCopy(dst[d:], repeat, s-base)
- }
- nextEmit = s
- if s >= sLimit {
- goto emitRemainder
- }
-
- cv = load64(src, s)
- continue
- }
-
- if uint32(cv) == load32(src, candidate) {
- break
- }
- candidate = int(table[hash2])
- if uint32(cv>>8) == load32(src, candidate2) {
- table[hash2] = uint32(s + 2)
- candidate = candidate2
- s++
- break
- }
- table[hash2] = uint32(s + 2)
- if uint32(cv>>16) == load32(src, candidate) {
- s += 2
- break
- }
-
- cv = load64(src, nextS)
- s = nextS
- }
-
- // Extend backwards.
- // The top bytes will be rechecked to get the full match.
- for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
- candidate--
- s--
- }
-
- // Bail if we exceed the maximum size.
- if d+(s-nextEmit) > dstLimit {
- return 0
- }
-
- // A 4-byte match has been found. We'll later see if more than 4 bytes
- // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
- // them as literal bytes.
-
- d += emitLiteral(dst[d:], src[nextEmit:s])
-
- // Call emitCopy, and then see if another emitCopy could be our next
- // move. Repeat until we find no match for the input immediately after
- // what was consumed by the last emitCopy call.
- //
- // If we exit this loop normally then we need to call emitLiteral next,
- // though we don't yet know how big the literal will be. We handle that
- // by proceeding to the next iteration of the main loop. We also can
- // exit this loop via goto if we get close to exhausting the input.
- for {
- // Invariant: we have a 4-byte match at s, and no need to emit any
- // literal bytes prior to s.
- base := s
- repeat = base - candidate
-
- // Extend the 4-byte match as long as possible.
- s += 4
- candidate += 4
- for s <= len(src)-8 {
- if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
- s += bits.TrailingZeros64(diff) >> 3
- break
- }
- s += 8
- candidate += 8
- }
-
- d += emitCopy(dst[d:], repeat, s-base)
- if debug {
- // Validate match.
- if s <= candidate {
- panic("s <= candidate")
- }
- a := src[base:s]
- b := src[base-repeat : base-repeat+(s-base)]
- if !bytes.Equal(a, b) {
- panic("mismatch")
- }
- }
-
- nextEmit = s
- if s >= sLimit {
- goto emitRemainder
- }
-
- if d > dstLimit {
- // Do we have space for more, if not bail.
- return 0
- }
- // Check for an immediate match, otherwise start search at s+1
- x := load64(src, s-2)
- m2Hash := hash6(x, tableBits)
- currHash := hash6(x>>16, tableBits)
- candidate = int(table[currHash])
- table[m2Hash] = uint32(s - 2)
- table[currHash] = uint32(s)
- if debug && s == candidate {
- panic("s == candidate")
- }
- if uint32(x>>16) != load32(src, candidate) {
- cv = load64(src, s+1)
- s++
- break
- }
- }
- }
-
-emitRemainder:
- if nextEmit < len(src) {
- // Bail if we exceed the maximum size.
- if d+len(src)-nextEmit > dstLimit {
- return 0
- }
- d += emitLiteral(dst[d:], src[nextEmit:])
- }
- return d
-}
-
-func encodeBlockSnappyGo(dst, src []byte) (d int) {
- // Initialize the hash table.
- const (
- tableBits = 14
- maxTableSize = 1 << tableBits
- )
-
- var table [maxTableSize]uint32
-
- // sLimit is when to stop looking for offset/length copies. The inputMargin
- // lets us use a fast path for emitLiteral in the main loop, while we are
- // looking for copies.
- sLimit := len(src) - inputMargin
-
- // Bail if we can't compress to at least this.
- dstLimit := len(src) - len(src)>>5 - 5
-
- // nextEmit is where in src the next emitLiteral should start from.
- nextEmit := 0
-
- // The encoded form must start with a literal, as there are no previous
- // bytes to copy, so we start looking for hash matches at s == 1.
- s := 1
- cv := load64(src, s)
-
- // We search for a repeat at -1, but don't output repeats when nextEmit == 0
- repeat := 1
-
- for {
- candidate := 0
- for {
- // Next src position to check
- nextS := s + (s-nextEmit)>>6 + 4
- if nextS > sLimit {
- goto emitRemainder
- }
- hash0 := hash6(cv, tableBits)
- hash1 := hash6(cv>>8, tableBits)
- candidate = int(table[hash0])
- candidate2 := int(table[hash1])
- table[hash0] = uint32(s)
- table[hash1] = uint32(s + 1)
- hash2 := hash6(cv>>16, tableBits)
-
- // Check repeat at offset checkRep.
- const checkRep = 1
- if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
- base := s + checkRep
- // Extend back
- for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
- i--
- base--
- }
- d += emitLiteral(dst[d:], src[nextEmit:base])
-
- // Extend forward
- candidate := s - repeat + 4 + checkRep
- s += 4 + checkRep
- for s <= sLimit {
- if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
- s += bits.TrailingZeros64(diff) >> 3
- break
- }
- s += 8
- candidate += 8
- }
-
- d += emitCopyNoRepeat(dst[d:], repeat, s-base)
- nextEmit = s
- if s >= sLimit {
- goto emitRemainder
- }
-
- cv = load64(src, s)
- continue
- }
-
- if uint32(cv) == load32(src, candidate) {
- break
- }
- candidate = int(table[hash2])
- if uint32(cv>>8) == load32(src, candidate2) {
- table[hash2] = uint32(s + 2)
- candidate = candidate2
- s++
- break
- }
- table[hash2] = uint32(s + 2)
- if uint32(cv>>16) == load32(src, candidate) {
- s += 2
- break
- }
-
- cv = load64(src, nextS)
- s = nextS
- }
-
- // Extend backwards
- for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
- candidate--
- s--
- }
-
- // Bail if we exceed the maximum size.
- if d+(s-nextEmit) > dstLimit {
- return 0
- }
-
- // A 4-byte match has been found. We'll later see if more than 4 bytes
- // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
- // them as literal bytes.
-
- d += emitLiteral(dst[d:], src[nextEmit:s])
-
- // Call emitCopy, and then see if another emitCopy could be our next
- // move. Repeat until we find no match for the input immediately after
- // what was consumed by the last emitCopy call.
- //
- // If we exit this loop normally then we need to call emitLiteral next,
- // though we don't yet know how big the literal will be. We handle that
- // by proceeding to the next iteration of the main loop. We also can
- // exit this loop via goto if we get close to exhausting the input.
- for {
- // Invariant: we have a 4-byte match at s, and no need to emit any
- // literal bytes prior to s.
- base := s
- repeat = base - candidate
-
- // Extend the 4-byte match as long as possible.
- s += 4
- candidate += 4
- for s <= len(src)-8 {
- if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
- s += bits.TrailingZeros64(diff) >> 3
- break
- }
- s += 8
- candidate += 8
- }
-
- d += emitCopyNoRepeat(dst[d:], repeat, s-base)
- if false {
- // Validate match.
- a := src[base:s]
- b := src[base-repeat : base-repeat+(s-base)]
- if !bytes.Equal(a, b) {
- panic("mismatch")
- }
- }
-
- nextEmit = s
- if s >= sLimit {
- goto emitRemainder
- }
-
- if d > dstLimit {
- // Do we have space for more, if not bail.
- return 0
- }
- // Check for an immediate match, otherwise start search at s+1
- x := load64(src, s-2)
- m2Hash := hash6(x, tableBits)
- currHash := hash6(x>>16, tableBits)
- candidate = int(table[currHash])
- table[m2Hash] = uint32(s - 2)
- table[currHash] = uint32(s)
- if uint32(x>>16) != load32(src, candidate) {
- cv = load64(src, s+1)
- s++
- break
- }
- }
- }
-
-emitRemainder:
- if nextEmit < len(src) {
- // Bail if we exceed the maximum size.
- if d+len(src)-nextEmit > dstLimit {
- return 0
- }
- d += emitLiteral(dst[d:], src[nextEmit:])
- }
- return d
-}
diff --git a/vendor/github.com/klauspost/compress/s2/encode_amd64.go b/vendor/github.com/klauspost/compress/s2/encode_amd64.go
deleted file mode 100644
index e612225f4..000000000
--- a/vendor/github.com/klauspost/compress/s2/encode_amd64.go
+++ /dev/null
@@ -1,142 +0,0 @@
-//go:build !appengine && !noasm && gc
-// +build !appengine,!noasm,gc
-
-package s2
-
-// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
-// assumes that the varint-encoded length of the decompressed bytes has already
-// been written.
-//
-// It also assumes that:
-// len(dst) >= MaxEncodedLen(len(src)) &&
-// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
-func encodeBlock(dst, src []byte) (d int) {
- const (
- // Use 12 bit table when less than...
- limit12B = 16 << 10
- // Use 10 bit table when less than...
- limit10B = 4 << 10
- // Use 8 bit table when less than...
- limit8B = 512
- )
-
- if len(src) >= 4<<20 {
- return encodeBlockAsm(dst, src)
- }
- if len(src) >= limit12B {
- return encodeBlockAsm4MB(dst, src)
- }
- if len(src) >= limit10B {
- return encodeBlockAsm12B(dst, src)
- }
- if len(src) >= limit8B {
- return encodeBlockAsm10B(dst, src)
- }
- if len(src) < minNonLiteralBlockSize {
- return 0
- }
- return encodeBlockAsm8B(dst, src)
-}
-
-// encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It
-// assumes that the varint-encoded length of the decompressed bytes has already
-// been written.
-//
-// It also assumes that:
-// len(dst) >= MaxEncodedLen(len(src)) &&
-// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
-func encodeBlockBetter(dst, src []byte) (d int) {
- const (
- // Use 12 bit table when less than...
- limit12B = 16 << 10
- // Use 10 bit table when less than...
- limit10B = 4 << 10
- // Use 8 bit table when less than...
- limit8B = 512
- )
-
- if len(src) > 4<<20 {
- return encodeBetterBlockAsm(dst, src)
- }
- if len(src) >= limit12B {
- return encodeBetterBlockAsm4MB(dst, src)
- }
- if len(src) >= limit10B {
- return encodeBetterBlockAsm12B(dst, src)
- }
- if len(src) >= limit8B {
- return encodeBetterBlockAsm10B(dst, src)
- }
- if len(src) < minNonLiteralBlockSize {
- return 0
- }
- return encodeBetterBlockAsm8B(dst, src)
-}
-
-// encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
-// assumes that the varint-encoded length of the decompressed bytes has already
-// been written.
-//
-// It also assumes that:
-// len(dst) >= MaxEncodedLen(len(src)) &&
-// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
-func encodeBlockSnappy(dst, src []byte) (d int) {
- const (
- // Use 12 bit table when less than...
- limit12B = 16 << 10
- // Use 10 bit table when less than...
- limit10B = 4 << 10
- // Use 8 bit table when less than...
- limit8B = 512
- )
- if len(src) >= 64<<10 {
- return encodeSnappyBlockAsm(dst, src)
- }
- if len(src) >= limit12B {
- return encodeSnappyBlockAsm64K(dst, src)
- }
- if len(src) >= limit10B {
- return encodeSnappyBlockAsm12B(dst, src)
- }
- if len(src) >= limit8B {
- return encodeSnappyBlockAsm10B(dst, src)
- }
- if len(src) < minNonLiteralBlockSize {
- return 0
- }
- return encodeSnappyBlockAsm8B(dst, src)
-}
-
-// encodeBlockBetterSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
-// assumes that the varint-encoded length of the decompressed bytes has already
-// been written.
-//
-// It also assumes that:
-// len(dst) >= MaxEncodedLen(len(src)) &&
-// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
-func encodeBlockBetterSnappy(dst, src []byte) (d int) {
- const (
- // Use 12 bit table when less than...
- limit12B = 16 << 10
- // Use 10 bit table when less than...
- limit10B = 4 << 10
- // Use 8 bit table when less than...
- limit8B = 512
- )
- if len(src) >= 64<<10 {
- return encodeSnappyBetterBlockAsm(dst, src)
- }
- if len(src) >= limit12B {
- return encodeSnappyBetterBlockAsm64K(dst, src)
- }
- if len(src) >= limit10B {
- return encodeSnappyBetterBlockAsm12B(dst, src)
- }
- if len(src) >= limit8B {
- return encodeSnappyBetterBlockAsm10B(dst, src)
- }
- if len(src) < minNonLiteralBlockSize {
- return 0
- }
- return encodeSnappyBetterBlockAsm8B(dst, src)
-}
diff --git a/vendor/github.com/klauspost/compress/s2/encode_best.go b/vendor/github.com/klauspost/compress/s2/encode_best.go
deleted file mode 100644
index 448034776..000000000
--- a/vendor/github.com/klauspost/compress/s2/encode_best.go
+++ /dev/null
@@ -1,604 +0,0 @@
-// Copyright 2016 The Snappy-Go Authors. All rights reserved.
-// Copyright (c) 2019 Klaus Post. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package s2
-
-import (
- "fmt"
- "math/bits"
-)
-
-// encodeBlockBest encodes a non-empty src to a guaranteed-large-enough dst. It
-// assumes that the varint-encoded length of the decompressed bytes has already
-// been written.
-//
-// It also assumes that:
-// len(dst) >= MaxEncodedLen(len(src)) &&
-// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
-func encodeBlockBest(dst, src []byte) (d int) {
- // Initialize the hash tables.
- const (
- // Long hash matches.
- lTableBits = 19
- maxLTableSize = 1 << lTableBits
-
- // Short hash matches.
- sTableBits = 16
- maxSTableSize = 1 << sTableBits
-
- inputMargin = 8 + 2
- )
-
- // sLimit is when to stop looking for offset/length copies. The inputMargin
- // lets us use a fast path for emitLiteral in the main loop, while we are
- // looking for copies.
- sLimit := len(src) - inputMargin
- if len(src) < minNonLiteralBlockSize {
- return 0
- }
-
- var lTable [maxLTableSize]uint64
- var sTable [maxSTableSize]uint64
-
- // Bail if we can't compress to at least this.
- dstLimit := len(src) - 5
-
- // nextEmit is where in src the next emitLiteral should start from.
- nextEmit := 0
-
- // The encoded form must start with a literal, as there are no previous
- // bytes to copy, so we start looking for hash matches at s == 1.
- s := 1
- cv := load64(src, s)
-
- // We search for a repeat at -1, but don't output repeats when nextEmit == 0
- repeat := 1
- const lowbitMask = 0xffffffff
- getCur := func(x uint64) int {
- return int(x & lowbitMask)
- }
- getPrev := func(x uint64) int {
- return int(x >> 32)
- }
- const maxSkip = 64
-
- for {
- type match struct {
- offset int
- s int
- length int
- score int
- rep bool
- }
- var best match
- for {
- // Next src position to check
- nextS := (s-nextEmit)>>8 + 1
- if nextS > maxSkip {
- nextS = s + maxSkip
- } else {
- nextS += s
- }
- if nextS > sLimit {
- goto emitRemainder
- }
- hashL := hash8(cv, lTableBits)
- hashS := hash4(cv, sTableBits)
- candidateL := lTable[hashL]
- candidateS := sTable[hashS]
-
- score := func(m match) int {
-			// Matches that start further forward are penalized, since the bytes before them must be emitted as literals.
- score := m.length - m.s
- if nextEmit == m.s {
- // If we do not have to emit literals, we save 1 byte
- score++
- }
- offset := m.s - m.offset
- if m.rep {
- return score - emitRepeatSize(offset, m.length)
- }
- return score - emitCopySize(offset, m.length)
- }
-
- matchAt := func(offset, s int, first uint32, rep bool) match {
- if best.length != 0 && best.s-best.offset == s-offset {
- // Don't retest if we have the same offset.
- return match{offset: offset, s: s}
- }
- if load32(src, offset) != first {
- return match{offset: offset, s: s}
- }
- m := match{offset: offset, s: s, length: 4 + offset, rep: rep}
- s += 4
- for s <= sLimit {
- if diff := load64(src, s) ^ load64(src, m.length); diff != 0 {
- m.length += bits.TrailingZeros64(diff) >> 3
- break
- }
- s += 8
- m.length += 8
- }
- m.length -= offset
- m.score = score(m)
- if m.score <= -m.s {
- // Eliminate if no savings, we might find a better one.
- m.length = 0
- }
- return m
- }
-
- bestOf := func(a, b match) match {
- if b.length == 0 {
- return a
- }
- if a.length == 0 {
- return b
- }
- as := a.score + b.s
- bs := b.score + a.s
- if as >= bs {
- return a
- }
- return b
- }
-
- best = bestOf(matchAt(getCur(candidateL), s, uint32(cv), false), matchAt(getPrev(candidateL), s, uint32(cv), false))
- best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv), false))
- best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv), false))
-
- {
- best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8), true))
- if best.length > 0 {
- // s+1
- nextShort := sTable[hash4(cv>>8, sTableBits)]
- s := s + 1
- cv := load64(src, s)
- nextLong := lTable[hash8(cv, lTableBits)]
- best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
- best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
- best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
- best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))
- // Repeat at + 2
- best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8), true))
-
- // s+2
- if true {
- nextShort = sTable[hash4(cv>>8, sTableBits)]
- s++
- cv = load64(src, s)
- nextLong = lTable[hash8(cv, lTableBits)]
- best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
- best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
- best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
- best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))
- }
- // Search for a match at best match end, see if that is better.
- if sAt := best.s + best.length; sAt < sLimit {
- sBack := best.s
- backL := best.length
- // Load initial values
- cv = load64(src, sBack)
- // Search for mismatch
- next := lTable[hash8(load64(src, sAt), lTableBits)]
- //next := sTable[hash4(load64(src, sAt), sTableBits)]
-
- if checkAt := getCur(next) - backL; checkAt > 0 {
- best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
- }
- if checkAt := getPrev(next) - backL; checkAt > 0 {
- best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
- }
- }
- }
- }
-
- // Update table
- lTable[hashL] = uint64(s) | candidateL<<32
- sTable[hashS] = uint64(s) | candidateS<<32
-
- if best.length > 0 {
- break
- }
-
- cv = load64(src, nextS)
- s = nextS
- }
-
- // Extend backwards, not needed for repeats...
- s = best.s
- if !best.rep {
- for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] {
- best.offset--
- best.length++
- s--
- }
- }
- if false && best.offset >= s {
- panic(fmt.Errorf("t %d >= s %d", best.offset, s))
- }
- // Bail if we exceed the maximum size.
- if d+(s-nextEmit) > dstLimit {
- return 0
- }
-
- base := s
- offset := s - best.offset
-
- s += best.length
-
- if offset > 65535 && s-base <= 5 && !best.rep {
-			// Bail if the match is equal to or worse than the encoding.
- s = best.s + 1
- if s >= sLimit {
- goto emitRemainder
- }
- cv = load64(src, s)
- continue
- }
- d += emitLiteral(dst[d:], src[nextEmit:base])
- if best.rep {
- if nextEmit > 0 {
- // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
- d += emitRepeat(dst[d:], offset, best.length)
- } else {
- // First match, cannot be repeat.
- d += emitCopy(dst[d:], offset, best.length)
- }
- } else {
- d += emitCopy(dst[d:], offset, best.length)
- }
- repeat = offset
-
- nextEmit = s
- if s >= sLimit {
- goto emitRemainder
- }
-
- if d > dstLimit {
- // Do we have space for more, if not bail.
- return 0
- }
- // Fill tables...
- for i := best.s + 1; i < s; i++ {
- cv0 := load64(src, i)
- long0 := hash8(cv0, lTableBits)
- short0 := hash4(cv0, sTableBits)
- lTable[long0] = uint64(i) | lTable[long0]<<32
- sTable[short0] = uint64(i) | sTable[short0]<<32
- }
- cv = load64(src, s)
- }
-
-emitRemainder:
- if nextEmit < len(src) {
- // Bail if we exceed the maximum size.
- if d+len(src)-nextEmit > dstLimit {
- return 0
- }
- d += emitLiteral(dst[d:], src[nextEmit:])
- }
- return d
-}
-
-// encodeBlockBestSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
-// assumes that the varint-encoded length of the decompressed bytes has already
-// been written.
-//
-// It also assumes that:
-// len(dst) >= MaxEncodedLen(len(src)) &&
-// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
-func encodeBlockBestSnappy(dst, src []byte) (d int) {
- // Initialize the hash tables.
- const (
- // Long hash matches.
- lTableBits = 19
- maxLTableSize = 1 << lTableBits
-
- // Short hash matches.
- sTableBits = 16
- maxSTableSize = 1 << sTableBits
-
- inputMargin = 8 + 2
- )
-
- // sLimit is when to stop looking for offset/length copies. The inputMargin
- // lets us use a fast path for emitLiteral in the main loop, while we are
- // looking for copies.
- sLimit := len(src) - inputMargin
- if len(src) < minNonLiteralBlockSize {
- return 0
- }
-
- var lTable [maxLTableSize]uint64
- var sTable [maxSTableSize]uint64
-
- // Bail if we can't compress to at least this.
- dstLimit := len(src) - 5
-
- // nextEmit is where in src the next emitLiteral should start from.
- nextEmit := 0
-
- // The encoded form must start with a literal, as there are no previous
- // bytes to copy, so we start looking for hash matches at s == 1.
- s := 1
- cv := load64(src, s)
-
- // We search for a repeat at -1, but don't output repeats when nextEmit == 0
- repeat := 1
- const lowbitMask = 0xffffffff
- getCur := func(x uint64) int {
- return int(x & lowbitMask)
- }
- getPrev := func(x uint64) int {
- return int(x >> 32)
- }
- const maxSkip = 64
-
- for {
- type match struct {
- offset int
- s int
- length int
- score int
- }
- var best match
- for {
- // Next src position to check
- nextS := (s-nextEmit)>>8 + 1
- if nextS > maxSkip {
- nextS = s + maxSkip
- } else {
- nextS += s
- }
- if nextS > sLimit {
- goto emitRemainder
- }
- hashL := hash8(cv, lTableBits)
- hashS := hash4(cv, sTableBits)
- candidateL := lTable[hashL]
- candidateS := sTable[hashS]
-
- score := func(m match) int {
-			// Matches that start further forward are penalized, since the bytes before them must be emitted as literals.
- score := m.length - m.s
- if nextEmit == m.s {
- // If we do not have to emit literals, we save 1 byte
- score++
- }
- offset := m.s - m.offset
-
- return score - emitCopySize(offset, m.length)
- }
-
- matchAt := func(offset, s int, first uint32) match {
- if best.length != 0 && best.s-best.offset == s-offset {
- // Don't retest if we have the same offset.
- return match{offset: offset, s: s}
- }
- if load32(src, offset) != first {
- return match{offset: offset, s: s}
- }
- m := match{offset: offset, s: s, length: 4 + offset}
- s += 4
- for s <= sLimit {
- if diff := load64(src, s) ^ load64(src, m.length); diff != 0 {
- m.length += bits.TrailingZeros64(diff) >> 3
- break
- }
- s += 8
- m.length += 8
- }
- m.length -= offset
- m.score = score(m)
- if m.score <= -m.s {
- // Eliminate if no savings, we might find a better one.
- m.length = 0
- }
- return m
- }
-
- bestOf := func(a, b match) match {
- if b.length == 0 {
- return a
- }
- if a.length == 0 {
- return b
- }
- as := a.score + b.s
- bs := b.score + a.s
- if as >= bs {
- return a
- }
- return b
- }
-
- best = bestOf(matchAt(getCur(candidateL), s, uint32(cv)), matchAt(getPrev(candidateL), s, uint32(cv)))
- best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv)))
- best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv)))
-
- {
- best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8)))
- if best.length > 0 {
- // s+1
- nextShort := sTable[hash4(cv>>8, sTableBits)]
- s := s + 1
- cv := load64(src, s)
- nextLong := lTable[hash8(cv, lTableBits)]
- best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv)))
- best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv)))
- best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv)))
- best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv)))
- // Repeat at + 2
- best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8)))
-
- // s+2
- if true {
- nextShort = sTable[hash4(cv>>8, sTableBits)]
- s++
- cv = load64(src, s)
- nextLong = lTable[hash8(cv, lTableBits)]
- best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv)))
- best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv)))
- best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv)))
- best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv)))
- }
-					// Search for a match at the end of the best match and see if that is better.
- if sAt := best.s + best.length; sAt < sLimit {
- sBack := best.s
- backL := best.length
- // Load initial values
- cv = load64(src, sBack)
- // Search for mismatch
- next := lTable[hash8(load64(src, sAt), lTableBits)]
- //next := sTable[hash4(load64(src, sAt), sTableBits)]
-
- if checkAt := getCur(next) - backL; checkAt > 0 {
- best = bestOf(best, matchAt(checkAt, sBack, uint32(cv)))
- }
- if checkAt := getPrev(next) - backL; checkAt > 0 {
- best = bestOf(best, matchAt(checkAt, sBack, uint32(cv)))
- }
- }
- }
- }
-
- // Update table
- lTable[hashL] = uint64(s) | candidateL<<32
- sTable[hashS] = uint64(s) | candidateS<<32
-
- if best.length > 0 {
- break
- }
-
- cv = load64(src, nextS)
- s = nextS
- }
-
-		// Extend the match backwards; this is not needed for repeats.
- s = best.s
- if true {
- for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] {
- best.offset--
- best.length++
- s--
- }
- }
- if false && best.offset >= s {
- panic(fmt.Errorf("t %d >= s %d", best.offset, s))
- }
- // Bail if we exceed the maximum size.
- if d+(s-nextEmit) > dstLimit {
- return 0
- }
-
- base := s
- offset := s - best.offset
-
- s += best.length
-
- if offset > 65535 && s-base <= 5 {
-			// Bail if the match is no better than the cost of encoding it.
- s = best.s + 1
- if s >= sLimit {
- goto emitRemainder
- }
- cv = load64(src, s)
- continue
- }
- d += emitLiteral(dst[d:], src[nextEmit:base])
- d += emitCopyNoRepeat(dst[d:], offset, best.length)
- repeat = offset
-
- nextEmit = s
- if s >= sLimit {
- goto emitRemainder
- }
-
- if d > dstLimit {
-			// Bail if we do not have space for more output.
- return 0
- }
- // Fill tables...
- for i := best.s + 1; i < s; i++ {
- cv0 := load64(src, i)
- long0 := hash8(cv0, lTableBits)
- short0 := hash4(cv0, sTableBits)
- lTable[long0] = uint64(i) | lTable[long0]<<32
- sTable[short0] = uint64(i) | sTable[short0]<<32
- }
- cv = load64(src, s)
- }
-
-emitRemainder:
- if nextEmit < len(src) {
- // Bail if we exceed the maximum size.
- if d+len(src)-nextEmit > dstLimit {
- return 0
- }
- d += emitLiteral(dst[d:], src[nextEmit:])
- }
- return d
-}
-
-// emitCopySize returns the size to encode the offset+length
-//
-// It assumes that:
-// 1 <= offset && offset <= math.MaxUint32
-// 4 <= length && length <= 1 << 24
-func emitCopySize(offset, length int) int {
- if offset >= 65536 {
- i := 0
- if length > 64 {
- length -= 64
- if length >= 4 {
- // Emit remaining as repeats
- return 5 + emitRepeatSize(offset, length)
- }
- i = 5
- }
- if length == 0 {
- return i
- }
- return i + 5
- }
-
- // Offset no more than 2 bytes.
- if length > 64 {
- // Emit remaining as repeats, at least 4 bytes remain.
- return 3 + emitRepeatSize(offset, length-60)
- }
- if length >= 12 || offset >= 2048 {
- return 3
- }
- // Emit the remaining copy, encoded as 2 bytes.
- return 2
-}
-
-// emitRepeatSize returns the number of bytes required to encode a repeat.
-// Length must be at least 4 and < 1<<24
-func emitRepeatSize(offset, length int) int {
- // Repeat offset, make length cheaper
- if length <= 4+4 || (length < 8+4 && offset < 2048) {
- return 2
- }
- if length < (1<<8)+4+4 {
- return 3
- }
- if length < (1<<16)+(1<<8)+4 {
- return 4
- }
- const maxRepeat = (1 << 24) - 1
- length -= (1 << 16) - 4
- left := 0
- if length > maxRepeat {
- left = length - maxRepeat + 4
- length = maxRepeat - 4
- }
- if left > 0 {
- return 5 + emitRepeatSize(offset, left)
- }
- return 5
-}
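The best-compression path above never writes a candidate match just to measure it: emitCopySize and emitRepeatSize mirror emitCopy/emitRepeat purely as size estimates, and the score closure weighs match length against that cost plus any literals that would have to precede the match. A minimal, self-contained sketch of the same cost/benefit idea (hypothetical helper names; only the three fixed copy-element sizes, no repeat codes):

	package main

	import "fmt"

	// copyCost is a simplified stand-in for emitCopySize: the bytes one S2/Snappy
	// copy element needs for the given offset/length. It assumes 4 <= length <= 64
	// and ignores the repeat-code savings the real encoder can use.
	func copyCost(offset, length int) int {
		switch {
		case offset < 2048 && length < 12:
			return 2 // tagCopy1: 11-bit offset, lengths 4..11
		case offset < 65536:
			return 3 // tagCopy2: 16-bit offset
		default:
			return 5 // tagCopy4: 32-bit offset
		}
	}

	// score follows the shape of the score closure above: bytes saved by the
	// match, minus a penalty for literals that must be emitted before it.
	func score(matchLen, literalsBefore, offset int) int {
		return matchLen - literalsBefore - copyCost(offset, matchLen)
	}

	func main() {
		fmt.Println(score(6, 0, 100000)) // a short, far-away match barely pays: 1
		fmt.Println(score(6, 0, 100))    // the same match nearby is clearly worth it: 4
	}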
diff --git a/vendor/github.com/klauspost/compress/s2/encode_better.go b/vendor/github.com/klauspost/compress/s2/encode_better.go
deleted file mode 100644
index 943215b8a..000000000
--- a/vendor/github.com/klauspost/compress/s2/encode_better.go
+++ /dev/null
@@ -1,431 +0,0 @@
-// Copyright 2016 The Snappy-Go Authors. All rights reserved.
-// Copyright (c) 2019 Klaus Post. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package s2
-
-import (
- "math/bits"
-)
-
-// hash4 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <32.
-func hash4(u uint64, h uint8) uint32 {
- const prime4bytes = 2654435761
- return (uint32(u) * prime4bytes) >> ((32 - h) & 31)
-}
-
-// hash5 returns the hash of the lowest 5 bytes of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <64.
-func hash5(u uint64, h uint8) uint32 {
- const prime5bytes = 889523592379
- return uint32(((u << (64 - 40)) * prime5bytes) >> ((64 - h) & 63))
-}
-
-// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <64.
-func hash7(u uint64, h uint8) uint32 {
- const prime7bytes = 58295818150454627
- return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63))
-}
-
-// hash8 returns the hash of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <64.
-func hash8(u uint64, h uint8) uint32 {
- const prime8bytes = 0xcf1bbcdcb7a56463
- return uint32((u * prime8bytes) >> ((64 - h) & 63))
-}
-
-// encodeBlockBetterGo encodes a non-empty src to a guaranteed-large-enough dst. It
-// assumes that the varint-encoded length of the decompressed bytes has already
-// been written.
-//
-// It also assumes that:
-// len(dst) >= MaxEncodedLen(len(src)) &&
-// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
-func encodeBlockBetterGo(dst, src []byte) (d int) {
- // sLimit is when to stop looking for offset/length copies. The inputMargin
- // lets us use a fast path for emitLiteral in the main loop, while we are
- // looking for copies.
- sLimit := len(src) - inputMargin
- if len(src) < minNonLiteralBlockSize {
- return 0
- }
-
- // Initialize the hash tables.
- const (
- // Long hash matches.
- lTableBits = 16
- maxLTableSize = 1 << lTableBits
-
- // Short hash matches.
- sTableBits = 14
- maxSTableSize = 1 << sTableBits
- )
-
- var lTable [maxLTableSize]uint32
- var sTable [maxSTableSize]uint32
-
- // Bail if we can't compress to at least this.
- dstLimit := len(src) - len(src)>>5 - 6
-
- // nextEmit is where in src the next emitLiteral should start from.
- nextEmit := 0
-
- // The encoded form must start with a literal, as there are no previous
- // bytes to copy, so we start looking for hash matches at s == 1.
- s := 1
- cv := load64(src, s)
-
- // We initialize repeat to 0, so we never match on first attempt
- repeat := 0
-
- for {
- candidateL := 0
- nextS := 0
- for {
- // Next src position to check
- nextS = s + (s-nextEmit)>>7 + 1
- if nextS > sLimit {
- goto emitRemainder
- }
- hashL := hash7(cv, lTableBits)
- hashS := hash4(cv, sTableBits)
- candidateL = int(lTable[hashL])
- candidateS := int(sTable[hashS])
- lTable[hashL] = uint32(s)
- sTable[hashS] = uint32(s)
-
- // Check repeat at offset checkRep.
- const checkRep = 1
- if false && uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
- base := s + checkRep
- // Extend back
- for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
- i--
- base--
- }
- d += emitLiteral(dst[d:], src[nextEmit:base])
-
- // Extend forward
- candidate := s - repeat + 4 + checkRep
- s += 4 + checkRep
- for s < len(src) {
- if len(src)-s < 8 {
- if src[s] == src[candidate] {
- s++
- candidate++
- continue
- }
- break
- }
- if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
- s += bits.TrailingZeros64(diff) >> 3
- break
- }
- s += 8
- candidate += 8
- }
- if nextEmit > 0 {
- // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
- d += emitRepeat(dst[d:], repeat, s-base)
- } else {
- // First match, cannot be repeat.
- d += emitCopy(dst[d:], repeat, s-base)
- }
- nextEmit = s
- if s >= sLimit {
- goto emitRemainder
- }
-
- cv = load64(src, s)
- continue
- }
-
- if uint32(cv) == load32(src, candidateL) {
- break
- }
-
- // Check our short candidate
- if uint32(cv) == load32(src, candidateS) {
- // Try a long candidate at s+1
- hashL = hash7(cv>>8, lTableBits)
- candidateL = int(lTable[hashL])
- lTable[hashL] = uint32(s + 1)
- if uint32(cv>>8) == load32(src, candidateL) {
- s++
- break
- }
- // Use our short candidate.
- candidateL = candidateS
- break
- }
-
- cv = load64(src, nextS)
- s = nextS
- }
-
- // Extend backwards
- for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
- candidateL--
- s--
- }
-
- // Bail if we exceed the maximum size.
- if d+(s-nextEmit) > dstLimit {
- return 0
- }
-
- base := s
- offset := base - candidateL
-
- // Extend the 4-byte match as long as possible.
- s += 4
- candidateL += 4
- for s < len(src) {
- if len(src)-s < 8 {
- if src[s] == src[candidateL] {
- s++
- candidateL++
- continue
- }
- break
- }
- if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
- s += bits.TrailingZeros64(diff) >> 3
- break
- }
- s += 8
- candidateL += 8
- }
-
- if offset > 65535 && s-base <= 5 && repeat != offset {
-			// Bail if the match is no better than the cost of encoding it.
- s = nextS + 1
- if s >= sLimit {
- goto emitRemainder
- }
- cv = load64(src, s)
- continue
- }
-
- d += emitLiteral(dst[d:], src[nextEmit:base])
- if repeat == offset {
- d += emitRepeat(dst[d:], offset, s-base)
- } else {
- d += emitCopy(dst[d:], offset, s-base)
- repeat = offset
- }
-
- nextEmit = s
- if s >= sLimit {
- goto emitRemainder
- }
-
- if d > dstLimit {
-			// Bail if we do not have space for more output.
- return 0
- }
- // Index match start+1 (long) and start+2 (short)
- index0 := base + 1
- // Index match end-2 (long) and end-1 (short)
- index1 := s - 2
-
- cv0 := load64(src, index0)
- cv1 := load64(src, index1)
- cv = load64(src, s)
- lTable[hash7(cv0, lTableBits)] = uint32(index0)
- lTable[hash7(cv0>>8, lTableBits)] = uint32(index0 + 1)
- lTable[hash7(cv1, lTableBits)] = uint32(index1)
- lTable[hash7(cv1>>8, lTableBits)] = uint32(index1 + 1)
- sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
- sTable[hash4(cv0>>16, sTableBits)] = uint32(index0 + 2)
- sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
- }
-
-emitRemainder:
- if nextEmit < len(src) {
- // Bail if we exceed the maximum size.
- if d+len(src)-nextEmit > dstLimit {
- return 0
- }
- d += emitLiteral(dst[d:], src[nextEmit:])
- }
- return d
-}
-
-// encodeBlockBetterSnappyGo encodes a non-empty src to a guaranteed-large-enough dst. It
-// assumes that the varint-encoded length of the decompressed bytes has already
-// been written.
-//
-// It also assumes that:
-// len(dst) >= MaxEncodedLen(len(src)) &&
-// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
-func encodeBlockBetterSnappyGo(dst, src []byte) (d int) {
- // sLimit is when to stop looking for offset/length copies. The inputMargin
- // lets us use a fast path for emitLiteral in the main loop, while we are
- // looking for copies.
- sLimit := len(src) - inputMargin
- if len(src) < minNonLiteralBlockSize {
- return 0
- }
-
- // Initialize the hash tables.
- const (
- // Long hash matches.
- lTableBits = 16
- maxLTableSize = 1 << lTableBits
-
- // Short hash matches.
- sTableBits = 14
- maxSTableSize = 1 << sTableBits
- )
-
- var lTable [maxLTableSize]uint32
- var sTable [maxSTableSize]uint32
-
- // Bail if we can't compress to at least this.
- dstLimit := len(src) - len(src)>>5 - 6
-
- // nextEmit is where in src the next emitLiteral should start from.
- nextEmit := 0
-
- // The encoded form must start with a literal, as there are no previous
- // bytes to copy, so we start looking for hash matches at s == 1.
- s := 1
- cv := load64(src, s)
-
- // We initialize repeat to 0, so we never match on first attempt
- repeat := 0
- const maxSkip = 100
-
- for {
- candidateL := 0
- nextS := 0
- for {
- // Next src position to check
- nextS = (s-nextEmit)>>7 + 1
- if nextS > maxSkip {
- nextS = s + maxSkip
- } else {
- nextS += s
- }
-
- if nextS > sLimit {
- goto emitRemainder
- }
- hashL := hash7(cv, lTableBits)
- hashS := hash4(cv, sTableBits)
- candidateL = int(lTable[hashL])
- candidateS := int(sTable[hashS])
- lTable[hashL] = uint32(s)
- sTable[hashS] = uint32(s)
-
- if uint32(cv) == load32(src, candidateL) {
- break
- }
-
- // Check our short candidate
- if uint32(cv) == load32(src, candidateS) {
- // Try a long candidate at s+1
- hashL = hash7(cv>>8, lTableBits)
- candidateL = int(lTable[hashL])
- lTable[hashL] = uint32(s + 1)
- if uint32(cv>>8) == load32(src, candidateL) {
- s++
- break
- }
- // Use our short candidate.
- candidateL = candidateS
- break
- }
-
- cv = load64(src, nextS)
- s = nextS
- }
-
- // Extend backwards
- for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
- candidateL--
- s--
- }
-
- // Bail if we exceed the maximum size.
- if d+(s-nextEmit) > dstLimit {
- return 0
- }
-
- base := s
- offset := base - candidateL
-
- // Extend the 4-byte match as long as possible.
- s += 4
- candidateL += 4
- for s < len(src) {
- if len(src)-s < 8 {
- if src[s] == src[candidateL] {
- s++
- candidateL++
- continue
- }
- break
- }
- if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
- s += bits.TrailingZeros64(diff) >> 3
- break
- }
- s += 8
- candidateL += 8
- }
-
- if offset > 65535 && s-base <= 5 && repeat != offset {
-			// Bail if the match is no better than the cost of encoding it.
- s = nextS + 1
- if s >= sLimit {
- goto emitRemainder
- }
- cv = load64(src, s)
- continue
- }
-
- d += emitLiteral(dst[d:], src[nextEmit:base])
- d += emitCopyNoRepeat(dst[d:], offset, s-base)
- repeat = offset
-
- nextEmit = s
- if s >= sLimit {
- goto emitRemainder
- }
-
- if d > dstLimit {
-			// Bail if we do not have space for more output.
- return 0
- }
- // Index match start+1 (long) and start+2 (short)
- index0 := base + 1
- // Index match end-2 (long) and end-1 (short)
- index1 := s - 2
-
- cv0 := load64(src, index0)
- cv1 := load64(src, index1)
- cv = load64(src, s)
- lTable[hash7(cv0, lTableBits)] = uint32(index0)
- lTable[hash7(cv0>>8, lTableBits)] = uint32(index0 + 1)
- lTable[hash7(cv1, lTableBits)] = uint32(index1)
- lTable[hash7(cv1>>8, lTableBits)] = uint32(index1 + 1)
- sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
- sTable[hash4(cv0>>16, sTableBits)] = uint32(index0 + 2)
- sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
- }
-
-emitRemainder:
- if nextEmit < len(src) {
- // Bail if we exceed the maximum size.
- if d+len(src)-nextEmit > dstLimit {
- return 0
- }
- d += emitLiteral(dst[d:], src[nextEmit:])
- }
- return d
-}
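The "better" encoders above drive two tables at once: a long table keyed by hash7 of the next bytes and a short table keyed by hash4, preferring the long candidate and falling back to the short one. A toy, self-contained sketch of that dual-table lookup using the same hash constants (table sizes reduced for the example; it only reports candidate positions and does not emit anything):

	package main

	import (
		"encoding/binary"
		"fmt"
	)

	const (
		lBits = 10 // long-hash table bits (the real encoder uses 16)
		sBits = 8  // short-hash table bits (the real encoder uses 14)
	)

	func hash7(u uint64, h uint8) uint32 {
		const prime7bytes = 58295818150454627
		return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63))
	}

	func hash4(u uint64, h uint8) uint32 {
		const prime4bytes = 2654435761
		return (uint32(u) * prime4bytes) >> ((32 - h) & 31)
	}

	func main() {
		src := []byte("the quick brown fox jumps over the quick brown dog")
		var lTable [1 << lBits]int32
		var sTable [1 << sBits]int32
		for i := range lTable {
			lTable[i] = -1
		}
		for i := range sTable {
			sTable[i] = -1
		}
		for s := 0; s+8 <= len(src); s++ {
			cv := binary.LittleEndian.Uint64(src[s:])
			hl, hs := hash7(cv, lBits), hash4(cv, sBits)
			// Prefer the long (7-byte) candidate; fall back to the short (4-byte) one.
			if c := lTable[hl]; c >= 0 && binary.LittleEndian.Uint32(src[c:]) == uint32(cv) {
				fmt.Printf("long candidate:  %d -> %d\n", s, c)
			} else if c := sTable[hs]; c >= 0 && binary.LittleEndian.Uint32(src[c:]) == uint32(cv) {
				fmt.Printf("short candidate: %d -> %d\n", s, c)
			}
			lTable[hl], sTable[hs] = int32(s), int32(s)
		}
	}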
diff --git a/vendor/github.com/klauspost/compress/s2/encode_go.go b/vendor/github.com/klauspost/compress/s2/encode_go.go
deleted file mode 100644
index 43d43534e..000000000
--- a/vendor/github.com/klauspost/compress/s2/encode_go.go
+++ /dev/null
@@ -1,298 +0,0 @@
-//go:build !amd64 || appengine || !gc || noasm
-// +build !amd64 appengine !gc noasm
-
-package s2
-
-import (
- "math/bits"
-)
-
-// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
-// assumes that the varint-encoded length of the decompressed bytes has already
-// been written.
-//
-// It also assumes that:
-// len(dst) >= MaxEncodedLen(len(src))
-func encodeBlock(dst, src []byte) (d int) {
- if len(src) < minNonLiteralBlockSize {
- return 0
- }
- return encodeBlockGo(dst, src)
-}
-
-// encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It
-// assumes that the varint-encoded length of the decompressed bytes has already
-// been written.
-//
-// It also assumes that:
-// len(dst) >= MaxEncodedLen(len(src))
-func encodeBlockBetter(dst, src []byte) (d int) {
- return encodeBlockBetterGo(dst, src)
-}
-
-// encodeBlockBetterSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
-// assumes that the varint-encoded length of the decompressed bytes has already
-// been written.
-//
-// It also assumes that:
-// len(dst) >= MaxEncodedLen(len(src))
-func encodeBlockBetterSnappy(dst, src []byte) (d int) {
- return encodeBlockBetterSnappyGo(dst, src)
-}
-
-// encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
-// assumes that the varint-encoded length of the decompressed bytes has already
-// been written.
-//
-// It also assumes that:
-// len(dst) >= MaxEncodedLen(len(src))
-func encodeBlockSnappy(dst, src []byte) (d int) {
- if len(src) < minNonLiteralBlockSize {
- return 0
- }
- return encodeBlockSnappyGo(dst, src)
-}
-
-// emitLiteral writes a literal chunk and returns the number of bytes written.
-//
-// It assumes that:
-// dst is long enough to hold the encoded bytes
-// 0 <= len(lit) && len(lit) <= math.MaxUint32
-func emitLiteral(dst, lit []byte) int {
- if len(lit) == 0 {
- return 0
- }
- const num = 63<<2 | tagLiteral
- i, n := 0, uint(len(lit)-1)
- switch {
- case n < 60:
- dst[0] = uint8(n)<<2 | tagLiteral
- i = 1
- case n < 1<<8:
- dst[1] = uint8(n)
- dst[0] = 60<<2 | tagLiteral
- i = 2
- case n < 1<<16:
- dst[2] = uint8(n >> 8)
- dst[1] = uint8(n)
- dst[0] = 61<<2 | tagLiteral
- i = 3
- case n < 1<<24:
- dst[3] = uint8(n >> 16)
- dst[2] = uint8(n >> 8)
- dst[1] = uint8(n)
- dst[0] = 62<<2 | tagLiteral
- i = 4
- default:
- dst[4] = uint8(n >> 24)
- dst[3] = uint8(n >> 16)
- dst[2] = uint8(n >> 8)
- dst[1] = uint8(n)
- dst[0] = 63<<2 | tagLiteral
- i = 5
- }
- return i + copy(dst[i:], lit)
-}
-
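emitLiteral above stores length-1 in the literal header: values below 60 fit in the upper six bits of the tag byte, and larger values spill into one to four little-endian length bytes after a tag value of 60..63. A tiny sketch reproducing just that header layout (hypothetical helper; the literal payload, copied after the header, is omitted):

	package main

	import "fmt"

	const tagLiteral = 0x00 // literal elements use tag bits 00 in the Snappy/S2 format

	// literalHeader returns the header bytes emitLiteral would write for a
	// literal run of length n; the payload bytes follow the header.
	func literalHeader(n int) []byte {
		v := uint32(n - 1) // the format stores length-1
		switch {
		case v < 60:
			return []byte{uint8(v)<<2 | tagLiteral}
		case v < 1<<8:
			return []byte{60<<2 | tagLiteral, uint8(v)}
		case v < 1<<16:
			return []byte{61<<2 | tagLiteral, uint8(v), uint8(v >> 8)}
		case v < 1<<24:
			return []byte{62<<2 | tagLiteral, uint8(v), uint8(v >> 8), uint8(v >> 16)}
		default:
			return []byte{63<<2 | tagLiteral, uint8(v), uint8(v >> 8), uint8(v >> 16), uint8(v >> 24)}
		}
	}

	func main() {
		fmt.Printf("%x\n", literalHeader(10))  // single tag byte
		fmt.Printf("%x\n", literalHeader(100)) // tag byte + one length byte
	}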
-// emitRepeat writes a repeat chunk and returns the number of bytes written.
-// Length must be at least 4 and < 1<<24
-func emitRepeat(dst []byte, offset, length int) int {
- // Repeat offset, make length cheaper
- length -= 4
- if length <= 4 {
- dst[0] = uint8(length)<<2 | tagCopy1
- dst[1] = 0
- return 2
- }
- if length < 8 && offset < 2048 {
- // Encode WITH offset
- dst[1] = uint8(offset)
- dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
- return 2
- }
- if length < (1<<8)+4 {
- length -= 4
- dst[2] = uint8(length)
- dst[1] = 0
- dst[0] = 5<<2 | tagCopy1
- return 3
- }
- if length < (1<<16)+(1<<8) {
- length -= 1 << 8
- dst[3] = uint8(length >> 8)
- dst[2] = uint8(length >> 0)
- dst[1] = 0
- dst[0] = 6<<2 | tagCopy1
- return 4
- }
- const maxRepeat = (1 << 24) - 1
- length -= 1 << 16
- left := 0
- if length > maxRepeat {
- left = length - maxRepeat + 4
- length = maxRepeat - 4
- }
- dst[4] = uint8(length >> 16)
- dst[3] = uint8(length >> 8)
- dst[2] = uint8(length >> 0)
- dst[1] = 0
- dst[0] = 7<<2 | tagCopy1
- if left > 0 {
- return 5 + emitRepeat(dst[5:], offset, left)
- }
- return 5
-}
-
-// emitCopy writes a copy chunk and returns the number of bytes written.
-//
-// It assumes that:
-// dst is long enough to hold the encoded bytes
-// 1 <= offset && offset <= math.MaxUint32
-// 4 <= length && length <= 1 << 24
-func emitCopy(dst []byte, offset, length int) int {
- if offset >= 65536 {
- i := 0
- if length > 64 {
- // Emit a length 64 copy, encoded as 5 bytes.
- dst[4] = uint8(offset >> 24)
- dst[3] = uint8(offset >> 16)
- dst[2] = uint8(offset >> 8)
- dst[1] = uint8(offset)
- dst[0] = 63<<2 | tagCopy4
- length -= 64
- if length >= 4 {
- // Emit remaining as repeats
- return 5 + emitRepeat(dst[5:], offset, length)
- }
- i = 5
- }
- if length == 0 {
- return i
- }
- // Emit a copy, offset encoded as 4 bytes.
- dst[i+0] = uint8(length-1)<<2 | tagCopy4
- dst[i+1] = uint8(offset)
- dst[i+2] = uint8(offset >> 8)
- dst[i+3] = uint8(offset >> 16)
- dst[i+4] = uint8(offset >> 24)
- return i + 5
- }
-
- // Offset no more than 2 bytes.
- if length > 64 {
- // Emit a length 60 copy, encoded as 3 bytes.
- // Emit remaining as repeat value (minimum 4 bytes).
- dst[2] = uint8(offset >> 8)
- dst[1] = uint8(offset)
- dst[0] = 59<<2 | tagCopy2
- length -= 60
- // Emit remaining as repeats, at least 4 bytes remain.
- return 3 + emitRepeat(dst[3:], offset, length)
- }
- if length >= 12 || offset >= 2048 {
- // Emit the remaining copy, encoded as 3 bytes.
- dst[2] = uint8(offset >> 8)
- dst[1] = uint8(offset)
- dst[0] = uint8(length-1)<<2 | tagCopy2
- return 3
- }
- // Emit the remaining copy, encoded as 2 bytes.
- dst[1] = uint8(offset)
- dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
- return 2
-}
-
-// emitCopyNoRepeat writes a copy chunk and returns the number of bytes written.
-//
-// It assumes that:
-// dst is long enough to hold the encoded bytes
-// 1 <= offset && offset <= math.MaxUint32
-// 4 <= length && length <= 1 << 24
-func emitCopyNoRepeat(dst []byte, offset, length int) int {
- if offset >= 65536 {
- i := 0
- if length > 64 {
- // Emit a length 64 copy, encoded as 5 bytes.
- dst[4] = uint8(offset >> 24)
- dst[3] = uint8(offset >> 16)
- dst[2] = uint8(offset >> 8)
- dst[1] = uint8(offset)
- dst[0] = 63<<2 | tagCopy4
- length -= 64
- if length >= 4 {
-				// Emit remaining as additional copies
- return 5 + emitCopyNoRepeat(dst[5:], offset, length)
- }
- i = 5
- }
- if length == 0 {
- return i
- }
- // Emit a copy, offset encoded as 4 bytes.
- dst[i+0] = uint8(length-1)<<2 | tagCopy4
- dst[i+1] = uint8(offset)
- dst[i+2] = uint8(offset >> 8)
- dst[i+3] = uint8(offset >> 16)
- dst[i+4] = uint8(offset >> 24)
- return i + 5
- }
-
- // Offset no more than 2 bytes.
- if length > 64 {
- // Emit a length 60 copy, encoded as 3 bytes.
-		// Emit remaining as additional copies (minimum 4 bytes).
- dst[2] = uint8(offset >> 8)
- dst[1] = uint8(offset)
- dst[0] = 59<<2 | tagCopy2
- length -= 60
-		// Emit remaining as copies, at least 4 bytes remain.
- return 3 + emitCopyNoRepeat(dst[3:], offset, length)
- }
- if length >= 12 || offset >= 2048 {
- // Emit the remaining copy, encoded as 3 bytes.
- dst[2] = uint8(offset >> 8)
- dst[1] = uint8(offset)
- dst[0] = uint8(length-1)<<2 | tagCopy2
- return 3
- }
- // Emit the remaining copy, encoded as 2 bytes.
- dst[1] = uint8(offset)
- dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
- return 2
-}
-
-// matchLen returns how many bytes match in a and b
-//
-// It assumes that:
-// len(a) <= len(b)
-//
-func matchLen(a []byte, b []byte) int {
- b = b[:len(a)]
- var checked int
- if len(a) > 4 {
- // Try 4 bytes first
- if diff := load32(a, 0) ^ load32(b, 0); diff != 0 {
- return bits.TrailingZeros32(diff) >> 3
- }
- // Switch to 8 byte matching.
- checked = 4
- a = a[4:]
- b = b[4:]
- for len(a) >= 8 {
- b = b[:len(a)]
- if diff := load64(a, 0) ^ load64(b, 0); diff != 0 {
- return checked + (bits.TrailingZeros64(diff) >> 3)
- }
- checked += 8
- a = a[8:]
- b = b[8:]
- }
- }
- b = b[:len(a)]
- for i := range a {
- if a[i] != b[i] {
- return int(i) + checked
- }
- }
- return len(a) + checked
-}
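matchLen above compares eight bytes at a time: the XOR of the two words is zero while they agree, and TrailingZeros64 of the first non-zero XOR locates the first differing byte. A small self-contained sketch of the same trick (hypothetical helper name; simplified tail handling instead of the 4-byte fast path):

	package main

	import (
		"encoding/binary"
		"fmt"
		"math/bits"
	)

	// matchLen8 reports how many leading bytes of a and b are equal, eight
	// bytes at a time, using the XOR + TrailingZeros64 trick from matchLen.
	func matchLen8(a, b []byte) int {
		n := 0
		for len(a) >= 8 && len(b) >= 8 {
			if x := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); x != 0 {
				return n + bits.TrailingZeros64(x)>>3
			}
			n += 8
			a, b = a[8:], b[8:]
		}
		m := len(a)
		if len(b) < m {
			m = len(b)
		}
		for i := 0; i < m; i++ {
			if a[i] != b[i] {
				return n + i
			}
		}
		return n + m
	}

	func main() {
		fmt.Println(matchLen8([]byte("hello, world!!"), []byte("hello, world??"))) // 12
	}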
diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go
deleted file mode 100644
index d9312e5b9..000000000
--- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go
+++ /dev/null
@@ -1,189 +0,0 @@
-// Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT.
-
-//go:build !appengine && !noasm && gc && !noasm
-// +build !appengine,!noasm,gc,!noasm
-
-package s2
-
-// encodeBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 4294967295 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeBlockAsm(dst []byte, src []byte) int
-
-// encodeBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 4194304 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeBlockAsm4MB(dst []byte, src []byte) int
-
-// encodeBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 16383 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeBlockAsm12B(dst []byte, src []byte) int
-
-// encodeBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 4095 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeBlockAsm10B(dst []byte, src []byte) int
-
-// encodeBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 511 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeBlockAsm8B(dst []byte, src []byte) int
-
-// encodeBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 4294967295 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeBetterBlockAsm(dst []byte, src []byte) int
-
-// encodeBetterBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 4194304 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeBetterBlockAsm4MB(dst []byte, src []byte) int
-
-// encodeBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 16383 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeBetterBlockAsm12B(dst []byte, src []byte) int
-
-// encodeBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 4095 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeBetterBlockAsm10B(dst []byte, src []byte) int
-
-// encodeBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 511 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeBetterBlockAsm8B(dst []byte, src []byte) int
-
-// encodeSnappyBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 4294967295 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeSnappyBlockAsm(dst []byte, src []byte) int
-
-// encodeSnappyBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 65535 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeSnappyBlockAsm64K(dst []byte, src []byte) int
-
-// encodeSnappyBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 16383 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeSnappyBlockAsm12B(dst []byte, src []byte) int
-
-// encodeSnappyBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 4095 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeSnappyBlockAsm10B(dst []byte, src []byte) int
-
-// encodeSnappyBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 511 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeSnappyBlockAsm8B(dst []byte, src []byte) int
-
-// encodeSnappyBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 4294967295 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int
-
-// encodeSnappyBetterBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 65535 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int
-
-// encodeSnappyBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 16383 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int
-
-// encodeSnappyBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 4095 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int
-
-// encodeSnappyBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 511 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int
-
-// emitLiteral writes a literal chunk and returns the number of bytes written.
-//
-// It assumes that:
-// dst is long enough to hold the encoded bytes with margin of 0 bytes
-// 0 <= len(lit) && len(lit) <= math.MaxUint32
-//
-//go:noescape
-func emitLiteral(dst []byte, lit []byte) int
-
-// emitRepeat writes a repeat chunk and returns the number of bytes written.
-// Length must be at least 4 and < 1<<32
-//
-//go:noescape
-func emitRepeat(dst []byte, offset int, length int) int
-
-// emitCopy writes a copy chunk and returns the number of bytes written.
-//
-// It assumes that:
-// dst is long enough to hold the encoded bytes
-// 1 <= offset && offset <= math.MaxUint32
-// 4 <= length && length <= 1 << 24
-//
-//go:noescape
-func emitCopy(dst []byte, offset int, length int) int
-
-// emitCopyNoRepeat writes a copy chunk and returns the number of bytes written.
-//
-// It assumes that:
-// dst is long enough to hold the encoded bytes
-// 1 <= offset && offset <= math.MaxUint32
-// 4 <= length && length <= 1 << 24
-//
-//go:noescape
-func emitCopyNoRepeat(dst []byte, offset int, length int) int
-
-// matchLen returns how many bytes match in a and b
-//
-// It assumes that:
-// len(a) <= len(b)
-//
-//go:noescape
-func matchLen(a []byte, b []byte) int
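These stubs are the assembly entry points the package chooses between by maximum input size; callers never invoke them directly and instead go through the package's public block API. A short usage sketch of that public surface (s2.Encode / s2.EncodeBetter / s2.Decode; exact compressed sizes will vary with input):

	package main

	import (
		"fmt"

		"github.com/klauspost/compress/s2"
	)

	func main() {
		src := []byte("some repetitive input, repetitive input, repetitive input")

		// Block format: Encode appends to dst (allocating when dst is nil).
		fast := s2.Encode(nil, src)
		better := s2.EncodeBetter(nil, src) // slower, usually smaller

		decoded, err := s2.Decode(nil, fast)
		if err != nil {
			panic(err)
		}
		fmt.Println(len(src), "->", len(fast), "/", len(better),
			"roundtrip ok:", string(decoded) == string(src))
	}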
diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
deleted file mode 100644
index 729dbf536..000000000
--- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
+++ /dev/null
@@ -1,16701 +0,0 @@
-// Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT.
-
-//go:build !appengine && !noasm && gc && !noasm
-// +build !appengine,!noasm,gc,!noasm
-
-#include "textflag.h"
-
-// func encodeBlockAsm(dst []byte, src []byte) int
-// Requires: BMI, SSE2
-TEXT ·encodeBlockAsm(SB), $65560-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000200, CX
- LEAQ 24(SP), DX
- PXOR X0, X0
-
-zero_loop_encodeBlockAsm:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
- JNZ zero_loop_encodeBlockAsm
- MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), SI
- MOVL SI, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL CX, 16(SP)
- MOVQ src_base+24(FP), DX
-
-search_loop_encodeBlockAsm:
- MOVL CX, SI
- SUBL 12(SP), SI
- SHRL $0x06, SI
- LEAL 4(CX)(SI*1), SI
- CMPL SI, 8(SP)
- JGE emit_remainder_encodeBlockAsm
- MOVQ (DX)(CX*1), DI
- MOVL SI, 20(SP)
- MOVQ $0x0000cf1bbcdcbf9b, R9
- MOVQ DI, R10
- MOVQ DI, R11
- SHRQ $0x08, R11
- SHLQ $0x10, R10
- IMULQ R9, R10
- SHRQ $0x32, R10
- SHLQ $0x10, R11
- IMULQ R9, R11
- SHRQ $0x32, R11
- MOVL 24(SP)(R10*4), SI
- MOVL 24(SP)(R11*4), R8
- MOVL CX, 24(SP)(R10*4)
- LEAL 1(CX), R10
- MOVL R10, 24(SP)(R11*4)
- MOVQ DI, R10
- SHRQ $0x10, R10
- SHLQ $0x10, R10
- IMULQ R9, R10
- SHRQ $0x32, R10
- MOVL CX, R9
- SUBL 16(SP), R9
- MOVL 1(DX)(R9*1), R11
- MOVQ DI, R9
- SHRQ $0x08, R9
- CMPL R9, R11
- JNE no_repeat_found_encodeBlockAsm
- LEAL 1(CX), DI
- MOVL 12(SP), R8
- MOVL DI, SI
- SUBL 16(SP), SI
- JZ repeat_extend_back_end_encodeBlockAsm
-
-repeat_extend_back_loop_encodeBlockAsm:
- CMPL DI, R8
- JLE repeat_extend_back_end_encodeBlockAsm
- MOVB -1(DX)(SI*1), BL
- MOVB -1(DX)(DI*1), R9
- CMPB BL, R9
- JNE repeat_extend_back_end_encodeBlockAsm
- LEAL -1(DI), DI
- DECL SI
- JNZ repeat_extend_back_loop_encodeBlockAsm
-
-repeat_extend_back_end_encodeBlockAsm:
- MOVL 12(SP), SI
- CMPL SI, DI
- JEQ emit_literal_done_repeat_emit_encodeBlockAsm
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(SI*1), R10
- SUBL SI, R9
- LEAL -1(R9), SI
- CMPL SI, $0x3c
- JLT one_byte_repeat_emit_encodeBlockAsm
- CMPL SI, $0x00000100
- JLT two_bytes_repeat_emit_encodeBlockAsm
- CMPL SI, $0x00010000
- JLT three_bytes_repeat_emit_encodeBlockAsm
- CMPL SI, $0x01000000
- JLT four_bytes_repeat_emit_encodeBlockAsm
- MOVB $0xfc, (AX)
- MOVL SI, 1(AX)
- ADDQ $0x05, AX
- JMP memmove_long_repeat_emit_encodeBlockAsm
-
-four_bytes_repeat_emit_encodeBlockAsm:
- MOVL SI, R11
- SHRL $0x10, R11
- MOVB $0xf8, (AX)
- MOVW SI, 1(AX)
- MOVB R11, 3(AX)
- ADDQ $0x04, AX
- JMP memmove_long_repeat_emit_encodeBlockAsm
-
-three_bytes_repeat_emit_encodeBlockAsm:
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_repeat_emit_encodeBlockAsm
-
-two_bytes_repeat_emit_encodeBlockAsm:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_repeat_emit_encodeBlockAsm
- JMP memmove_long_repeat_emit_encodeBlockAsm
-
-one_byte_repeat_emit_encodeBlockAsm:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, AX
-
-memmove_repeat_emit_encodeBlockAsm:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveShort
- CMPQ R9, $0x08
- JLE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8
- CMPQ R9, $0x10
- JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32
- JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64
-
-emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8:
- MOVQ (R10), R11
- MOVQ R11, (AX)
- JMP memmove_end_copy_repeat_emit_encodeBlockAsm
-
-emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16:
- MOVQ (R10), R11
- MOVQ -8(R10)(R9*1), R10
- MOVQ R11, (AX)
- MOVQ R10, -8(AX)(R9*1)
- JMP memmove_end_copy_repeat_emit_encodeBlockAsm
-
-emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32:
- MOVOU (R10), X0
- MOVOU -16(R10)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_repeat_emit_encodeBlockAsm
-
-emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64:
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_repeat_emit_encodeBlockAsm:
- MOVQ SI, AX
- JMP emit_literal_done_repeat_emit_encodeBlockAsm
-
-memmove_long_repeat_emit_encodeBlockAsm:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveLong
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVQ R9, R12
- SHRQ $0x05, R12
- MOVQ AX, R11
- ANDL $0x0000001f, R11
- MOVQ $0x00000040, R13
- SUBQ R11, R13
- DECQ R12
- JA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
- LEAQ -32(R10)(R13*1), R11
- LEAQ -32(AX)(R13*1), R14
-
-emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back:
- MOVOU (R11), X4
- MOVOU 16(R11), X5
- MOVOA X4, (R14)
- MOVOA X5, 16(R14)
- ADDQ $0x20, R14
- ADDQ $0x20, R11
- ADDQ $0x20, R13
- DECQ R12
- JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back
-
-emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32:
- MOVOU -32(R10)(R13*1), X4
- MOVOU -16(R10)(R13*1), X5
- MOVOA X4, -32(AX)(R13*1)
- MOVOA X5, -16(AX)(R13*1)
- ADDQ $0x20, R13
- CMPQ R9, R13
- JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ SI, AX
-
-emit_literal_done_repeat_emit_encodeBlockAsm:
- ADDL $0x05, CX
- MOVL CX, SI
- SUBL 16(SP), SI
- MOVQ src_len+32(FP), R9
- SUBL CX, R9
- LEAQ (DX)(CX*1), R10
- LEAQ (DX)(SI*1), SI
-
- // matchLen
- XORL R12, R12
- CMPL R9, $0x08
- JL matchlen_match4_repeat_extend_encodeBlockAsm
-
-matchlen_loopback_repeat_extend_encodeBlockAsm:
- MOVQ (R10)(R12*1), R11
- XORQ (SI)(R12*1), R11
- TESTQ R11, R11
- JZ matchlen_loop_repeat_extend_encodeBlockAsm
-
-#ifdef GOAMD64_v3
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R11, R11
-
-#endif
- SARQ $0x03, R11
- LEAL (R12)(R11*1), R12
- JMP repeat_extend_forward_end_encodeBlockAsm
-
-matchlen_loop_repeat_extend_encodeBlockAsm:
- LEAL -8(R9), R9
- LEAL 8(R12), R12
- CMPL R9, $0x08
- JGE matchlen_loopback_repeat_extend_encodeBlockAsm
- JZ repeat_extend_forward_end_encodeBlockAsm
-
-matchlen_match4_repeat_extend_encodeBlockAsm:
- CMPL R9, $0x04
- JL matchlen_match2_repeat_extend_encodeBlockAsm
- MOVL (R10)(R12*1), R11
- CMPL (SI)(R12*1), R11
- JNE matchlen_match2_repeat_extend_encodeBlockAsm
- SUBL $0x04, R9
- LEAL 4(R12), R12
-
-matchlen_match2_repeat_extend_encodeBlockAsm:
- CMPL R9, $0x02
- JL matchlen_match1_repeat_extend_encodeBlockAsm
- MOVW (R10)(R12*1), R11
- CMPW (SI)(R12*1), R11
- JNE matchlen_match1_repeat_extend_encodeBlockAsm
- SUBL $0x02, R9
- LEAL 2(R12), R12
-
-matchlen_match1_repeat_extend_encodeBlockAsm:
- CMPL R9, $0x01
- JL repeat_extend_forward_end_encodeBlockAsm
- MOVB (R10)(R12*1), R11
- CMPB (SI)(R12*1), R11
- JNE repeat_extend_forward_end_encodeBlockAsm
- LEAL 1(R12), R12
-
-repeat_extend_forward_end_encodeBlockAsm:
- ADDL R12, CX
- MOVL CX, SI
- SUBL DI, SI
- MOVL 16(SP), DI
- TESTL R8, R8
- JZ repeat_as_copy_encodeBlockAsm
-
- // emitRepeat
-emit_repeat_again_match_repeat_encodeBlockAsm:
- MOVL SI, R8
- LEAL -4(SI), SI
- CMPL R8, $0x08
- JLE repeat_two_match_repeat_encodeBlockAsm
- CMPL R8, $0x0c
- JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm
- CMPL DI, $0x00000800
- JLT repeat_two_offset_match_repeat_encodeBlockAsm
-
-cant_repeat_two_offset_match_repeat_encodeBlockAsm:
- CMPL SI, $0x00000104
- JLT repeat_three_match_repeat_encodeBlockAsm
- CMPL SI, $0x00010100
- JLT repeat_four_match_repeat_encodeBlockAsm
- CMPL SI, $0x0100ffff
- JLT repeat_five_match_repeat_encodeBlockAsm
- LEAL -16842747(SI), SI
- MOVW $0x001d, (AX)
- MOVW $0xfffb, 2(AX)
- MOVB $0xff, 4(AX)
- ADDQ $0x05, AX
- JMP emit_repeat_again_match_repeat_encodeBlockAsm
-
-repeat_five_match_repeat_encodeBlockAsm:
- LEAL -65536(SI), SI
- MOVL SI, DI
- MOVW $0x001d, (AX)
- MOVW SI, 2(AX)
- SARL $0x10, DI
- MOVB DI, 4(AX)
- ADDQ $0x05, AX
- JMP repeat_end_emit_encodeBlockAsm
-
-repeat_four_match_repeat_encodeBlockAsm:
- LEAL -256(SI), SI
- MOVW $0x0019, (AX)
- MOVW SI, 2(AX)
- ADDQ $0x04, AX
- JMP repeat_end_emit_encodeBlockAsm
-
-repeat_three_match_repeat_encodeBlockAsm:
- LEAL -4(SI), SI
- MOVW $0x0015, (AX)
- MOVB SI, 2(AX)
- ADDQ $0x03, AX
- JMP repeat_end_emit_encodeBlockAsm
-
-repeat_two_match_repeat_encodeBlockAsm:
- SHLL $0x02, SI
- ORL $0x01, SI
- MOVW SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm
-
-repeat_two_offset_match_repeat_encodeBlockAsm:
- XORQ R8, R8
- LEAL 1(R8)(SI*4), SI
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm
-
-repeat_as_copy_encodeBlockAsm:
- // emitCopy
- CMPL DI, $0x00010000
- JL two_byte_offset_repeat_as_copy_encodeBlockAsm
-
-four_bytes_loop_back_repeat_as_copy_encodeBlockAsm:
- CMPL SI, $0x40
- JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm
- MOVB $0xff, (AX)
- MOVL DI, 1(AX)
- LEAL -64(SI), SI
- ADDQ $0x05, AX
- CMPL SI, $0x04
- JL four_bytes_remain_repeat_as_copy_encodeBlockAsm
-
- // emitRepeat
-emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy:
- MOVL SI, R8
- LEAL -4(SI), SI
- CMPL R8, $0x08
- JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy
- CMPL R8, $0x0c
- JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
- CMPL DI, $0x00000800
- JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
-
-cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
- CMPL SI, $0x00000104
- JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy
- CMPL SI, $0x00010100
- JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy
- CMPL SI, $0x0100ffff
- JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy
- LEAL -16842747(SI), SI
- MOVW $0x001d, (AX)
- MOVW $0xfffb, 2(AX)
- MOVB $0xff, 4(AX)
- ADDQ $0x05, AX
- JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy
-
-repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy:
- LEAL -65536(SI), SI
- MOVL SI, DI
- MOVW $0x001d, (AX)
- MOVW SI, 2(AX)
- SARL $0x10, DI
- MOVB DI, 4(AX)
- ADDQ $0x05, AX
- JMP repeat_end_emit_encodeBlockAsm
-
-repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy:
- LEAL -256(SI), SI
- MOVW $0x0019, (AX)
- MOVW SI, 2(AX)
- ADDQ $0x04, AX
- JMP repeat_end_emit_encodeBlockAsm
-
-repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy:
- LEAL -4(SI), SI
- MOVW $0x0015, (AX)
- MOVB SI, 2(AX)
- ADDQ $0x03, AX
- JMP repeat_end_emit_encodeBlockAsm
-
-repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy:
- SHLL $0x02, SI
- ORL $0x01, SI
- MOVW SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm
-
-repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
- XORQ R8, R8
- LEAL 1(R8)(SI*4), SI
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm
- JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm
-
-four_bytes_remain_repeat_as_copy_encodeBlockAsm:
- TESTL SI, SI
- JZ repeat_end_emit_encodeBlockAsm
- MOVB $0x03, BL
- LEAL -4(BX)(SI*4), SI
- MOVB SI, (AX)
- MOVL DI, 1(AX)
- ADDQ $0x05, AX
- JMP repeat_end_emit_encodeBlockAsm
-
-two_byte_offset_repeat_as_copy_encodeBlockAsm:
- CMPL SI, $0x40
- JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm
- MOVB $0xee, (AX)
- MOVW DI, 1(AX)
- LEAL -60(SI), SI
- ADDQ $0x03, AX
-
- // emitRepeat
-emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short:
- MOVL SI, R8
- LEAL -4(SI), SI
- CMPL R8, $0x08
- JLE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short
- CMPL R8, $0x0c
- JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
- CMPL DI, $0x00000800
- JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
-
-cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
- CMPL SI, $0x00000104
- JLT repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short
- CMPL SI, $0x00010100
- JLT repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short
- CMPL SI, $0x0100ffff
- JLT repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short
- LEAL -16842747(SI), SI
- MOVW $0x001d, (AX)
- MOVW $0xfffb, 2(AX)
- MOVB $0xff, 4(AX)
- ADDQ $0x05, AX
- JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short
-
-repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short:
- LEAL -65536(SI), SI
- MOVL SI, DI
- MOVW $0x001d, (AX)
- MOVW SI, 2(AX)
- SARL $0x10, DI
- MOVB DI, 4(AX)
- ADDQ $0x05, AX
- JMP repeat_end_emit_encodeBlockAsm
-
-repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short:
- LEAL -256(SI), SI
- MOVW $0x0019, (AX)
- MOVW SI, 2(AX)
- ADDQ $0x04, AX
- JMP repeat_end_emit_encodeBlockAsm
-
-repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short:
- LEAL -4(SI), SI
- MOVW $0x0015, (AX)
- MOVB SI, 2(AX)
- ADDQ $0x03, AX
- JMP repeat_end_emit_encodeBlockAsm
-
-repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short:
- SHLL $0x02, SI
- ORL $0x01, SI
- MOVW SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm
-
-repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
- XORQ R8, R8
- LEAL 1(R8)(SI*4), SI
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm
- JMP two_byte_offset_repeat_as_copy_encodeBlockAsm
-
-two_byte_offset_short_repeat_as_copy_encodeBlockAsm:
- CMPL SI, $0x0c
- JGE emit_copy_three_repeat_as_copy_encodeBlockAsm
- CMPL DI, $0x00000800
- JGE emit_copy_three_repeat_as_copy_encodeBlockAsm
- MOVB $0x01, BL
- LEAL -16(BX)(SI*4), SI
- MOVB DI, 1(AX)
- SHRL $0x08, DI
- SHLL $0x05, DI
- ORL DI, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm
-
-emit_copy_three_repeat_as_copy_encodeBlockAsm:
- MOVB $0x02, BL
- LEAL -4(BX)(SI*4), SI
- MOVB SI, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
-
-repeat_end_emit_encodeBlockAsm:
- MOVL CX, 12(SP)
- JMP search_loop_encodeBlockAsm
-
-no_repeat_found_encodeBlockAsm:
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeBlockAsm
- SHRQ $0x08, DI
- MOVL 24(SP)(R10*4), SI
- LEAL 2(CX), R9
- CMPL (DX)(R8*1), DI
- JEQ candidate2_match_encodeBlockAsm
- MOVL R9, 24(SP)(R10*4)
- SHRQ $0x08, DI
- CMPL (DX)(SI*1), DI
- JEQ candidate3_match_encodeBlockAsm
- MOVL 20(SP), CX
- JMP search_loop_encodeBlockAsm
-
-candidate3_match_encodeBlockAsm:
- ADDL $0x02, CX
- JMP candidate_match_encodeBlockAsm
-
-candidate2_match_encodeBlockAsm:
- MOVL R9, 24(SP)(R10*4)
- INCL CX
- MOVL R8, SI
-
-candidate_match_encodeBlockAsm:
- MOVL 12(SP), DI
- TESTL SI, SI
- JZ match_extend_back_end_encodeBlockAsm
-
-match_extend_back_loop_encodeBlockAsm:
- CMPL CX, DI
- JLE match_extend_back_end_encodeBlockAsm
- MOVB -1(DX)(SI*1), BL
- MOVB -1(DX)(CX*1), R8
- CMPB BL, R8
- JNE match_extend_back_end_encodeBlockAsm
- LEAL -1(CX), CX
- DECL SI
- JZ match_extend_back_end_encodeBlockAsm
- JMP match_extend_back_loop_encodeBlockAsm
-
-match_extend_back_end_encodeBlockAsm:
- MOVL CX, DI
- SUBL 12(SP), DI
- LEAQ 5(AX)(DI*1), DI
- CMPQ DI, (SP)
- JL match_dst_size_check_encodeBlockAsm
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_dst_size_check_encodeBlockAsm:
- MOVL CX, DI
- MOVL 12(SP), R8
- CMPL R8, DI
- JEQ emit_literal_done_match_emit_encodeBlockAsm
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(R8*1), DI
- SUBL R8, R9
- LEAL -1(R9), R8
- CMPL R8, $0x3c
- JLT one_byte_match_emit_encodeBlockAsm
- CMPL R8, $0x00000100
- JLT two_bytes_match_emit_encodeBlockAsm
- CMPL R8, $0x00010000
- JLT three_bytes_match_emit_encodeBlockAsm
- CMPL R8, $0x01000000
- JLT four_bytes_match_emit_encodeBlockAsm
- MOVB $0xfc, (AX)
- MOVL R8, 1(AX)
- ADDQ $0x05, AX
- JMP memmove_long_match_emit_encodeBlockAsm
-
-four_bytes_match_emit_encodeBlockAsm:
- MOVL R8, R10
- SHRL $0x10, R10
- MOVB $0xf8, (AX)
- MOVW R8, 1(AX)
- MOVB R10, 3(AX)
- ADDQ $0x04, AX
- JMP memmove_long_match_emit_encodeBlockAsm
-
-three_bytes_match_emit_encodeBlockAsm:
- MOVB $0xf4, (AX)
- MOVW R8, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_match_emit_encodeBlockAsm
-
-two_bytes_match_emit_encodeBlockAsm:
- MOVB $0xf0, (AX)
- MOVB R8, 1(AX)
- ADDQ $0x02, AX
- CMPL R8, $0x40
- JL memmove_match_emit_encodeBlockAsm
- JMP memmove_long_match_emit_encodeBlockAsm
-
-one_byte_match_emit_encodeBlockAsm:
- SHLB $0x02, R8
- MOVB R8, (AX)
- ADDQ $0x01, AX
-
-memmove_match_emit_encodeBlockAsm:
- LEAQ (AX)(R9*1), R8
-
- // genMemMoveShort
- CMPQ R9, $0x08
- JLE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8
- CMPQ R9, $0x10
- JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32
- JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64
-
-emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8:
- MOVQ (DI), R10
- MOVQ R10, (AX)
- JMP memmove_end_copy_match_emit_encodeBlockAsm
-
-emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16:
- MOVQ (DI), R10
- MOVQ -8(DI)(R9*1), DI
- MOVQ R10, (AX)
- MOVQ DI, -8(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeBlockAsm
-
-emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32:
- MOVOU (DI), X0
- MOVOU -16(DI)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeBlockAsm
-
-emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64:
- MOVOU (DI), X0
- MOVOU 16(DI), X1
- MOVOU -32(DI)(R9*1), X2
- MOVOU -16(DI)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_match_emit_encodeBlockAsm:
- MOVQ R8, AX
- JMP emit_literal_done_match_emit_encodeBlockAsm
-
-memmove_long_match_emit_encodeBlockAsm:
- LEAQ (AX)(R9*1), R8
-
- // genMemMoveLong
- MOVOU (DI), X0
- MOVOU 16(DI), X1
- MOVOU -32(DI)(R9*1), X2
- MOVOU -16(DI)(R9*1), X3
- MOVQ R9, R11
- SHRQ $0x05, R11
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R12
- SUBQ R10, R12
- DECQ R11
- JA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
- LEAQ -32(DI)(R12*1), R10
- LEAQ -32(AX)(R12*1), R13
-
-emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R13)
- MOVOA X5, 16(R13)
- ADDQ $0x20, R13
- ADDQ $0x20, R10
- ADDQ $0x20, R12
- DECQ R11
- JNA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32:
- MOVOU -32(DI)(R12*1), X4
- MOVOU -16(DI)(R12*1), X5
- MOVOA X4, -32(AX)(R12*1)
- MOVOA X5, -16(AX)(R12*1)
- ADDQ $0x20, R12
- CMPQ R9, R12
- JAE emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ R8, AX
-
-emit_literal_done_match_emit_encodeBlockAsm:
-match_nolit_loop_encodeBlockAsm:
- MOVL CX, DI
- SUBL SI, DI
- MOVL DI, 16(SP)
- ADDL $0x04, CX
- ADDL $0x04, SI
- MOVQ src_len+32(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(SI*1), SI
-
- // matchLen
- XORL R10, R10
- CMPL DI, $0x08
- JL matchlen_match4_match_nolit_encodeBlockAsm
-
-matchlen_loopback_match_nolit_encodeBlockAsm:
- MOVQ (R8)(R10*1), R9
- XORQ (SI)(R10*1), R9
- TESTQ R9, R9
- JZ matchlen_loop_match_nolit_encodeBlockAsm
-
-#ifdef GOAMD64_v3
- TZCNTQ R9, R9
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R9, R9
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R9, R9
-
-#endif
- SARQ $0x03, R9
- LEAL (R10)(R9*1), R10
- JMP match_nolit_end_encodeBlockAsm
-
-matchlen_loop_match_nolit_encodeBlockAsm:
- LEAL -8(DI), DI
- LEAL 8(R10), R10
- CMPL DI, $0x08
- JGE matchlen_loopback_match_nolit_encodeBlockAsm
- JZ match_nolit_end_encodeBlockAsm
-
-matchlen_match4_match_nolit_encodeBlockAsm:
- CMPL DI, $0x04
- JL matchlen_match2_match_nolit_encodeBlockAsm
- MOVL (R8)(R10*1), R9
- CMPL (SI)(R10*1), R9
- JNE matchlen_match2_match_nolit_encodeBlockAsm
- SUBL $0x04, DI
- LEAL 4(R10), R10
-
-matchlen_match2_match_nolit_encodeBlockAsm:
- CMPL DI, $0x02
- JL matchlen_match1_match_nolit_encodeBlockAsm
- MOVW (R8)(R10*1), R9
- CMPW (SI)(R10*1), R9
- JNE matchlen_match1_match_nolit_encodeBlockAsm
- SUBL $0x02, DI
- LEAL 2(R10), R10
-
-matchlen_match1_match_nolit_encodeBlockAsm:
- CMPL DI, $0x01
- JL match_nolit_end_encodeBlockAsm
- MOVB (R8)(R10*1), R9
- CMPB (SI)(R10*1), R9
- JNE match_nolit_end_encodeBlockAsm
- LEAL 1(R10), R10
-
-match_nolit_end_encodeBlockAsm:
- ADDL R10, CX
- MOVL 16(SP), SI
- ADDL $0x04, R10
- MOVL CX, 12(SP)
-
- // emitCopy
- CMPL SI, $0x00010000
- JL two_byte_offset_match_nolit_encodeBlockAsm
-
-four_bytes_loop_back_match_nolit_encodeBlockAsm:
- CMPL R10, $0x40
- JLE four_bytes_remain_match_nolit_encodeBlockAsm
- MOVB $0xff, (AX)
- MOVL SI, 1(AX)
- LEAL -64(R10), R10
- ADDQ $0x05, AX
- CMPL R10, $0x04
- JL four_bytes_remain_match_nolit_encodeBlockAsm
-
- // emitRepeat
-emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy:
- MOVL R10, DI
- LEAL -4(R10), R10
- CMPL DI, $0x08
- JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy
- CMPL DI, $0x0c
- JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
- CMPL SI, $0x00000800
- JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
-
-cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
- CMPL R10, $0x00000104
- JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy
- CMPL R10, $0x00010100
- JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy
- CMPL R10, $0x0100ffff
- JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy
- LEAL -16842747(R10), R10
- MOVW $0x001d, (AX)
- MOVW $0xfffb, 2(AX)
- MOVB $0xff, 4(AX)
- ADDQ $0x05, AX
- JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy
-
-repeat_five_match_nolit_encodeBlockAsm_emit_copy:
- LEAL -65536(R10), R10
- MOVL R10, SI
- MOVW $0x001d, (AX)
- MOVW R10, 2(AX)
- SARL $0x10, SI
- MOVB SI, 4(AX)
- ADDQ $0x05, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm
-
-repeat_four_match_nolit_encodeBlockAsm_emit_copy:
- LEAL -256(R10), R10
- MOVW $0x0019, (AX)
- MOVW R10, 2(AX)
- ADDQ $0x04, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm
-
-repeat_three_match_nolit_encodeBlockAsm_emit_copy:
- LEAL -4(R10), R10
- MOVW $0x0015, (AX)
- MOVB R10, 2(AX)
- ADDQ $0x03, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm
-
-repeat_two_match_nolit_encodeBlockAsm_emit_copy:
- SHLL $0x02, R10
- ORL $0x01, R10
- MOVW R10, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm
-
-repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
- XORQ DI, DI
- LEAL 1(DI)(R10*4), R10
- MOVB SI, 1(AX)
- SARL $0x08, SI
- SHLL $0x05, SI
- ORL SI, R10
- MOVB R10, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm
- JMP four_bytes_loop_back_match_nolit_encodeBlockAsm
-
-four_bytes_remain_match_nolit_encodeBlockAsm:
- TESTL R10, R10
- JZ match_nolit_emitcopy_end_encodeBlockAsm
- MOVB $0x03, BL
- LEAL -4(BX)(R10*4), R10
- MOVB R10, (AX)
- MOVL SI, 1(AX)
- ADDQ $0x05, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm
-
-two_byte_offset_match_nolit_encodeBlockAsm:
- CMPL R10, $0x40
- JLE two_byte_offset_short_match_nolit_encodeBlockAsm
- MOVB $0xee, (AX)
- MOVW SI, 1(AX)
- LEAL -60(R10), R10
- ADDQ $0x03, AX
-
- // emitRepeat
-emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short:
- MOVL R10, DI
- LEAL -4(R10), R10
- CMPL DI, $0x08
- JLE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short
- CMPL DI, $0x0c
- JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
- CMPL SI, $0x00000800
- JLT repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
-
-cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
- CMPL R10, $0x00000104
- JLT repeat_three_match_nolit_encodeBlockAsm_emit_copy_short
- CMPL R10, $0x00010100
- JLT repeat_four_match_nolit_encodeBlockAsm_emit_copy_short
- CMPL R10, $0x0100ffff
- JLT repeat_five_match_nolit_encodeBlockAsm_emit_copy_short
- LEAL -16842747(R10), R10
- MOVW $0x001d, (AX)
- MOVW $0xfffb, 2(AX)
- MOVB $0xff, 4(AX)
- ADDQ $0x05, AX
- JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short
-
-repeat_five_match_nolit_encodeBlockAsm_emit_copy_short:
- LEAL -65536(R10), R10
- MOVL R10, SI
- MOVW $0x001d, (AX)
- MOVW R10, 2(AX)
- SARL $0x10, SI
- MOVB SI, 4(AX)
- ADDQ $0x05, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm
-
-repeat_four_match_nolit_encodeBlockAsm_emit_copy_short:
- LEAL -256(R10), R10
- MOVW $0x0019, (AX)
- MOVW R10, 2(AX)
- ADDQ $0x04, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm
-
-repeat_three_match_nolit_encodeBlockAsm_emit_copy_short:
- LEAL -4(R10), R10
- MOVW $0x0015, (AX)
- MOVB R10, 2(AX)
- ADDQ $0x03, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm
-
-repeat_two_match_nolit_encodeBlockAsm_emit_copy_short:
- SHLL $0x02, R10
- ORL $0x01, R10
- MOVW R10, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm
-
-repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
- XORQ DI, DI
- LEAL 1(DI)(R10*4), R10
- MOVB SI, 1(AX)
- SARL $0x08, SI
- SHLL $0x05, SI
- ORL SI, R10
- MOVB R10, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm
- JMP two_byte_offset_match_nolit_encodeBlockAsm
-
-two_byte_offset_short_match_nolit_encodeBlockAsm:
- CMPL R10, $0x0c
- JGE emit_copy_three_match_nolit_encodeBlockAsm
- CMPL SI, $0x00000800
- JGE emit_copy_three_match_nolit_encodeBlockAsm
- MOVB $0x01, BL
- LEAL -16(BX)(R10*4), R10
- MOVB SI, 1(AX)
- SHRL $0x08, SI
- SHLL $0x05, SI
- ORL SI, R10
- MOVB R10, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm
-
-emit_copy_three_match_nolit_encodeBlockAsm:
- MOVB $0x02, BL
- LEAL -4(BX)(R10*4), R10
- MOVB R10, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
-
-match_nolit_emitcopy_end_encodeBlockAsm:
- CMPL CX, 8(SP)
- JGE emit_remainder_encodeBlockAsm
- MOVQ -2(DX)(CX*1), DI
- CMPQ AX, (SP)
- JL match_nolit_dst_ok_encodeBlockAsm
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_nolit_dst_ok_encodeBlockAsm:
- MOVQ $0x0000cf1bbcdcbf9b, R9
- MOVQ DI, R8
- SHRQ $0x10, DI
- MOVQ DI, SI
- SHLQ $0x10, R8
- IMULQ R9, R8
- SHRQ $0x32, R8
- SHLQ $0x10, SI
- IMULQ R9, SI
- SHRQ $0x32, SI
- LEAL -2(CX), R9
- LEAQ 24(SP)(SI*4), R10
- MOVL (R10), SI
- MOVL R9, 24(SP)(R8*4)
- MOVL CX, (R10)
- CMPL (DX)(SI*1), DI
- JEQ match_nolit_loop_encodeBlockAsm
- INCL CX
- JMP search_loop_encodeBlockAsm
-
-emit_remainder_encodeBlockAsm:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 5(AX)(CX*1), CX
- CMPQ CX, (SP)
- JL emit_remainder_ok_encodeBlockAsm
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-emit_remainder_ok_encodeBlockAsm:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
- JEQ emit_literal_done_emit_remainder_encodeBlockAsm
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
- LEAL -1(SI), DX
- CMPL DX, $0x3c
- JLT one_byte_emit_remainder_encodeBlockAsm
- CMPL DX, $0x00000100
- JLT two_bytes_emit_remainder_encodeBlockAsm
- CMPL DX, $0x00010000
- JLT three_bytes_emit_remainder_encodeBlockAsm
- CMPL DX, $0x01000000
- JLT four_bytes_emit_remainder_encodeBlockAsm
- MOVB $0xfc, (AX)
- MOVL DX, 1(AX)
- ADDQ $0x05, AX
- JMP memmove_long_emit_remainder_encodeBlockAsm
-
-four_bytes_emit_remainder_encodeBlockAsm:
- MOVL DX, BX
- SHRL $0x10, BX
- MOVB $0xf8, (AX)
- MOVW DX, 1(AX)
- MOVB BL, 3(AX)
- ADDQ $0x04, AX
- JMP memmove_long_emit_remainder_encodeBlockAsm
-
-three_bytes_emit_remainder_encodeBlockAsm:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_emit_remainder_encodeBlockAsm
-
-two_bytes_emit_remainder_encodeBlockAsm:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
- CMPL DX, $0x40
- JL memmove_emit_remainder_encodeBlockAsm
- JMP memmove_long_emit_remainder_encodeBlockAsm
-
-one_byte_emit_remainder_encodeBlockAsm:
- SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
-
-memmove_emit_remainder_encodeBlockAsm:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveShort
- CMPQ BX, $0x08
- JLE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8
- CMPQ BX, $0x10
- JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16
- CMPQ BX, $0x20
- JBE emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32
- JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64
-
-emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8:
- MOVQ (CX), SI
- MOVQ SI, (AX)
- JMP memmove_end_copy_emit_remainder_encodeBlockAsm
-
-emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeBlockAsm
-
-emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeBlockAsm
-
-emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
-
-memmove_end_copy_emit_remainder_encodeBlockAsm:
- MOVQ DX, AX
- JMP emit_literal_done_emit_remainder_encodeBlockAsm
-
-memmove_long_emit_remainder_encodeBlockAsm:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVQ BX, DI
- SHRQ $0x05, DI
- MOVQ AX, SI
- ANDL $0x0000001f, SI
- MOVQ $0x00000040, R8
- SUBQ SI, R8
- DECQ DI
- JA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
-
-emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back:
- MOVOU (SI), X4
- MOVOU 16(SI), X5
- MOVOA X4, (R9)
- MOVOA X5, 16(R9)
- ADDQ $0x20, R9
- ADDQ $0x20, SI
- ADDQ $0x20, R8
- DECQ DI
- JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back
-
-emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
- ADDQ $0x20, R8
- CMPQ BX, R8
- JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
-
-emit_literal_done_emit_remainder_encodeBlockAsm:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
- RET
-
-// func encodeBlockAsm4MB(dst []byte, src []byte) int
-// Requires: BMI, SSE2
-TEXT ·encodeBlockAsm4MB(SB), $65560-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000200, CX
- LEAQ 24(SP), DX
- PXOR X0, X0
-
-zero_loop_encodeBlockAsm4MB:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
- JNZ zero_loop_encodeBlockAsm4MB
- MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), SI
- MOVL SI, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL CX, 16(SP)
- MOVQ src_base+24(FP), DX
-
-search_loop_encodeBlockAsm4MB:
- MOVL CX, SI
- SUBL 12(SP), SI
- SHRL $0x06, SI
- LEAL 4(CX)(SI*1), SI
- CMPL SI, 8(SP)
- JGE emit_remainder_encodeBlockAsm4MB
- MOVQ (DX)(CX*1), DI
- MOVL SI, 20(SP)
- MOVQ $0x0000cf1bbcdcbf9b, R9
- MOVQ DI, R10
- MOVQ DI, R11
- SHRQ $0x08, R11
- SHLQ $0x10, R10
- IMULQ R9, R10
- SHRQ $0x32, R10
- SHLQ $0x10, R11
- IMULQ R9, R11
- SHRQ $0x32, R11
- MOVL 24(SP)(R10*4), SI
- MOVL 24(SP)(R11*4), R8
- MOVL CX, 24(SP)(R10*4)
- LEAL 1(CX), R10
- MOVL R10, 24(SP)(R11*4)
- MOVQ DI, R10
- SHRQ $0x10, R10
- SHLQ $0x10, R10
- IMULQ R9, R10
- SHRQ $0x32, R10
- MOVL CX, R9
- SUBL 16(SP), R9
- MOVL 1(DX)(R9*1), R11
- MOVQ DI, R9
- SHRQ $0x08, R9
- CMPL R9, R11
- JNE no_repeat_found_encodeBlockAsm4MB
- LEAL 1(CX), DI
- MOVL 12(SP), R8
- MOVL DI, SI
- SUBL 16(SP), SI
- JZ repeat_extend_back_end_encodeBlockAsm4MB
-
-repeat_extend_back_loop_encodeBlockAsm4MB:
- CMPL DI, R8
- JLE repeat_extend_back_end_encodeBlockAsm4MB
- MOVB -1(DX)(SI*1), BL
- MOVB -1(DX)(DI*1), R9
- CMPB BL, R9
- JNE repeat_extend_back_end_encodeBlockAsm4MB
- LEAL -1(DI), DI
- DECL SI
- JNZ repeat_extend_back_loop_encodeBlockAsm4MB
-
-repeat_extend_back_end_encodeBlockAsm4MB:
- MOVL 12(SP), SI
- CMPL SI, DI
- JEQ emit_literal_done_repeat_emit_encodeBlockAsm4MB
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(SI*1), R10
- SUBL SI, R9
- LEAL -1(R9), SI
- CMPL SI, $0x3c
- JLT one_byte_repeat_emit_encodeBlockAsm4MB
- CMPL SI, $0x00000100
- JLT two_bytes_repeat_emit_encodeBlockAsm4MB
- CMPL SI, $0x00010000
- JLT three_bytes_repeat_emit_encodeBlockAsm4MB
- MOVL SI, R11
- SHRL $0x10, R11
- MOVB $0xf8, (AX)
- MOVW SI, 1(AX)
- MOVB R11, 3(AX)
- ADDQ $0x04, AX
- JMP memmove_long_repeat_emit_encodeBlockAsm4MB
-
-three_bytes_repeat_emit_encodeBlockAsm4MB:
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_repeat_emit_encodeBlockAsm4MB
-
-two_bytes_repeat_emit_encodeBlockAsm4MB:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_repeat_emit_encodeBlockAsm4MB
- JMP memmove_long_repeat_emit_encodeBlockAsm4MB
-
-one_byte_repeat_emit_encodeBlockAsm4MB:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, AX
-
-memmove_repeat_emit_encodeBlockAsm4MB:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveShort
- CMPQ R9, $0x08
- JLE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8
- CMPQ R9, $0x10
- JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32
- JMP emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64
-
-emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8:
- MOVQ (R10), R11
- MOVQ R11, (AX)
- JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB
-
-emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16:
- MOVQ (R10), R11
- MOVQ -8(R10)(R9*1), R10
- MOVQ R11, (AX)
- MOVQ R10, -8(AX)(R9*1)
- JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB
-
-emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32:
- MOVOU (R10), X0
- MOVOU -16(R10)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB
-
-emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64:
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_repeat_emit_encodeBlockAsm4MB:
- MOVQ SI, AX
- JMP emit_literal_done_repeat_emit_encodeBlockAsm4MB
-
-memmove_long_repeat_emit_encodeBlockAsm4MB:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveLong
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVQ R9, R12
- SHRQ $0x05, R12
- MOVQ AX, R11
- ANDL $0x0000001f, R11
- MOVQ $0x00000040, R13
- SUBQ R11, R13
- DECQ R12
- JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
- LEAQ -32(R10)(R13*1), R11
- LEAQ -32(AX)(R13*1), R14
-
-emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back:
- MOVOU (R11), X4
- MOVOU 16(R11), X5
- MOVOA X4, (R14)
- MOVOA X5, 16(R14)
- ADDQ $0x20, R14
- ADDQ $0x20, R11
- ADDQ $0x20, R13
- DECQ R12
- JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back
-
-emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32:
- MOVOU -32(R10)(R13*1), X4
- MOVOU -16(R10)(R13*1), X5
- MOVOA X4, -32(AX)(R13*1)
- MOVOA X5, -16(AX)(R13*1)
- ADDQ $0x20, R13
- CMPQ R9, R13
- JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ SI, AX
-
-emit_literal_done_repeat_emit_encodeBlockAsm4MB:
- ADDL $0x05, CX
- MOVL CX, SI
- SUBL 16(SP), SI
- MOVQ src_len+32(FP), R9
- SUBL CX, R9
- LEAQ (DX)(CX*1), R10
- LEAQ (DX)(SI*1), SI
-
- // matchLen
- XORL R12, R12
- CMPL R9, $0x08
- JL matchlen_match4_repeat_extend_encodeBlockAsm4MB
-
-matchlen_loopback_repeat_extend_encodeBlockAsm4MB:
- MOVQ (R10)(R12*1), R11
- XORQ (SI)(R12*1), R11
- TESTQ R11, R11
- JZ matchlen_loop_repeat_extend_encodeBlockAsm4MB
-
-#ifdef GOAMD64_v3
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R11, R11
-
-#endif
- SARQ $0x03, R11
- LEAL (R12)(R11*1), R12
- JMP repeat_extend_forward_end_encodeBlockAsm4MB
-
-matchlen_loop_repeat_extend_encodeBlockAsm4MB:
- LEAL -8(R9), R9
- LEAL 8(R12), R12
- CMPL R9, $0x08
- JGE matchlen_loopback_repeat_extend_encodeBlockAsm4MB
- JZ repeat_extend_forward_end_encodeBlockAsm4MB
-
-matchlen_match4_repeat_extend_encodeBlockAsm4MB:
- CMPL R9, $0x04
- JL matchlen_match2_repeat_extend_encodeBlockAsm4MB
- MOVL (R10)(R12*1), R11
- CMPL (SI)(R12*1), R11
- JNE matchlen_match2_repeat_extend_encodeBlockAsm4MB
- SUBL $0x04, R9
- LEAL 4(R12), R12
-
-matchlen_match2_repeat_extend_encodeBlockAsm4MB:
- CMPL R9, $0x02
- JL matchlen_match1_repeat_extend_encodeBlockAsm4MB
- MOVW (R10)(R12*1), R11
- CMPW (SI)(R12*1), R11
- JNE matchlen_match1_repeat_extend_encodeBlockAsm4MB
- SUBL $0x02, R9
- LEAL 2(R12), R12
-
-matchlen_match1_repeat_extend_encodeBlockAsm4MB:
- CMPL R9, $0x01
- JL repeat_extend_forward_end_encodeBlockAsm4MB
- MOVB (R10)(R12*1), R11
- CMPB (SI)(R12*1), R11
- JNE repeat_extend_forward_end_encodeBlockAsm4MB
- LEAL 1(R12), R12
-
-repeat_extend_forward_end_encodeBlockAsm4MB:
- ADDL R12, CX
- MOVL CX, SI
- SUBL DI, SI
- MOVL 16(SP), DI
- TESTL R8, R8
- JZ repeat_as_copy_encodeBlockAsm4MB
-
- // emitRepeat
- MOVL SI, R8
- LEAL -4(SI), SI
- CMPL R8, $0x08
- JLE repeat_two_match_repeat_encodeBlockAsm4MB
- CMPL R8, $0x0c
- JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB
- CMPL DI, $0x00000800
- JLT repeat_two_offset_match_repeat_encodeBlockAsm4MB
-
-cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB:
- CMPL SI, $0x00000104
- JLT repeat_three_match_repeat_encodeBlockAsm4MB
- CMPL SI, $0x00010100
- JLT repeat_four_match_repeat_encodeBlockAsm4MB
- LEAL -65536(SI), SI
- MOVL SI, DI
- MOVW $0x001d, (AX)
- MOVW SI, 2(AX)
- SARL $0x10, DI
- MOVB DI, 4(AX)
- ADDQ $0x05, AX
- JMP repeat_end_emit_encodeBlockAsm4MB
-
-repeat_four_match_repeat_encodeBlockAsm4MB:
- LEAL -256(SI), SI
- MOVW $0x0019, (AX)
- MOVW SI, 2(AX)
- ADDQ $0x04, AX
- JMP repeat_end_emit_encodeBlockAsm4MB
-
-repeat_three_match_repeat_encodeBlockAsm4MB:
- LEAL -4(SI), SI
- MOVW $0x0015, (AX)
- MOVB SI, 2(AX)
- ADDQ $0x03, AX
- JMP repeat_end_emit_encodeBlockAsm4MB
-
-repeat_two_match_repeat_encodeBlockAsm4MB:
- SHLL $0x02, SI
- ORL $0x01, SI
- MOVW SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm4MB
-
-repeat_two_offset_match_repeat_encodeBlockAsm4MB:
- XORQ R8, R8
- LEAL 1(R8)(SI*4), SI
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm4MB
-
-repeat_as_copy_encodeBlockAsm4MB:
- // emitCopy
- CMPL DI, $0x00010000
- JL two_byte_offset_repeat_as_copy_encodeBlockAsm4MB
-
-four_bytes_loop_back_repeat_as_copy_encodeBlockAsm4MB:
- CMPL SI, $0x40
- JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB
- MOVB $0xff, (AX)
- MOVL DI, 1(AX)
- LEAL -64(SI), SI
- ADDQ $0x05, AX
- CMPL SI, $0x04
- JL four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB
-
- // emitRepeat
- MOVL SI, R8
- LEAL -4(SI), SI
- CMPL R8, $0x08
- JLE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy
- CMPL R8, $0x0c
- JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy
- CMPL DI, $0x00000800
- JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy
-
-cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
- CMPL SI, $0x00000104
- JLT repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy
- CMPL SI, $0x00010100
- JLT repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy
- LEAL -65536(SI), SI
- MOVL SI, DI
- MOVW $0x001d, (AX)
- MOVW SI, 2(AX)
- SARL $0x10, DI
- MOVB DI, 4(AX)
- ADDQ $0x05, AX
- JMP repeat_end_emit_encodeBlockAsm4MB
-
-repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
- LEAL -256(SI), SI
- MOVW $0x0019, (AX)
- MOVW SI, 2(AX)
- ADDQ $0x04, AX
- JMP repeat_end_emit_encodeBlockAsm4MB
-
-repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
- LEAL -4(SI), SI
- MOVW $0x0015, (AX)
- MOVB SI, 2(AX)
- ADDQ $0x03, AX
- JMP repeat_end_emit_encodeBlockAsm4MB
-
-repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
- SHLL $0x02, SI
- ORL $0x01, SI
- MOVW SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm4MB
-
-repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
- XORQ R8, R8
- LEAL 1(R8)(SI*4), SI
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm4MB
- JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm4MB
-
-four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB:
- TESTL SI, SI
- JZ repeat_end_emit_encodeBlockAsm4MB
- MOVB $0x03, BL
- LEAL -4(BX)(SI*4), SI
- MOVB SI, (AX)
- MOVL DI, 1(AX)
- ADDQ $0x05, AX
- JMP repeat_end_emit_encodeBlockAsm4MB
-
-two_byte_offset_repeat_as_copy_encodeBlockAsm4MB:
- CMPL SI, $0x40
- JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB
- MOVB $0xee, (AX)
- MOVW DI, 1(AX)
- LEAL -60(SI), SI
- ADDQ $0x03, AX
-
- // emitRepeat
- MOVL SI, R8
- LEAL -4(SI), SI
- CMPL R8, $0x08
- JLE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
- CMPL R8, $0x0c
- JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
- CMPL DI, $0x00000800
- JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
-
-cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
- CMPL SI, $0x00000104
- JLT repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
- CMPL SI, $0x00010100
- JLT repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
- LEAL -65536(SI), SI
- MOVL SI, DI
- MOVW $0x001d, (AX)
- MOVW SI, 2(AX)
- SARL $0x10, DI
- MOVB DI, 4(AX)
- ADDQ $0x05, AX
- JMP repeat_end_emit_encodeBlockAsm4MB
-
-repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
- LEAL -256(SI), SI
- MOVW $0x0019, (AX)
- MOVW SI, 2(AX)
- ADDQ $0x04, AX
- JMP repeat_end_emit_encodeBlockAsm4MB
-
-repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
- LEAL -4(SI), SI
- MOVW $0x0015, (AX)
- MOVB SI, 2(AX)
- ADDQ $0x03, AX
- JMP repeat_end_emit_encodeBlockAsm4MB
-
-repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
- SHLL $0x02, SI
- ORL $0x01, SI
- MOVW SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm4MB
-
-repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
- XORQ R8, R8
- LEAL 1(R8)(SI*4), SI
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm4MB
- JMP two_byte_offset_repeat_as_copy_encodeBlockAsm4MB
-
-two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB:
- CMPL SI, $0x0c
- JGE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB
- CMPL DI, $0x00000800
- JGE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB
- MOVB $0x01, BL
- LEAL -16(BX)(SI*4), SI
- MOVB DI, 1(AX)
- SHRL $0x08, DI
- SHLL $0x05, DI
- ORL DI, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm4MB
-
-emit_copy_three_repeat_as_copy_encodeBlockAsm4MB:
- MOVB $0x02, BL
- LEAL -4(BX)(SI*4), SI
- MOVB SI, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
-
-repeat_end_emit_encodeBlockAsm4MB:
- MOVL CX, 12(SP)
- JMP search_loop_encodeBlockAsm4MB
-
-no_repeat_found_encodeBlockAsm4MB:
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeBlockAsm4MB
- SHRQ $0x08, DI
- MOVL 24(SP)(R10*4), SI
- LEAL 2(CX), R9
- CMPL (DX)(R8*1), DI
- JEQ candidate2_match_encodeBlockAsm4MB
- MOVL R9, 24(SP)(R10*4)
- SHRQ $0x08, DI
- CMPL (DX)(SI*1), DI
- JEQ candidate3_match_encodeBlockAsm4MB
- MOVL 20(SP), CX
- JMP search_loop_encodeBlockAsm4MB
-
-candidate3_match_encodeBlockAsm4MB:
- ADDL $0x02, CX
- JMP candidate_match_encodeBlockAsm4MB
-
-candidate2_match_encodeBlockAsm4MB:
- MOVL R9, 24(SP)(R10*4)
- INCL CX
- MOVL R8, SI
-
-candidate_match_encodeBlockAsm4MB:
- MOVL 12(SP), DI
- TESTL SI, SI
- JZ match_extend_back_end_encodeBlockAsm4MB
-
-match_extend_back_loop_encodeBlockAsm4MB:
- CMPL CX, DI
- JLE match_extend_back_end_encodeBlockAsm4MB
- MOVB -1(DX)(SI*1), BL
- MOVB -1(DX)(CX*1), R8
- CMPB BL, R8
- JNE match_extend_back_end_encodeBlockAsm4MB
- LEAL -1(CX), CX
- DECL SI
- JZ match_extend_back_end_encodeBlockAsm4MB
- JMP match_extend_back_loop_encodeBlockAsm4MB
-
-match_extend_back_end_encodeBlockAsm4MB:
- MOVL CX, DI
- SUBL 12(SP), DI
- LEAQ 4(AX)(DI*1), DI
- CMPQ DI, (SP)
- JL match_dst_size_check_encodeBlockAsm4MB
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_dst_size_check_encodeBlockAsm4MB:
- MOVL CX, DI
- MOVL 12(SP), R8
- CMPL R8, DI
- JEQ emit_literal_done_match_emit_encodeBlockAsm4MB
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(R8*1), DI
- SUBL R8, R9
- LEAL -1(R9), R8
- CMPL R8, $0x3c
- JLT one_byte_match_emit_encodeBlockAsm4MB
- CMPL R8, $0x00000100
- JLT two_bytes_match_emit_encodeBlockAsm4MB
- CMPL R8, $0x00010000
- JLT three_bytes_match_emit_encodeBlockAsm4MB
- MOVL R8, R10
- SHRL $0x10, R10
- MOVB $0xf8, (AX)
- MOVW R8, 1(AX)
- MOVB R10, 3(AX)
- ADDQ $0x04, AX
- JMP memmove_long_match_emit_encodeBlockAsm4MB
-
-three_bytes_match_emit_encodeBlockAsm4MB:
- MOVB $0xf4, (AX)
- MOVW R8, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_match_emit_encodeBlockAsm4MB
-
-two_bytes_match_emit_encodeBlockAsm4MB:
- MOVB $0xf0, (AX)
- MOVB R8, 1(AX)
- ADDQ $0x02, AX
- CMPL R8, $0x40
- JL memmove_match_emit_encodeBlockAsm4MB
- JMP memmove_long_match_emit_encodeBlockAsm4MB
-
-one_byte_match_emit_encodeBlockAsm4MB:
- SHLB $0x02, R8
- MOVB R8, (AX)
- ADDQ $0x01, AX
-
-memmove_match_emit_encodeBlockAsm4MB:
- LEAQ (AX)(R9*1), R8
-
- // genMemMoveShort
- CMPQ R9, $0x08
- JLE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8
- CMPQ R9, $0x10
- JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32
- JMP emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64
-
-emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8:
- MOVQ (DI), R10
- MOVQ R10, (AX)
- JMP memmove_end_copy_match_emit_encodeBlockAsm4MB
-
-emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16:
- MOVQ (DI), R10
- MOVQ -8(DI)(R9*1), DI
- MOVQ R10, (AX)
- MOVQ DI, -8(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeBlockAsm4MB
-
-emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32:
- MOVOU (DI), X0
- MOVOU -16(DI)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeBlockAsm4MB
-
-emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64:
- MOVOU (DI), X0
- MOVOU 16(DI), X1
- MOVOU -32(DI)(R9*1), X2
- MOVOU -16(DI)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_match_emit_encodeBlockAsm4MB:
- MOVQ R8, AX
- JMP emit_literal_done_match_emit_encodeBlockAsm4MB
-
-memmove_long_match_emit_encodeBlockAsm4MB:
- LEAQ (AX)(R9*1), R8
-
- // genMemMoveLong
- MOVOU (DI), X0
- MOVOU 16(DI), X1
- MOVOU -32(DI)(R9*1), X2
- MOVOU -16(DI)(R9*1), X3
- MOVQ R9, R11
- SHRQ $0x05, R11
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R12
- SUBQ R10, R12
- DECQ R11
- JA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
- LEAQ -32(DI)(R12*1), R10
- LEAQ -32(AX)(R12*1), R13
-
-emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R13)
- MOVOA X5, 16(R13)
- ADDQ $0x20, R13
- ADDQ $0x20, R10
- ADDQ $0x20, R12
- DECQ R11
- JNA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32:
- MOVOU -32(DI)(R12*1), X4
- MOVOU -16(DI)(R12*1), X5
- MOVOA X4, -32(AX)(R12*1)
- MOVOA X5, -16(AX)(R12*1)
- ADDQ $0x20, R12
- CMPQ R9, R12
- JAE emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ R8, AX
-
-emit_literal_done_match_emit_encodeBlockAsm4MB:
-match_nolit_loop_encodeBlockAsm4MB:
- MOVL CX, DI
- SUBL SI, DI
- MOVL DI, 16(SP)
- ADDL $0x04, CX
- ADDL $0x04, SI
- MOVQ src_len+32(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(SI*1), SI
-
- // matchLen
- XORL R10, R10
- CMPL DI, $0x08
- JL matchlen_match4_match_nolit_encodeBlockAsm4MB
-
-matchlen_loopback_match_nolit_encodeBlockAsm4MB:
- MOVQ (R8)(R10*1), R9
- XORQ (SI)(R10*1), R9
- TESTQ R9, R9
- JZ matchlen_loop_match_nolit_encodeBlockAsm4MB
-
-#ifdef GOAMD64_v3
- TZCNTQ R9, R9
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R9, R9
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R9, R9
-
-#endif
- SARQ $0x03, R9
- LEAL (R10)(R9*1), R10
- JMP match_nolit_end_encodeBlockAsm4MB
-
-matchlen_loop_match_nolit_encodeBlockAsm4MB:
- LEAL -8(DI), DI
- LEAL 8(R10), R10
- CMPL DI, $0x08
- JGE matchlen_loopback_match_nolit_encodeBlockAsm4MB
- JZ match_nolit_end_encodeBlockAsm4MB
-
-matchlen_match4_match_nolit_encodeBlockAsm4MB:
- CMPL DI, $0x04
- JL matchlen_match2_match_nolit_encodeBlockAsm4MB
- MOVL (R8)(R10*1), R9
- CMPL (SI)(R10*1), R9
- JNE matchlen_match2_match_nolit_encodeBlockAsm4MB
- SUBL $0x04, DI
- LEAL 4(R10), R10
-
-matchlen_match2_match_nolit_encodeBlockAsm4MB:
- CMPL DI, $0x02
- JL matchlen_match1_match_nolit_encodeBlockAsm4MB
- MOVW (R8)(R10*1), R9
- CMPW (SI)(R10*1), R9
- JNE matchlen_match1_match_nolit_encodeBlockAsm4MB
- SUBL $0x02, DI
- LEAL 2(R10), R10
-
-matchlen_match1_match_nolit_encodeBlockAsm4MB:
- CMPL DI, $0x01
- JL match_nolit_end_encodeBlockAsm4MB
- MOVB (R8)(R10*1), R9
- CMPB (SI)(R10*1), R9
- JNE match_nolit_end_encodeBlockAsm4MB
- LEAL 1(R10), R10
-
-match_nolit_end_encodeBlockAsm4MB:
- ADDL R10, CX
- MOVL 16(SP), SI
- ADDL $0x04, R10
- MOVL CX, 12(SP)
-
- // emitCopy
- CMPL SI, $0x00010000
- JL two_byte_offset_match_nolit_encodeBlockAsm4MB
-
-four_bytes_loop_back_match_nolit_encodeBlockAsm4MB:
- CMPL R10, $0x40
- JLE four_bytes_remain_match_nolit_encodeBlockAsm4MB
- MOVB $0xff, (AX)
- MOVL SI, 1(AX)
- LEAL -64(R10), R10
- ADDQ $0x05, AX
- CMPL R10, $0x04
- JL four_bytes_remain_match_nolit_encodeBlockAsm4MB
-
- // emitRepeat
- MOVL R10, DI
- LEAL -4(R10), R10
- CMPL DI, $0x08
- JLE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy
- CMPL DI, $0x0c
- JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy
- CMPL SI, $0x00000800
- JLT repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy
-
-cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy:
- CMPL R10, $0x00000104
- JLT repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy
- CMPL R10, $0x00010100
- JLT repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy
- LEAL -65536(R10), R10
- MOVL R10, SI
- MOVW $0x001d, (AX)
- MOVW R10, 2(AX)
- SARL $0x10, SI
- MOVB SI, 4(AX)
- ADDQ $0x05, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
-
-repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy:
- LEAL -256(R10), R10
- MOVW $0x0019, (AX)
- MOVW R10, 2(AX)
- ADDQ $0x04, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
-
-repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy:
- LEAL -4(R10), R10
- MOVW $0x0015, (AX)
- MOVB R10, 2(AX)
- ADDQ $0x03, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
-
-repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy:
- SHLL $0x02, R10
- ORL $0x01, R10
- MOVW R10, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
-
-repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy:
- XORQ DI, DI
- LEAL 1(DI)(R10*4), R10
- MOVB SI, 1(AX)
- SARL $0x08, SI
- SHLL $0x05, SI
- ORL SI, R10
- MOVB R10, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
- JMP four_bytes_loop_back_match_nolit_encodeBlockAsm4MB
-
-four_bytes_remain_match_nolit_encodeBlockAsm4MB:
- TESTL R10, R10
- JZ match_nolit_emitcopy_end_encodeBlockAsm4MB
- MOVB $0x03, BL
- LEAL -4(BX)(R10*4), R10
- MOVB R10, (AX)
- MOVL SI, 1(AX)
- ADDQ $0x05, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
-
-two_byte_offset_match_nolit_encodeBlockAsm4MB:
- CMPL R10, $0x40
- JLE two_byte_offset_short_match_nolit_encodeBlockAsm4MB
- MOVB $0xee, (AX)
- MOVW SI, 1(AX)
- LEAL -60(R10), R10
- ADDQ $0x03, AX
-
- // emitRepeat
- MOVL R10, DI
- LEAL -4(R10), R10
- CMPL DI, $0x08
- JLE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short
- CMPL DI, $0x0c
- JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short
- CMPL SI, $0x00000800
- JLT repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short
-
-cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short:
- CMPL R10, $0x00000104
- JLT repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short
- CMPL R10, $0x00010100
- JLT repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short
- LEAL -65536(R10), R10
- MOVL R10, SI
- MOVW $0x001d, (AX)
- MOVW R10, 2(AX)
- SARL $0x10, SI
- MOVB SI, 4(AX)
- ADDQ $0x05, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
-
-repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short:
- LEAL -256(R10), R10
- MOVW $0x0019, (AX)
- MOVW R10, 2(AX)
- ADDQ $0x04, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
-
-repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short:
- LEAL -4(R10), R10
- MOVW $0x0015, (AX)
- MOVB R10, 2(AX)
- ADDQ $0x03, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
-
-repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short:
- SHLL $0x02, R10
- ORL $0x01, R10
- MOVW R10, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
-
-repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short:
- XORQ DI, DI
- LEAL 1(DI)(R10*4), R10
- MOVB SI, 1(AX)
- SARL $0x08, SI
- SHLL $0x05, SI
- ORL SI, R10
- MOVB R10, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
- JMP two_byte_offset_match_nolit_encodeBlockAsm4MB
-
-two_byte_offset_short_match_nolit_encodeBlockAsm4MB:
- CMPL R10, $0x0c
- JGE emit_copy_three_match_nolit_encodeBlockAsm4MB
- CMPL SI, $0x00000800
- JGE emit_copy_three_match_nolit_encodeBlockAsm4MB
- MOVB $0x01, BL
- LEAL -16(BX)(R10*4), R10
- MOVB SI, 1(AX)
- SHRL $0x08, SI
- SHLL $0x05, SI
- ORL SI, R10
- MOVB R10, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm4MB
-
-emit_copy_three_match_nolit_encodeBlockAsm4MB:
- MOVB $0x02, BL
- LEAL -4(BX)(R10*4), R10
- MOVB R10, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
-
-match_nolit_emitcopy_end_encodeBlockAsm4MB:
- CMPL CX, 8(SP)
- JGE emit_remainder_encodeBlockAsm4MB
- MOVQ -2(DX)(CX*1), DI
- CMPQ AX, (SP)
- JL match_nolit_dst_ok_encodeBlockAsm4MB
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_nolit_dst_ok_encodeBlockAsm4MB:
- MOVQ $0x0000cf1bbcdcbf9b, R9
- MOVQ DI, R8
- SHRQ $0x10, DI
- MOVQ DI, SI
- SHLQ $0x10, R8
- IMULQ R9, R8
- SHRQ $0x32, R8
- SHLQ $0x10, SI
- IMULQ R9, SI
- SHRQ $0x32, SI
- LEAL -2(CX), R9
- LEAQ 24(SP)(SI*4), R10
- MOVL (R10), SI
- MOVL R9, 24(SP)(R8*4)
- MOVL CX, (R10)
- CMPL (DX)(SI*1), DI
- JEQ match_nolit_loop_encodeBlockAsm4MB
- INCL CX
- JMP search_loop_encodeBlockAsm4MB
-
-emit_remainder_encodeBlockAsm4MB:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 4(AX)(CX*1), CX
- CMPQ CX, (SP)
- JL emit_remainder_ok_encodeBlockAsm4MB
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-emit_remainder_ok_encodeBlockAsm4MB:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
- JEQ emit_literal_done_emit_remainder_encodeBlockAsm4MB
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
- LEAL -1(SI), DX
- CMPL DX, $0x3c
- JLT one_byte_emit_remainder_encodeBlockAsm4MB
- CMPL DX, $0x00000100
- JLT two_bytes_emit_remainder_encodeBlockAsm4MB
- CMPL DX, $0x00010000
- JLT three_bytes_emit_remainder_encodeBlockAsm4MB
- MOVL DX, BX
- SHRL $0x10, BX
- MOVB $0xf8, (AX)
- MOVW DX, 1(AX)
- MOVB BL, 3(AX)
- ADDQ $0x04, AX
- JMP memmove_long_emit_remainder_encodeBlockAsm4MB
-
-three_bytes_emit_remainder_encodeBlockAsm4MB:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_emit_remainder_encodeBlockAsm4MB
-
-two_bytes_emit_remainder_encodeBlockAsm4MB:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
- CMPL DX, $0x40
- JL memmove_emit_remainder_encodeBlockAsm4MB
- JMP memmove_long_emit_remainder_encodeBlockAsm4MB
-
-one_byte_emit_remainder_encodeBlockAsm4MB:
- SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
-
-memmove_emit_remainder_encodeBlockAsm4MB:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveShort
- CMPQ BX, $0x08
- JLE emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8
- CMPQ BX, $0x10
- JBE emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16
- CMPQ BX, $0x20
- JBE emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32
- JMP emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64
-
-emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8:
- MOVQ (CX), SI
- MOVQ SI, (AX)
- JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB
-
-emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB
-
-emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB
-
-emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
-
-memmove_end_copy_emit_remainder_encodeBlockAsm4MB:
- MOVQ DX, AX
- JMP emit_literal_done_emit_remainder_encodeBlockAsm4MB
-
-memmove_long_emit_remainder_encodeBlockAsm4MB:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVQ BX, DI
- SHRQ $0x05, DI
- MOVQ AX, SI
- ANDL $0x0000001f, SI
- MOVQ $0x00000040, R8
- SUBQ SI, R8
- DECQ DI
- JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
-
-emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back:
- MOVOU (SI), X4
- MOVOU 16(SI), X5
- MOVOA X4, (R9)
- MOVOA X5, 16(R9)
- ADDQ $0x20, R9
- ADDQ $0x20, SI
- ADDQ $0x20, R8
- DECQ DI
- JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back
-
-emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
- ADDQ $0x20, R8
- CMPQ BX, R8
- JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
-
-emit_literal_done_emit_remainder_encodeBlockAsm4MB:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
- RET
-
-// func encodeBlockAsm12B(dst []byte, src []byte) int
-// Requires: BMI, SSE2
-TEXT ·encodeBlockAsm12B(SB), $16408-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000080, CX
- LEAQ 24(SP), DX
- PXOR X0, X0
-
-zero_loop_encodeBlockAsm12B:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
- JNZ zero_loop_encodeBlockAsm12B
- MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), SI
- MOVL SI, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL CX, 16(SP)
- MOVQ src_base+24(FP), DX
-
-search_loop_encodeBlockAsm12B:
- MOVL CX, SI
- SUBL 12(SP), SI
- SHRL $0x05, SI
- LEAL 4(CX)(SI*1), SI
- CMPL SI, 8(SP)
- JGE emit_remainder_encodeBlockAsm12B
- MOVQ (DX)(CX*1), DI
- MOVL SI, 20(SP)
- MOVQ $0x000000cf1bbcdcbb, R9
- MOVQ DI, R10
- MOVQ DI, R11
- SHRQ $0x08, R11
- SHLQ $0x18, R10
- IMULQ R9, R10
- SHRQ $0x34, R10
- SHLQ $0x18, R11
- IMULQ R9, R11
- SHRQ $0x34, R11
- MOVL 24(SP)(R10*4), SI
- MOVL 24(SP)(R11*4), R8
- MOVL CX, 24(SP)(R10*4)
- LEAL 1(CX), R10
- MOVL R10, 24(SP)(R11*4)
- MOVQ DI, R10
- SHRQ $0x10, R10
- SHLQ $0x18, R10
- IMULQ R9, R10
- SHRQ $0x34, R10
- MOVL CX, R9
- SUBL 16(SP), R9
- MOVL 1(DX)(R9*1), R11
- MOVQ DI, R9
- SHRQ $0x08, R9
- CMPL R9, R11
- JNE no_repeat_found_encodeBlockAsm12B
- LEAL 1(CX), DI
- MOVL 12(SP), R8
- MOVL DI, SI
- SUBL 16(SP), SI
- JZ repeat_extend_back_end_encodeBlockAsm12B
-
-repeat_extend_back_loop_encodeBlockAsm12B:
- CMPL DI, R8
- JLE repeat_extend_back_end_encodeBlockAsm12B
- MOVB -1(DX)(SI*1), BL
- MOVB -1(DX)(DI*1), R9
- CMPB BL, R9
- JNE repeat_extend_back_end_encodeBlockAsm12B
- LEAL -1(DI), DI
- DECL SI
- JNZ repeat_extend_back_loop_encodeBlockAsm12B
-
-repeat_extend_back_end_encodeBlockAsm12B:
- MOVL 12(SP), SI
- CMPL SI, DI
- JEQ emit_literal_done_repeat_emit_encodeBlockAsm12B
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(SI*1), R10
- SUBL SI, R9
- LEAL -1(R9), SI
- CMPL SI, $0x3c
- JLT one_byte_repeat_emit_encodeBlockAsm12B
- CMPL SI, $0x00000100
- JLT two_bytes_repeat_emit_encodeBlockAsm12B
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_repeat_emit_encodeBlockAsm12B
-
-two_bytes_repeat_emit_encodeBlockAsm12B:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_repeat_emit_encodeBlockAsm12B
- JMP memmove_long_repeat_emit_encodeBlockAsm12B
-
-one_byte_repeat_emit_encodeBlockAsm12B:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, AX
-
-memmove_repeat_emit_encodeBlockAsm12B:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveShort
- CMPQ R9, $0x08
- JLE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8
- CMPQ R9, $0x10
- JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32
- JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64
-
-emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8:
- MOVQ (R10), R11
- MOVQ R11, (AX)
- JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B
-
-emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16:
- MOVQ (R10), R11
- MOVQ -8(R10)(R9*1), R10
- MOVQ R11, (AX)
- MOVQ R10, -8(AX)(R9*1)
- JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B
-
-emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32:
- MOVOU (R10), X0
- MOVOU -16(R10)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B
-
-emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64:
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_repeat_emit_encodeBlockAsm12B:
- MOVQ SI, AX
- JMP emit_literal_done_repeat_emit_encodeBlockAsm12B
-
-memmove_long_repeat_emit_encodeBlockAsm12B:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveLong
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVQ R9, R12
- SHRQ $0x05, R12
- MOVQ AX, R11
- ANDL $0x0000001f, R11
- MOVQ $0x00000040, R13
- SUBQ R11, R13
- DECQ R12
- JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
- LEAQ -32(R10)(R13*1), R11
- LEAQ -32(AX)(R13*1), R14
-
-emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back:
- MOVOU (R11), X4
- MOVOU 16(R11), X5
- MOVOA X4, (R14)
- MOVOA X5, 16(R14)
- ADDQ $0x20, R14
- ADDQ $0x20, R11
- ADDQ $0x20, R13
- DECQ R12
- JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back
-
-emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32:
- MOVOU -32(R10)(R13*1), X4
- MOVOU -16(R10)(R13*1), X5
- MOVOA X4, -32(AX)(R13*1)
- MOVOA X5, -16(AX)(R13*1)
- ADDQ $0x20, R13
- CMPQ R9, R13
- JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ SI, AX
-
-emit_literal_done_repeat_emit_encodeBlockAsm12B:
- ADDL $0x05, CX
- MOVL CX, SI
- SUBL 16(SP), SI
- MOVQ src_len+32(FP), R9
- SUBL CX, R9
- LEAQ (DX)(CX*1), R10
- LEAQ (DX)(SI*1), SI
-
- // matchLen
- XORL R12, R12
- CMPL R9, $0x08
- JL matchlen_match4_repeat_extend_encodeBlockAsm12B
-
-matchlen_loopback_repeat_extend_encodeBlockAsm12B:
- MOVQ (R10)(R12*1), R11
- XORQ (SI)(R12*1), R11
- TESTQ R11, R11
- JZ matchlen_loop_repeat_extend_encodeBlockAsm12B
-
-#ifdef GOAMD64_v3
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R11, R11
-
-#endif
- SARQ $0x03, R11
- LEAL (R12)(R11*1), R12
- JMP repeat_extend_forward_end_encodeBlockAsm12B
-
-matchlen_loop_repeat_extend_encodeBlockAsm12B:
- LEAL -8(R9), R9
- LEAL 8(R12), R12
- CMPL R9, $0x08
- JGE matchlen_loopback_repeat_extend_encodeBlockAsm12B
- JZ repeat_extend_forward_end_encodeBlockAsm12B
-
-matchlen_match4_repeat_extend_encodeBlockAsm12B:
- CMPL R9, $0x04
- JL matchlen_match2_repeat_extend_encodeBlockAsm12B
- MOVL (R10)(R12*1), R11
- CMPL (SI)(R12*1), R11
- JNE matchlen_match2_repeat_extend_encodeBlockAsm12B
- SUBL $0x04, R9
- LEAL 4(R12), R12
-
-matchlen_match2_repeat_extend_encodeBlockAsm12B:
- CMPL R9, $0x02
- JL matchlen_match1_repeat_extend_encodeBlockAsm12B
- MOVW (R10)(R12*1), R11
- CMPW (SI)(R12*1), R11
- JNE matchlen_match1_repeat_extend_encodeBlockAsm12B
- SUBL $0x02, R9
- LEAL 2(R12), R12
-
-matchlen_match1_repeat_extend_encodeBlockAsm12B:
- CMPL R9, $0x01
- JL repeat_extend_forward_end_encodeBlockAsm12B
- MOVB (R10)(R12*1), R11
- CMPB (SI)(R12*1), R11
- JNE repeat_extend_forward_end_encodeBlockAsm12B
- LEAL 1(R12), R12
-
-repeat_extend_forward_end_encodeBlockAsm12B:
- ADDL R12, CX
- MOVL CX, SI
- SUBL DI, SI
- MOVL 16(SP), DI
- TESTL R8, R8
- JZ repeat_as_copy_encodeBlockAsm12B
-
- // emitRepeat
- MOVL SI, R8
- LEAL -4(SI), SI
- CMPL R8, $0x08
- JLE repeat_two_match_repeat_encodeBlockAsm12B
- CMPL R8, $0x0c
- JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm12B
- CMPL DI, $0x00000800
- JLT repeat_two_offset_match_repeat_encodeBlockAsm12B
-
-cant_repeat_two_offset_match_repeat_encodeBlockAsm12B:
- CMPL SI, $0x00000104
- JLT repeat_three_match_repeat_encodeBlockAsm12B
- LEAL -256(SI), SI
- MOVW $0x0019, (AX)
- MOVW SI, 2(AX)
- ADDQ $0x04, AX
- JMP repeat_end_emit_encodeBlockAsm12B
-
-repeat_three_match_repeat_encodeBlockAsm12B:
- LEAL -4(SI), SI
- MOVW $0x0015, (AX)
- MOVB SI, 2(AX)
- ADDQ $0x03, AX
- JMP repeat_end_emit_encodeBlockAsm12B
-
-repeat_two_match_repeat_encodeBlockAsm12B:
- SHLL $0x02, SI
- ORL $0x01, SI
- MOVW SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm12B
-
-repeat_two_offset_match_repeat_encodeBlockAsm12B:
- XORQ R8, R8
- LEAL 1(R8)(SI*4), SI
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm12B
-
-repeat_as_copy_encodeBlockAsm12B:
- // emitCopy
-two_byte_offset_repeat_as_copy_encodeBlockAsm12B:
- CMPL SI, $0x40
- JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B
- MOVB $0xee, (AX)
- MOVW DI, 1(AX)
- LEAL -60(SI), SI
- ADDQ $0x03, AX
-
- // emitRepeat
- MOVL SI, R8
- LEAL -4(SI), SI
- CMPL R8, $0x08
- JLE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
- CMPL R8, $0x0c
- JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
- CMPL DI, $0x00000800
- JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
-
-cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
- CMPL SI, $0x00000104
- JLT repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
- LEAL -256(SI), SI
- MOVW $0x0019, (AX)
- MOVW SI, 2(AX)
- ADDQ $0x04, AX
- JMP repeat_end_emit_encodeBlockAsm12B
-
-repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
- LEAL -4(SI), SI
- MOVW $0x0015, (AX)
- MOVB SI, 2(AX)
- ADDQ $0x03, AX
- JMP repeat_end_emit_encodeBlockAsm12B
-
-repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
- SHLL $0x02, SI
- ORL $0x01, SI
- MOVW SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm12B
-
-repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
- XORQ R8, R8
- LEAL 1(R8)(SI*4), SI
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm12B
- JMP two_byte_offset_repeat_as_copy_encodeBlockAsm12B
-
-two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B:
- CMPL SI, $0x0c
- JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B
- CMPL DI, $0x00000800
- JGE emit_copy_three_repeat_as_copy_encodeBlockAsm12B
- MOVB $0x01, BL
- LEAL -16(BX)(SI*4), SI
- MOVB DI, 1(AX)
- SHRL $0x08, DI
- SHLL $0x05, DI
- ORL DI, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm12B
-
-emit_copy_three_repeat_as_copy_encodeBlockAsm12B:
- MOVB $0x02, BL
- LEAL -4(BX)(SI*4), SI
- MOVB SI, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
-
-repeat_end_emit_encodeBlockAsm12B:
- MOVL CX, 12(SP)
- JMP search_loop_encodeBlockAsm12B
-
-no_repeat_found_encodeBlockAsm12B:
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeBlockAsm12B
- SHRQ $0x08, DI
- MOVL 24(SP)(R10*4), SI
- LEAL 2(CX), R9
- CMPL (DX)(R8*1), DI
- JEQ candidate2_match_encodeBlockAsm12B
- MOVL R9, 24(SP)(R10*4)
- SHRQ $0x08, DI
- CMPL (DX)(SI*1), DI
- JEQ candidate3_match_encodeBlockAsm12B
- MOVL 20(SP), CX
- JMP search_loop_encodeBlockAsm12B
-
-candidate3_match_encodeBlockAsm12B:
- ADDL $0x02, CX
- JMP candidate_match_encodeBlockAsm12B
-
-candidate2_match_encodeBlockAsm12B:
- MOVL R9, 24(SP)(R10*4)
- INCL CX
- MOVL R8, SI
-
-candidate_match_encodeBlockAsm12B:
- MOVL 12(SP), DI
- TESTL SI, SI
- JZ match_extend_back_end_encodeBlockAsm12B
-
-match_extend_back_loop_encodeBlockAsm12B:
- CMPL CX, DI
- JLE match_extend_back_end_encodeBlockAsm12B
- MOVB -1(DX)(SI*1), BL
- MOVB -1(DX)(CX*1), R8
- CMPB BL, R8
- JNE match_extend_back_end_encodeBlockAsm12B
- LEAL -1(CX), CX
- DECL SI
- JZ match_extend_back_end_encodeBlockAsm12B
- JMP match_extend_back_loop_encodeBlockAsm12B
-
-match_extend_back_end_encodeBlockAsm12B:
- MOVL CX, DI
- SUBL 12(SP), DI
- LEAQ 3(AX)(DI*1), DI
- CMPQ DI, (SP)
- JL match_dst_size_check_encodeBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_dst_size_check_encodeBlockAsm12B:
- MOVL CX, DI
- MOVL 12(SP), R8
- CMPL R8, DI
- JEQ emit_literal_done_match_emit_encodeBlockAsm12B
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(R8*1), DI
- SUBL R8, R9
- LEAL -1(R9), R8
- CMPL R8, $0x3c
- JLT one_byte_match_emit_encodeBlockAsm12B
- CMPL R8, $0x00000100
- JLT two_bytes_match_emit_encodeBlockAsm12B
- MOVB $0xf4, (AX)
- MOVW R8, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_match_emit_encodeBlockAsm12B
-
-two_bytes_match_emit_encodeBlockAsm12B:
- MOVB $0xf0, (AX)
- MOVB R8, 1(AX)
- ADDQ $0x02, AX
- CMPL R8, $0x40
- JL memmove_match_emit_encodeBlockAsm12B
- JMP memmove_long_match_emit_encodeBlockAsm12B
-
-one_byte_match_emit_encodeBlockAsm12B:
- SHLB $0x02, R8
- MOVB R8, (AX)
- ADDQ $0x01, AX
-
-memmove_match_emit_encodeBlockAsm12B:
- LEAQ (AX)(R9*1), R8
-
- // genMemMoveShort
- CMPQ R9, $0x08
- JLE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8
- CMPQ R9, $0x10
- JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32
- JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64
-
-emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8:
- MOVQ (DI), R10
- MOVQ R10, (AX)
- JMP memmove_end_copy_match_emit_encodeBlockAsm12B
-
-emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16:
- MOVQ (DI), R10
- MOVQ -8(DI)(R9*1), DI
- MOVQ R10, (AX)
- MOVQ DI, -8(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeBlockAsm12B
-
-emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32:
- MOVOU (DI), X0
- MOVOU -16(DI)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeBlockAsm12B
-
-emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64:
- MOVOU (DI), X0
- MOVOU 16(DI), X1
- MOVOU -32(DI)(R9*1), X2
- MOVOU -16(DI)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_match_emit_encodeBlockAsm12B:
- MOVQ R8, AX
- JMP emit_literal_done_match_emit_encodeBlockAsm12B
-
-memmove_long_match_emit_encodeBlockAsm12B:
- LEAQ (AX)(R9*1), R8
-
- // genMemMoveLong
- MOVOU (DI), X0
- MOVOU 16(DI), X1
- MOVOU -32(DI)(R9*1), X2
- MOVOU -16(DI)(R9*1), X3
- MOVQ R9, R11
- SHRQ $0x05, R11
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R12
- SUBQ R10, R12
- DECQ R11
- JA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
- LEAQ -32(DI)(R12*1), R10
- LEAQ -32(AX)(R12*1), R13
-
-emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R13)
- MOVOA X5, 16(R13)
- ADDQ $0x20, R13
- ADDQ $0x20, R10
- ADDQ $0x20, R12
- DECQ R11
- JNA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32:
- MOVOU -32(DI)(R12*1), X4
- MOVOU -16(DI)(R12*1), X5
- MOVOA X4, -32(AX)(R12*1)
- MOVOA X5, -16(AX)(R12*1)
- ADDQ $0x20, R12
- CMPQ R9, R12
- JAE emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ R8, AX
-
-emit_literal_done_match_emit_encodeBlockAsm12B:
-match_nolit_loop_encodeBlockAsm12B:
- MOVL CX, DI
- SUBL SI, DI
- MOVL DI, 16(SP)
- ADDL $0x04, CX
- ADDL $0x04, SI
- MOVQ src_len+32(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(SI*1), SI
-
- // matchLen
- XORL R10, R10
- CMPL DI, $0x08
- JL matchlen_match4_match_nolit_encodeBlockAsm12B
-
-matchlen_loopback_match_nolit_encodeBlockAsm12B:
- MOVQ (R8)(R10*1), R9
- XORQ (SI)(R10*1), R9
- TESTQ R9, R9
- JZ matchlen_loop_match_nolit_encodeBlockAsm12B
-
-#ifdef GOAMD64_v3
- TZCNTQ R9, R9
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R9, R9
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R9, R9
-
-#endif
- SARQ $0x03, R9
- LEAL (R10)(R9*1), R10
- JMP match_nolit_end_encodeBlockAsm12B
-
-matchlen_loop_match_nolit_encodeBlockAsm12B:
- LEAL -8(DI), DI
- LEAL 8(R10), R10
- CMPL DI, $0x08
- JGE matchlen_loopback_match_nolit_encodeBlockAsm12B
- JZ match_nolit_end_encodeBlockAsm12B
-
-matchlen_match4_match_nolit_encodeBlockAsm12B:
- CMPL DI, $0x04
- JL matchlen_match2_match_nolit_encodeBlockAsm12B
- MOVL (R8)(R10*1), R9
- CMPL (SI)(R10*1), R9
- JNE matchlen_match2_match_nolit_encodeBlockAsm12B
- SUBL $0x04, DI
- LEAL 4(R10), R10
-
-matchlen_match2_match_nolit_encodeBlockAsm12B:
- CMPL DI, $0x02
- JL matchlen_match1_match_nolit_encodeBlockAsm12B
- MOVW (R8)(R10*1), R9
- CMPW (SI)(R10*1), R9
- JNE matchlen_match1_match_nolit_encodeBlockAsm12B
- SUBL $0x02, DI
- LEAL 2(R10), R10
-
-matchlen_match1_match_nolit_encodeBlockAsm12B:
- CMPL DI, $0x01
- JL match_nolit_end_encodeBlockAsm12B
- MOVB (R8)(R10*1), R9
- CMPB (SI)(R10*1), R9
- JNE match_nolit_end_encodeBlockAsm12B
- LEAL 1(R10), R10
-
-match_nolit_end_encodeBlockAsm12B:
- ADDL R10, CX
- MOVL 16(SP), SI
- ADDL $0x04, R10
- MOVL CX, 12(SP)
-
- // emitCopy
-two_byte_offset_match_nolit_encodeBlockAsm12B:
- CMPL R10, $0x40
- JLE two_byte_offset_short_match_nolit_encodeBlockAsm12B
- MOVB $0xee, (AX)
- MOVW SI, 1(AX)
- LEAL -60(R10), R10
- ADDQ $0x03, AX
-
- // emitRepeat
- MOVL R10, DI
- LEAL -4(R10), R10
- CMPL DI, $0x08
- JLE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short
- CMPL DI, $0x0c
- JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
- CMPL SI, $0x00000800
- JLT repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
-
-cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
- CMPL R10, $0x00000104
- JLT repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short
- LEAL -256(R10), R10
- MOVW $0x0019, (AX)
- MOVW R10, 2(AX)
- ADDQ $0x04, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm12B
-
-repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short:
- LEAL -4(R10), R10
- MOVW $0x0015, (AX)
- MOVB R10, 2(AX)
- ADDQ $0x03, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm12B
-
-repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short:
- SHLL $0x02, R10
- ORL $0x01, R10
- MOVW R10, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm12B
-
-repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
- XORQ DI, DI
- LEAL 1(DI)(R10*4), R10
- MOVB SI, 1(AX)
- SARL $0x08, SI
- SHLL $0x05, SI
- ORL SI, R10
- MOVB R10, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm12B
- JMP two_byte_offset_match_nolit_encodeBlockAsm12B
-
-two_byte_offset_short_match_nolit_encodeBlockAsm12B:
- CMPL R10, $0x0c
- JGE emit_copy_three_match_nolit_encodeBlockAsm12B
- CMPL SI, $0x00000800
- JGE emit_copy_three_match_nolit_encodeBlockAsm12B
- MOVB $0x01, BL
- LEAL -16(BX)(R10*4), R10
- MOVB SI, 1(AX)
- SHRL $0x08, SI
- SHLL $0x05, SI
- ORL SI, R10
- MOVB R10, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm12B
-
-emit_copy_three_match_nolit_encodeBlockAsm12B:
- MOVB $0x02, BL
- LEAL -4(BX)(R10*4), R10
- MOVB R10, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
-
-match_nolit_emitcopy_end_encodeBlockAsm12B:
- CMPL CX, 8(SP)
- JGE emit_remainder_encodeBlockAsm12B
- MOVQ -2(DX)(CX*1), DI
- CMPQ AX, (SP)
- JL match_nolit_dst_ok_encodeBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_nolit_dst_ok_encodeBlockAsm12B:
- MOVQ $0x000000cf1bbcdcbb, R9
- MOVQ DI, R8
- SHRQ $0x10, DI
- MOVQ DI, SI
- SHLQ $0x18, R8
- IMULQ R9, R8
- SHRQ $0x34, R8
- SHLQ $0x18, SI
- IMULQ R9, SI
- SHRQ $0x34, SI
- LEAL -2(CX), R9
- LEAQ 24(SP)(SI*4), R10
- MOVL (R10), SI
- MOVL R9, 24(SP)(R8*4)
- MOVL CX, (R10)
- CMPL (DX)(SI*1), DI
- JEQ match_nolit_loop_encodeBlockAsm12B
- INCL CX
- JMP search_loop_encodeBlockAsm12B
-
-emit_remainder_encodeBlockAsm12B:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
- JL emit_remainder_ok_encodeBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-emit_remainder_ok_encodeBlockAsm12B:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
- JEQ emit_literal_done_emit_remainder_encodeBlockAsm12B
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
- LEAL -1(SI), DX
- CMPL DX, $0x3c
- JLT one_byte_emit_remainder_encodeBlockAsm12B
- CMPL DX, $0x00000100
- JLT two_bytes_emit_remainder_encodeBlockAsm12B
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_emit_remainder_encodeBlockAsm12B
-
-two_bytes_emit_remainder_encodeBlockAsm12B:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
- CMPL DX, $0x40
- JL memmove_emit_remainder_encodeBlockAsm12B
- JMP memmove_long_emit_remainder_encodeBlockAsm12B
-
-one_byte_emit_remainder_encodeBlockAsm12B:
- SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
-
-memmove_emit_remainder_encodeBlockAsm12B:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveShort
- CMPQ BX, $0x08
- JLE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8
- CMPQ BX, $0x10
- JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16
- CMPQ BX, $0x20
- JBE emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32
- JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64
-
-emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8:
- MOVQ (CX), SI
- MOVQ SI, (AX)
- JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
-
-emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
-
-emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B
-
-emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
-
-memmove_end_copy_emit_remainder_encodeBlockAsm12B:
- MOVQ DX, AX
- JMP emit_literal_done_emit_remainder_encodeBlockAsm12B
-
-memmove_long_emit_remainder_encodeBlockAsm12B:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVQ BX, DI
- SHRQ $0x05, DI
- MOVQ AX, SI
- ANDL $0x0000001f, SI
- MOVQ $0x00000040, R8
- SUBQ SI, R8
- DECQ DI
- JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
-
-emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back:
- MOVOU (SI), X4
- MOVOU 16(SI), X5
- MOVOA X4, (R9)
- MOVOA X5, 16(R9)
- ADDQ $0x20, R9
- ADDQ $0x20, SI
- ADDQ $0x20, R8
- DECQ DI
- JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back
-
-emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
- ADDQ $0x20, R8
- CMPQ BX, R8
- JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
-
-emit_literal_done_emit_remainder_encodeBlockAsm12B:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
- RET
-
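The encodeBlockAsm variants in this file share one structure and differ mainly in hash-table size: the 10B and 8B functions below zero 2^10- and 2^8-entry tables on the stack and shift the hash product right by 54 and 56 bits respectively, so the suffix encodes the table bits (the 12B variant above follows the same pattern). The repeated SHLQ $0x20 / IMULQ $0x9e3779b1 / SHRQ sequences compute that table index from the low four bytes of an 8-byte load. A minimal Go sketch of the hash, assuming the helper name hash4x64 and the demo values are illustrative only and not part of the package:

```go
package main

import "fmt"

// hash4x64 mirrors the hashing sequence in the assembly: only the low four
// bytes of v survive the SHLQ $0x20, they are multiplied by the golden-ratio
// prime 0x9e3779b1, and the top tableBits bits of the 32-bit product select
// a hash-table slot (tableBits is 10 for encodeBlockAsm10B, 8 for 8B).
func hash4x64(v uint64, tableBits uint) uint32 {
	const prime = 0x9e3779b1
	return (uint32(v) * prime) >> (32 - tableBits)
}

func main() {
	v := uint64(0x12345678)
	fmt.Printf("10-bit slot: %d\n", hash4x64(v, 10)) // index into a 1024-entry table
	fmt.Printf(" 8-bit slot: %d\n", hash4x64(v, 8))  // index into a 256-entry table
}
```

Because one MOVQ loads eight source bytes, the assembly can derive several staggered hashes (the SHRQ $0x08 / SHRQ $0x10 copies) from a single read before touching memory again.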
-// func encodeBlockAsm10B(dst []byte, src []byte) int
-// Requires: BMI, SSE2
-TEXT ·encodeBlockAsm10B(SB), $4120-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000020, CX
- LEAQ 24(SP), DX
- PXOR X0, X0
-
-zero_loop_encodeBlockAsm10B:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
- JNZ zero_loop_encodeBlockAsm10B
- MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), SI
- MOVL SI, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL CX, 16(SP)
- MOVQ src_base+24(FP), DX
-
-search_loop_encodeBlockAsm10B:
- MOVL CX, SI
- SUBL 12(SP), SI
- SHRL $0x05, SI
- LEAL 4(CX)(SI*1), SI
- CMPL SI, 8(SP)
- JGE emit_remainder_encodeBlockAsm10B
- MOVQ (DX)(CX*1), DI
- MOVL SI, 20(SP)
- MOVQ $0x9e3779b1, R9
- MOVQ DI, R10
- MOVQ DI, R11
- SHRQ $0x08, R11
- SHLQ $0x20, R10
- IMULQ R9, R10
- SHRQ $0x36, R10
- SHLQ $0x20, R11
- IMULQ R9, R11
- SHRQ $0x36, R11
- MOVL 24(SP)(R10*4), SI
- MOVL 24(SP)(R11*4), R8
- MOVL CX, 24(SP)(R10*4)
- LEAL 1(CX), R10
- MOVL R10, 24(SP)(R11*4)
- MOVQ DI, R10
- SHRQ $0x10, R10
- SHLQ $0x20, R10
- IMULQ R9, R10
- SHRQ $0x36, R10
- MOVL CX, R9
- SUBL 16(SP), R9
- MOVL 1(DX)(R9*1), R11
- MOVQ DI, R9
- SHRQ $0x08, R9
- CMPL R9, R11
- JNE no_repeat_found_encodeBlockAsm10B
- LEAL 1(CX), DI
- MOVL 12(SP), R8
- MOVL DI, SI
- SUBL 16(SP), SI
- JZ repeat_extend_back_end_encodeBlockAsm10B
-
-repeat_extend_back_loop_encodeBlockAsm10B:
- CMPL DI, R8
- JLE repeat_extend_back_end_encodeBlockAsm10B
- MOVB -1(DX)(SI*1), BL
- MOVB -1(DX)(DI*1), R9
- CMPB BL, R9
- JNE repeat_extend_back_end_encodeBlockAsm10B
- LEAL -1(DI), DI
- DECL SI
- JNZ repeat_extend_back_loop_encodeBlockAsm10B
-
-repeat_extend_back_end_encodeBlockAsm10B:
- MOVL 12(SP), SI
- CMPL SI, DI
- JEQ emit_literal_done_repeat_emit_encodeBlockAsm10B
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(SI*1), R10
- SUBL SI, R9
- LEAL -1(R9), SI
- CMPL SI, $0x3c
- JLT one_byte_repeat_emit_encodeBlockAsm10B
- CMPL SI, $0x00000100
- JLT two_bytes_repeat_emit_encodeBlockAsm10B
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_repeat_emit_encodeBlockAsm10B
-
-two_bytes_repeat_emit_encodeBlockAsm10B:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_repeat_emit_encodeBlockAsm10B
- JMP memmove_long_repeat_emit_encodeBlockAsm10B
-
-one_byte_repeat_emit_encodeBlockAsm10B:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, AX
-
-memmove_repeat_emit_encodeBlockAsm10B:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveShort
- CMPQ R9, $0x08
- JLE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8
- CMPQ R9, $0x10
- JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32
- JMP emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64
-
-emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8:
- MOVQ (R10), R11
- MOVQ R11, (AX)
- JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B
-
-emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16:
- MOVQ (R10), R11
- MOVQ -8(R10)(R9*1), R10
- MOVQ R11, (AX)
- MOVQ R10, -8(AX)(R9*1)
- JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B
-
-emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32:
- MOVOU (R10), X0
- MOVOU -16(R10)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B
-
-emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64:
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_repeat_emit_encodeBlockAsm10B:
- MOVQ SI, AX
- JMP emit_literal_done_repeat_emit_encodeBlockAsm10B
-
-memmove_long_repeat_emit_encodeBlockAsm10B:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveLong
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVQ R9, R12
- SHRQ $0x05, R12
- MOVQ AX, R11
- ANDL $0x0000001f, R11
- MOVQ $0x00000040, R13
- SUBQ R11, R13
- DECQ R12
- JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
- LEAQ -32(R10)(R13*1), R11
- LEAQ -32(AX)(R13*1), R14
-
-emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back:
- MOVOU (R11), X4
- MOVOU 16(R11), X5
- MOVOA X4, (R14)
- MOVOA X5, 16(R14)
- ADDQ $0x20, R14
- ADDQ $0x20, R11
- ADDQ $0x20, R13
- DECQ R12
- JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back
-
-emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
- MOVOU -32(R10)(R13*1), X4
- MOVOU -16(R10)(R13*1), X5
- MOVOA X4, -32(AX)(R13*1)
- MOVOA X5, -16(AX)(R13*1)
- ADDQ $0x20, R13
- CMPQ R9, R13
- JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ SI, AX
-
-emit_literal_done_repeat_emit_encodeBlockAsm10B:
- ADDL $0x05, CX
- MOVL CX, SI
- SUBL 16(SP), SI
- MOVQ src_len+32(FP), R9
- SUBL CX, R9
- LEAQ (DX)(CX*1), R10
- LEAQ (DX)(SI*1), SI
-
- // matchLen
- XORL R12, R12
- CMPL R9, $0x08
- JL matchlen_match4_repeat_extend_encodeBlockAsm10B
-
-matchlen_loopback_repeat_extend_encodeBlockAsm10B:
- MOVQ (R10)(R12*1), R11
- XORQ (SI)(R12*1), R11
- TESTQ R11, R11
- JZ matchlen_loop_repeat_extend_encodeBlockAsm10B
-
-#ifdef GOAMD64_v3
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R11, R11
-
-#endif
- SARQ $0x03, R11
- LEAL (R12)(R11*1), R12
- JMP repeat_extend_forward_end_encodeBlockAsm10B
-
-matchlen_loop_repeat_extend_encodeBlockAsm10B:
- LEAL -8(R9), R9
- LEAL 8(R12), R12
- CMPL R9, $0x08
- JGE matchlen_loopback_repeat_extend_encodeBlockAsm10B
- JZ repeat_extend_forward_end_encodeBlockAsm10B
-
-matchlen_match4_repeat_extend_encodeBlockAsm10B:
- CMPL R9, $0x04
- JL matchlen_match2_repeat_extend_encodeBlockAsm10B
- MOVL (R10)(R12*1), R11
- CMPL (SI)(R12*1), R11
- JNE matchlen_match2_repeat_extend_encodeBlockAsm10B
- SUBL $0x04, R9
- LEAL 4(R12), R12
-
-matchlen_match2_repeat_extend_encodeBlockAsm10B:
- CMPL R9, $0x02
- JL matchlen_match1_repeat_extend_encodeBlockAsm10B
- MOVW (R10)(R12*1), R11
- CMPW (SI)(R12*1), R11
- JNE matchlen_match1_repeat_extend_encodeBlockAsm10B
- SUBL $0x02, R9
- LEAL 2(R12), R12
-
-matchlen_match1_repeat_extend_encodeBlockAsm10B:
- CMPL R9, $0x01
- JL repeat_extend_forward_end_encodeBlockAsm10B
- MOVB (R10)(R12*1), R11
- CMPB (SI)(R12*1), R11
- JNE repeat_extend_forward_end_encodeBlockAsm10B
- LEAL 1(R12), R12
-
-repeat_extend_forward_end_encodeBlockAsm10B:
- ADDL R12, CX
- MOVL CX, SI
- SUBL DI, SI
- MOVL 16(SP), DI
- TESTL R8, R8
- JZ repeat_as_copy_encodeBlockAsm10B
-
- // emitRepeat
- MOVL SI, R8
- LEAL -4(SI), SI
- CMPL R8, $0x08
- JLE repeat_two_match_repeat_encodeBlockAsm10B
- CMPL R8, $0x0c
- JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm10B
- CMPL DI, $0x00000800
- JLT repeat_two_offset_match_repeat_encodeBlockAsm10B
-
-cant_repeat_two_offset_match_repeat_encodeBlockAsm10B:
- CMPL SI, $0x00000104
- JLT repeat_three_match_repeat_encodeBlockAsm10B
- LEAL -256(SI), SI
- MOVW $0x0019, (AX)
- MOVW SI, 2(AX)
- ADDQ $0x04, AX
- JMP repeat_end_emit_encodeBlockAsm10B
-
-repeat_three_match_repeat_encodeBlockAsm10B:
- LEAL -4(SI), SI
- MOVW $0x0015, (AX)
- MOVB SI, 2(AX)
- ADDQ $0x03, AX
- JMP repeat_end_emit_encodeBlockAsm10B
-
-repeat_two_match_repeat_encodeBlockAsm10B:
- SHLL $0x02, SI
- ORL $0x01, SI
- MOVW SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm10B
-
-repeat_two_offset_match_repeat_encodeBlockAsm10B:
- XORQ R8, R8
- LEAL 1(R8)(SI*4), SI
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm10B
-
-repeat_as_copy_encodeBlockAsm10B:
- // emitCopy
-two_byte_offset_repeat_as_copy_encodeBlockAsm10B:
- CMPL SI, $0x40
- JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B
- MOVB $0xee, (AX)
- MOVW DI, 1(AX)
- LEAL -60(SI), SI
- ADDQ $0x03, AX
-
- // emitRepeat
- MOVL SI, R8
- LEAL -4(SI), SI
- CMPL R8, $0x08
- JLE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
- CMPL R8, $0x0c
- JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
- CMPL DI, $0x00000800
- JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
-
-cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
- CMPL SI, $0x00000104
- JLT repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
- LEAL -256(SI), SI
- MOVW $0x0019, (AX)
- MOVW SI, 2(AX)
- ADDQ $0x04, AX
- JMP repeat_end_emit_encodeBlockAsm10B
-
-repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
- LEAL -4(SI), SI
- MOVW $0x0015, (AX)
- MOVB SI, 2(AX)
- ADDQ $0x03, AX
- JMP repeat_end_emit_encodeBlockAsm10B
-
-repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
- SHLL $0x02, SI
- ORL $0x01, SI
- MOVW SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm10B
-
-repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
- XORQ R8, R8
- LEAL 1(R8)(SI*4), SI
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm10B
- JMP two_byte_offset_repeat_as_copy_encodeBlockAsm10B
-
-two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B:
- CMPL SI, $0x0c
- JGE emit_copy_three_repeat_as_copy_encodeBlockAsm10B
- CMPL DI, $0x00000800
- JGE emit_copy_three_repeat_as_copy_encodeBlockAsm10B
- MOVB $0x01, BL
- LEAL -16(BX)(SI*4), SI
- MOVB DI, 1(AX)
- SHRL $0x08, DI
- SHLL $0x05, DI
- ORL DI, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm10B
-
-emit_copy_three_repeat_as_copy_encodeBlockAsm10B:
- MOVB $0x02, BL
- LEAL -4(BX)(SI*4), SI
- MOVB SI, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
-
-repeat_end_emit_encodeBlockAsm10B:
- MOVL CX, 12(SP)
- JMP search_loop_encodeBlockAsm10B
-
-no_repeat_found_encodeBlockAsm10B:
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeBlockAsm10B
- SHRQ $0x08, DI
- MOVL 24(SP)(R10*4), SI
- LEAL 2(CX), R9
- CMPL (DX)(R8*1), DI
- JEQ candidate2_match_encodeBlockAsm10B
- MOVL R9, 24(SP)(R10*4)
- SHRQ $0x08, DI
- CMPL (DX)(SI*1), DI
- JEQ candidate3_match_encodeBlockAsm10B
- MOVL 20(SP), CX
- JMP search_loop_encodeBlockAsm10B
-
-candidate3_match_encodeBlockAsm10B:
- ADDL $0x02, CX
- JMP candidate_match_encodeBlockAsm10B
-
-candidate2_match_encodeBlockAsm10B:
- MOVL R9, 24(SP)(R10*4)
- INCL CX
- MOVL R8, SI
-
-candidate_match_encodeBlockAsm10B:
- MOVL 12(SP), DI
- TESTL SI, SI
- JZ match_extend_back_end_encodeBlockAsm10B
-
-match_extend_back_loop_encodeBlockAsm10B:
- CMPL CX, DI
- JLE match_extend_back_end_encodeBlockAsm10B
- MOVB -1(DX)(SI*1), BL
- MOVB -1(DX)(CX*1), R8
- CMPB BL, R8
- JNE match_extend_back_end_encodeBlockAsm10B
- LEAL -1(CX), CX
- DECL SI
- JZ match_extend_back_end_encodeBlockAsm10B
- JMP match_extend_back_loop_encodeBlockAsm10B
-
-match_extend_back_end_encodeBlockAsm10B:
- MOVL CX, DI
- SUBL 12(SP), DI
- LEAQ 3(AX)(DI*1), DI
- CMPQ DI, (SP)
- JL match_dst_size_check_encodeBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_dst_size_check_encodeBlockAsm10B:
- MOVL CX, DI
- MOVL 12(SP), R8
- CMPL R8, DI
- JEQ emit_literal_done_match_emit_encodeBlockAsm10B
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(R8*1), DI
- SUBL R8, R9
- LEAL -1(R9), R8
- CMPL R8, $0x3c
- JLT one_byte_match_emit_encodeBlockAsm10B
- CMPL R8, $0x00000100
- JLT two_bytes_match_emit_encodeBlockAsm10B
- MOVB $0xf4, (AX)
- MOVW R8, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_match_emit_encodeBlockAsm10B
-
-two_bytes_match_emit_encodeBlockAsm10B:
- MOVB $0xf0, (AX)
- MOVB R8, 1(AX)
- ADDQ $0x02, AX
- CMPL R8, $0x40
- JL memmove_match_emit_encodeBlockAsm10B
- JMP memmove_long_match_emit_encodeBlockAsm10B
-
-one_byte_match_emit_encodeBlockAsm10B:
- SHLB $0x02, R8
- MOVB R8, (AX)
- ADDQ $0x01, AX
-
-memmove_match_emit_encodeBlockAsm10B:
- LEAQ (AX)(R9*1), R8
-
- // genMemMoveShort
- CMPQ R9, $0x08
- JLE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8
- CMPQ R9, $0x10
- JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32
- JMP emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64
-
-emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8:
- MOVQ (DI), R10
- MOVQ R10, (AX)
- JMP memmove_end_copy_match_emit_encodeBlockAsm10B
-
-emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16:
- MOVQ (DI), R10
- MOVQ -8(DI)(R9*1), DI
- MOVQ R10, (AX)
- MOVQ DI, -8(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeBlockAsm10B
-
-emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32:
- MOVOU (DI), X0
- MOVOU -16(DI)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeBlockAsm10B
-
-emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64:
- MOVOU (DI), X0
- MOVOU 16(DI), X1
- MOVOU -32(DI)(R9*1), X2
- MOVOU -16(DI)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_match_emit_encodeBlockAsm10B:
- MOVQ R8, AX
- JMP emit_literal_done_match_emit_encodeBlockAsm10B
-
-memmove_long_match_emit_encodeBlockAsm10B:
- LEAQ (AX)(R9*1), R8
-
- // genMemMoveLong
- MOVOU (DI), X0
- MOVOU 16(DI), X1
- MOVOU -32(DI)(R9*1), X2
- MOVOU -16(DI)(R9*1), X3
- MOVQ R9, R11
- SHRQ $0x05, R11
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R12
- SUBQ R10, R12
- DECQ R11
- JA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
- LEAQ -32(DI)(R12*1), R10
- LEAQ -32(AX)(R12*1), R13
-
-emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R13)
- MOVOA X5, 16(R13)
- ADDQ $0x20, R13
- ADDQ $0x20, R10
- ADDQ $0x20, R12
- DECQ R11
- JNA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
- MOVOU -32(DI)(R12*1), X4
- MOVOU -16(DI)(R12*1), X5
- MOVOA X4, -32(AX)(R12*1)
- MOVOA X5, -16(AX)(R12*1)
- ADDQ $0x20, R12
- CMPQ R9, R12
- JAE emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ R8, AX
-
-emit_literal_done_match_emit_encodeBlockAsm10B:
-match_nolit_loop_encodeBlockAsm10B:
- MOVL CX, DI
- SUBL SI, DI
- MOVL DI, 16(SP)
- ADDL $0x04, CX
- ADDL $0x04, SI
- MOVQ src_len+32(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(SI*1), SI
-
- // matchLen
- XORL R10, R10
- CMPL DI, $0x08
- JL matchlen_match4_match_nolit_encodeBlockAsm10B
-
-matchlen_loopback_match_nolit_encodeBlockAsm10B:
- MOVQ (R8)(R10*1), R9
- XORQ (SI)(R10*1), R9
- TESTQ R9, R9
- JZ matchlen_loop_match_nolit_encodeBlockAsm10B
-
-#ifdef GOAMD64_v3
- TZCNTQ R9, R9
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R9, R9
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R9, R9
-
-#endif
- SARQ $0x03, R9
- LEAL (R10)(R9*1), R10
- JMP match_nolit_end_encodeBlockAsm10B
-
-matchlen_loop_match_nolit_encodeBlockAsm10B:
- LEAL -8(DI), DI
- LEAL 8(R10), R10
- CMPL DI, $0x08
- JGE matchlen_loopback_match_nolit_encodeBlockAsm10B
- JZ match_nolit_end_encodeBlockAsm10B
-
-matchlen_match4_match_nolit_encodeBlockAsm10B:
- CMPL DI, $0x04
- JL matchlen_match2_match_nolit_encodeBlockAsm10B
- MOVL (R8)(R10*1), R9
- CMPL (SI)(R10*1), R9
- JNE matchlen_match2_match_nolit_encodeBlockAsm10B
- SUBL $0x04, DI
- LEAL 4(R10), R10
-
-matchlen_match2_match_nolit_encodeBlockAsm10B:
- CMPL DI, $0x02
- JL matchlen_match1_match_nolit_encodeBlockAsm10B
- MOVW (R8)(R10*1), R9
- CMPW (SI)(R10*1), R9
- JNE matchlen_match1_match_nolit_encodeBlockAsm10B
- SUBL $0x02, DI
- LEAL 2(R10), R10
-
-matchlen_match1_match_nolit_encodeBlockAsm10B:
- CMPL DI, $0x01
- JL match_nolit_end_encodeBlockAsm10B
- MOVB (R8)(R10*1), R9
- CMPB (SI)(R10*1), R9
- JNE match_nolit_end_encodeBlockAsm10B
- LEAL 1(R10), R10
-
-match_nolit_end_encodeBlockAsm10B:
- ADDL R10, CX
- MOVL 16(SP), SI
- ADDL $0x04, R10
- MOVL CX, 12(SP)
-
- // emitCopy
-two_byte_offset_match_nolit_encodeBlockAsm10B:
- CMPL R10, $0x40
- JLE two_byte_offset_short_match_nolit_encodeBlockAsm10B
- MOVB $0xee, (AX)
- MOVW SI, 1(AX)
- LEAL -60(R10), R10
- ADDQ $0x03, AX
-
- // emitRepeat
- MOVL R10, DI
- LEAL -4(R10), R10
- CMPL DI, $0x08
- JLE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short
- CMPL DI, $0x0c
- JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short
- CMPL SI, $0x00000800
- JLT repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short
-
-cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
- CMPL R10, $0x00000104
- JLT repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short
- LEAL -256(R10), R10
- MOVW $0x0019, (AX)
- MOVW R10, 2(AX)
- ADDQ $0x04, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm10B
-
-repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short:
- LEAL -4(R10), R10
- MOVW $0x0015, (AX)
- MOVB R10, 2(AX)
- ADDQ $0x03, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm10B
-
-repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short:
- SHLL $0x02, R10
- ORL $0x01, R10
- MOVW R10, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm10B
-
-repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
- XORQ DI, DI
- LEAL 1(DI)(R10*4), R10
- MOVB SI, 1(AX)
- SARL $0x08, SI
- SHLL $0x05, SI
- ORL SI, R10
- MOVB R10, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm10B
- JMP two_byte_offset_match_nolit_encodeBlockAsm10B
-
-two_byte_offset_short_match_nolit_encodeBlockAsm10B:
- CMPL R10, $0x0c
- JGE emit_copy_three_match_nolit_encodeBlockAsm10B
- CMPL SI, $0x00000800
- JGE emit_copy_three_match_nolit_encodeBlockAsm10B
- MOVB $0x01, BL
- LEAL -16(BX)(R10*4), R10
- MOVB SI, 1(AX)
- SHRL $0x08, SI
- SHLL $0x05, SI
- ORL SI, R10
- MOVB R10, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm10B
-
-emit_copy_three_match_nolit_encodeBlockAsm10B:
- MOVB $0x02, BL
- LEAL -4(BX)(R10*4), R10
- MOVB R10, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
-
-match_nolit_emitcopy_end_encodeBlockAsm10B:
- CMPL CX, 8(SP)
- JGE emit_remainder_encodeBlockAsm10B
- MOVQ -2(DX)(CX*1), DI
- CMPQ AX, (SP)
- JL match_nolit_dst_ok_encodeBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_nolit_dst_ok_encodeBlockAsm10B:
- MOVQ $0x9e3779b1, R9
- MOVQ DI, R8
- SHRQ $0x10, DI
- MOVQ DI, SI
- SHLQ $0x20, R8
- IMULQ R9, R8
- SHRQ $0x36, R8
- SHLQ $0x20, SI
- IMULQ R9, SI
- SHRQ $0x36, SI
- LEAL -2(CX), R9
- LEAQ 24(SP)(SI*4), R10
- MOVL (R10), SI
- MOVL R9, 24(SP)(R8*4)
- MOVL CX, (R10)
- CMPL (DX)(SI*1), DI
- JEQ match_nolit_loop_encodeBlockAsm10B
- INCL CX
- JMP search_loop_encodeBlockAsm10B
-
-emit_remainder_encodeBlockAsm10B:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
- JL emit_remainder_ok_encodeBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-emit_remainder_ok_encodeBlockAsm10B:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
- JEQ emit_literal_done_emit_remainder_encodeBlockAsm10B
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
- LEAL -1(SI), DX
- CMPL DX, $0x3c
- JLT one_byte_emit_remainder_encodeBlockAsm10B
- CMPL DX, $0x00000100
- JLT two_bytes_emit_remainder_encodeBlockAsm10B
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_emit_remainder_encodeBlockAsm10B
-
-two_bytes_emit_remainder_encodeBlockAsm10B:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
- CMPL DX, $0x40
- JL memmove_emit_remainder_encodeBlockAsm10B
- JMP memmove_long_emit_remainder_encodeBlockAsm10B
-
-one_byte_emit_remainder_encodeBlockAsm10B:
- SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
-
-memmove_emit_remainder_encodeBlockAsm10B:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveShort
- CMPQ BX, $0x08
- JLE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8
- CMPQ BX, $0x10
- JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16
- CMPQ BX, $0x20
- JBE emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32
- JMP emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64
-
-emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8:
- MOVQ (CX), SI
- MOVQ SI, (AX)
- JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
-
-emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
-
-emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B
-
-emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
-
-memmove_end_copy_emit_remainder_encodeBlockAsm10B:
- MOVQ DX, AX
- JMP emit_literal_done_emit_remainder_encodeBlockAsm10B
-
-memmove_long_emit_remainder_encodeBlockAsm10B:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVQ BX, DI
- SHRQ $0x05, DI
- MOVQ AX, SI
- ANDL $0x0000001f, SI
- MOVQ $0x00000040, R8
- SUBQ SI, R8
- DECQ DI
- JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
-
-emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back:
- MOVOU (SI), X4
- MOVOU 16(SI), X5
- MOVOA X4, (R9)
- MOVOA X5, 16(R9)
- ADDQ $0x20, R9
- ADDQ $0x20, SI
- ADDQ $0x20, R8
- DECQ DI
- JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back
-
-emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
- ADDQ $0x20, R8
- CMPQ BX, R8
- JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
-
-emit_literal_done_emit_remainder_encodeBlockAsm10B:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
- RET
-
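The // emitCopy blocks above pick between two Snappy-compatible encodings: a two-byte form when the match length is under 12 and the offset under 2048 (tag bits 01, length-4 in bits 2-4, offset high bits in bits 5-7), and a three-byte form with a 16-bit little-endian offset otherwise; matches longer than 64 are first reduced via the 0xee/emitRepeat path. A sketch of the two tag layouts, under the assumption that emitCopyShort is an illustrative name and the length has already been clamped to 4..64:

```go
package main

import (
	"encoding/binary"
	"fmt"
)

// emitCopyShort is a sketch of the two copy encodings visible in the emitCopy
// blocks above; the function name is illustrative, not the package's API.
// It assumes 4 <= length <= 64 and offset < 65536.
func emitCopyShort(dst []byte, offset, length int) int {
	if length < 12 && offset < 2048 {
		// tagCopy1: 3 bits of length-4 and the high 3 bits of the offset in
		// the tag byte, low 8 bits of the offset in the next byte.
		dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | 0x01
		dst[1] = uint8(offset)
		return 2
	}
	// tagCopy2: 6 bits of length-1 in the tag byte, 16-bit little-endian offset.
	dst[0] = uint8(length-1)<<2 | 0x02
	binary.LittleEndian.PutUint16(dst[1:], uint16(offset))
	return 3
}

func main() {
	var buf [3]byte
	n := emitCopyShort(buf[:], 100, 8)
	fmt.Printf("% x (%d bytes)\n", buf[:n], n) // 11 64 (2 bytes)
}
```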
-// func encodeBlockAsm8B(dst []byte, src []byte) int
-// Requires: BMI, SSE2
-TEXT ·encodeBlockAsm8B(SB), $1048-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000008, CX
- LEAQ 24(SP), DX
- PXOR X0, X0
-
-zero_loop_encodeBlockAsm8B:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
- JNZ zero_loop_encodeBlockAsm8B
- MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), SI
- MOVL SI, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL CX, 16(SP)
- MOVQ src_base+24(FP), DX
-
-search_loop_encodeBlockAsm8B:
- MOVL CX, SI
- SUBL 12(SP), SI
- SHRL $0x04, SI
- LEAL 4(CX)(SI*1), SI
- CMPL SI, 8(SP)
- JGE emit_remainder_encodeBlockAsm8B
- MOVQ (DX)(CX*1), DI
- MOVL SI, 20(SP)
- MOVQ $0x9e3779b1, R9
- MOVQ DI, R10
- MOVQ DI, R11
- SHRQ $0x08, R11
- SHLQ $0x20, R10
- IMULQ R9, R10
- SHRQ $0x38, R10
- SHLQ $0x20, R11
- IMULQ R9, R11
- SHRQ $0x38, R11
- MOVL 24(SP)(R10*4), SI
- MOVL 24(SP)(R11*4), R8
- MOVL CX, 24(SP)(R10*4)
- LEAL 1(CX), R10
- MOVL R10, 24(SP)(R11*4)
- MOVQ DI, R10
- SHRQ $0x10, R10
- SHLQ $0x20, R10
- IMULQ R9, R10
- SHRQ $0x38, R10
- MOVL CX, R9
- SUBL 16(SP), R9
- MOVL 1(DX)(R9*1), R11
- MOVQ DI, R9
- SHRQ $0x08, R9
- CMPL R9, R11
- JNE no_repeat_found_encodeBlockAsm8B
- LEAL 1(CX), DI
- MOVL 12(SP), R8
- MOVL DI, SI
- SUBL 16(SP), SI
- JZ repeat_extend_back_end_encodeBlockAsm8B
-
-repeat_extend_back_loop_encodeBlockAsm8B:
- CMPL DI, R8
- JLE repeat_extend_back_end_encodeBlockAsm8B
- MOVB -1(DX)(SI*1), BL
- MOVB -1(DX)(DI*1), R9
- CMPB BL, R9
- JNE repeat_extend_back_end_encodeBlockAsm8B
- LEAL -1(DI), DI
- DECL SI
- JNZ repeat_extend_back_loop_encodeBlockAsm8B
-
-repeat_extend_back_end_encodeBlockAsm8B:
- MOVL 12(SP), SI
- CMPL SI, DI
- JEQ emit_literal_done_repeat_emit_encodeBlockAsm8B
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(SI*1), R10
- SUBL SI, R9
- LEAL -1(R9), SI
- CMPL SI, $0x3c
- JLT one_byte_repeat_emit_encodeBlockAsm8B
- CMPL SI, $0x00000100
- JLT two_bytes_repeat_emit_encodeBlockAsm8B
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_repeat_emit_encodeBlockAsm8B
-
-two_bytes_repeat_emit_encodeBlockAsm8B:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_repeat_emit_encodeBlockAsm8B
- JMP memmove_long_repeat_emit_encodeBlockAsm8B
-
-one_byte_repeat_emit_encodeBlockAsm8B:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, AX
-
-memmove_repeat_emit_encodeBlockAsm8B:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveShort
- CMPQ R9, $0x08
- JLE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8
- CMPQ R9, $0x10
- JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32
- JMP emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64
-
-emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8:
- MOVQ (R10), R11
- MOVQ R11, (AX)
- JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B
-
-emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16:
- MOVQ (R10), R11
- MOVQ -8(R10)(R9*1), R10
- MOVQ R11, (AX)
- MOVQ R10, -8(AX)(R9*1)
- JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B
-
-emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32:
- MOVOU (R10), X0
- MOVOU -16(R10)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B
-
-emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64:
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_repeat_emit_encodeBlockAsm8B:
- MOVQ SI, AX
- JMP emit_literal_done_repeat_emit_encodeBlockAsm8B
-
-memmove_long_repeat_emit_encodeBlockAsm8B:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveLong
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVQ R9, R12
- SHRQ $0x05, R12
- MOVQ AX, R11
- ANDL $0x0000001f, R11
- MOVQ $0x00000040, R13
- SUBQ R11, R13
- DECQ R12
- JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
- LEAQ -32(R10)(R13*1), R11
- LEAQ -32(AX)(R13*1), R14
-
-emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back:
- MOVOU (R11), X4
- MOVOU 16(R11), X5
- MOVOA X4, (R14)
- MOVOA X5, 16(R14)
- ADDQ $0x20, R14
- ADDQ $0x20, R11
- ADDQ $0x20, R13
- DECQ R12
- JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back
-
-emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
- MOVOU -32(R10)(R13*1), X4
- MOVOU -16(R10)(R13*1), X5
- MOVOA X4, -32(AX)(R13*1)
- MOVOA X5, -16(AX)(R13*1)
- ADDQ $0x20, R13
- CMPQ R9, R13
- JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ SI, AX
-
-emit_literal_done_repeat_emit_encodeBlockAsm8B:
- ADDL $0x05, CX
- MOVL CX, SI
- SUBL 16(SP), SI
- MOVQ src_len+32(FP), R9
- SUBL CX, R9
- LEAQ (DX)(CX*1), R10
- LEAQ (DX)(SI*1), SI
-
- // matchLen
- XORL R12, R12
- CMPL R9, $0x08
- JL matchlen_match4_repeat_extend_encodeBlockAsm8B
-
-matchlen_loopback_repeat_extend_encodeBlockAsm8B:
- MOVQ (R10)(R12*1), R11
- XORQ (SI)(R12*1), R11
- TESTQ R11, R11
- JZ matchlen_loop_repeat_extend_encodeBlockAsm8B
-
-#ifdef GOAMD64_v3
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R11, R11
-
-#endif
- SARQ $0x03, R11
- LEAL (R12)(R11*1), R12
- JMP repeat_extend_forward_end_encodeBlockAsm8B
-
-matchlen_loop_repeat_extend_encodeBlockAsm8B:
- LEAL -8(R9), R9
- LEAL 8(R12), R12
- CMPL R9, $0x08
- JGE matchlen_loopback_repeat_extend_encodeBlockAsm8B
- JZ repeat_extend_forward_end_encodeBlockAsm8B
-
-matchlen_match4_repeat_extend_encodeBlockAsm8B:
- CMPL R9, $0x04
- JL matchlen_match2_repeat_extend_encodeBlockAsm8B
- MOVL (R10)(R12*1), R11
- CMPL (SI)(R12*1), R11
- JNE matchlen_match2_repeat_extend_encodeBlockAsm8B
- SUBL $0x04, R9
- LEAL 4(R12), R12
-
-matchlen_match2_repeat_extend_encodeBlockAsm8B:
- CMPL R9, $0x02
- JL matchlen_match1_repeat_extend_encodeBlockAsm8B
- MOVW (R10)(R12*1), R11
- CMPW (SI)(R12*1), R11
- JNE matchlen_match1_repeat_extend_encodeBlockAsm8B
- SUBL $0x02, R9
- LEAL 2(R12), R12
-
-matchlen_match1_repeat_extend_encodeBlockAsm8B:
- CMPL R9, $0x01
- JL repeat_extend_forward_end_encodeBlockAsm8B
- MOVB (R10)(R12*1), R11
- CMPB (SI)(R12*1), R11
- JNE repeat_extend_forward_end_encodeBlockAsm8B
- LEAL 1(R12), R12
-
-repeat_extend_forward_end_encodeBlockAsm8B:
- ADDL R12, CX
- MOVL CX, SI
- SUBL DI, SI
- MOVL 16(SP), DI
- TESTL R8, R8
- JZ repeat_as_copy_encodeBlockAsm8B
-
- // emitRepeat
- MOVL SI, DI
- LEAL -4(SI), SI
- CMPL DI, $0x08
- JLE repeat_two_match_repeat_encodeBlockAsm8B
- CMPL DI, $0x0c
- JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm8B
-
-cant_repeat_two_offset_match_repeat_encodeBlockAsm8B:
- CMPL SI, $0x00000104
- JLT repeat_three_match_repeat_encodeBlockAsm8B
- LEAL -256(SI), SI
- MOVW $0x0019, (AX)
- MOVW SI, 2(AX)
- ADDQ $0x04, AX
- JMP repeat_end_emit_encodeBlockAsm8B
-
-repeat_three_match_repeat_encodeBlockAsm8B:
- LEAL -4(SI), SI
- MOVW $0x0015, (AX)
- MOVB SI, 2(AX)
- ADDQ $0x03, AX
- JMP repeat_end_emit_encodeBlockAsm8B
-
-repeat_two_match_repeat_encodeBlockAsm8B:
- SHLL $0x02, SI
- ORL $0x01, SI
- MOVW SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm8B
- XORQ R8, R8
- LEAL 1(R8)(SI*4), SI
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm8B
-
-repeat_as_copy_encodeBlockAsm8B:
- // emitCopy
-two_byte_offset_repeat_as_copy_encodeBlockAsm8B:
- CMPL SI, $0x40
- JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B
- MOVB $0xee, (AX)
- MOVW DI, 1(AX)
- LEAL -60(SI), SI
- ADDQ $0x03, AX
-
- // emitRepeat
- MOVL SI, DI
- LEAL -4(SI), SI
- CMPL DI, $0x08
- JLE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
- CMPL DI, $0x0c
- JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
-
-cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
- CMPL SI, $0x00000104
- JLT repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
- LEAL -256(SI), SI
- MOVW $0x0019, (AX)
- MOVW SI, 2(AX)
- ADDQ $0x04, AX
- JMP repeat_end_emit_encodeBlockAsm8B
-
-repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
- LEAL -4(SI), SI
- MOVW $0x0015, (AX)
- MOVB SI, 2(AX)
- ADDQ $0x03, AX
- JMP repeat_end_emit_encodeBlockAsm8B
-
-repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
- SHLL $0x02, SI
- ORL $0x01, SI
- MOVW SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm8B
- XORQ R8, R8
- LEAL 1(R8)(SI*4), SI
- MOVB DI, 1(AX)
- SARL $0x08, DI
- SHLL $0x05, DI
- ORL DI, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm8B
- JMP two_byte_offset_repeat_as_copy_encodeBlockAsm8B
-
-two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B:
- CMPL SI, $0x0c
- JGE emit_copy_three_repeat_as_copy_encodeBlockAsm8B
- MOVB $0x01, BL
- LEAL -16(BX)(SI*4), SI
- MOVB DI, 1(AX)
- SHRL $0x08, DI
- SHLL $0x05, DI
- ORL DI, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeBlockAsm8B
-
-emit_copy_three_repeat_as_copy_encodeBlockAsm8B:
- MOVB $0x02, BL
- LEAL -4(BX)(SI*4), SI
- MOVB SI, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
-
-repeat_end_emit_encodeBlockAsm8B:
- MOVL CX, 12(SP)
- JMP search_loop_encodeBlockAsm8B
-
-no_repeat_found_encodeBlockAsm8B:
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeBlockAsm8B
- SHRQ $0x08, DI
- MOVL 24(SP)(R10*4), SI
- LEAL 2(CX), R9
- CMPL (DX)(R8*1), DI
- JEQ candidate2_match_encodeBlockAsm8B
- MOVL R9, 24(SP)(R10*4)
- SHRQ $0x08, DI
- CMPL (DX)(SI*1), DI
- JEQ candidate3_match_encodeBlockAsm8B
- MOVL 20(SP), CX
- JMP search_loop_encodeBlockAsm8B
-
-candidate3_match_encodeBlockAsm8B:
- ADDL $0x02, CX
- JMP candidate_match_encodeBlockAsm8B
-
-candidate2_match_encodeBlockAsm8B:
- MOVL R9, 24(SP)(R10*4)
- INCL CX
- MOVL R8, SI
-
-candidate_match_encodeBlockAsm8B:
- MOVL 12(SP), DI
- TESTL SI, SI
- JZ match_extend_back_end_encodeBlockAsm8B
-
-match_extend_back_loop_encodeBlockAsm8B:
- CMPL CX, DI
- JLE match_extend_back_end_encodeBlockAsm8B
- MOVB -1(DX)(SI*1), BL
- MOVB -1(DX)(CX*1), R8
- CMPB BL, R8
- JNE match_extend_back_end_encodeBlockAsm8B
- LEAL -1(CX), CX
- DECL SI
- JZ match_extend_back_end_encodeBlockAsm8B
- JMP match_extend_back_loop_encodeBlockAsm8B
-
-match_extend_back_end_encodeBlockAsm8B:
- MOVL CX, DI
- SUBL 12(SP), DI
- LEAQ 3(AX)(DI*1), DI
- CMPQ DI, (SP)
- JL match_dst_size_check_encodeBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_dst_size_check_encodeBlockAsm8B:
- MOVL CX, DI
- MOVL 12(SP), R8
- CMPL R8, DI
- JEQ emit_literal_done_match_emit_encodeBlockAsm8B
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(R8*1), DI
- SUBL R8, R9
- LEAL -1(R9), R8
- CMPL R8, $0x3c
- JLT one_byte_match_emit_encodeBlockAsm8B
- CMPL R8, $0x00000100
- JLT two_bytes_match_emit_encodeBlockAsm8B
- MOVB $0xf4, (AX)
- MOVW R8, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_match_emit_encodeBlockAsm8B
-
-two_bytes_match_emit_encodeBlockAsm8B:
- MOVB $0xf0, (AX)
- MOVB R8, 1(AX)
- ADDQ $0x02, AX
- CMPL R8, $0x40
- JL memmove_match_emit_encodeBlockAsm8B
- JMP memmove_long_match_emit_encodeBlockAsm8B
-
-one_byte_match_emit_encodeBlockAsm8B:
- SHLB $0x02, R8
- MOVB R8, (AX)
- ADDQ $0x01, AX
-
-memmove_match_emit_encodeBlockAsm8B:
- LEAQ (AX)(R9*1), R8
-
- // genMemMoveShort
- CMPQ R9, $0x08
- JLE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8
- CMPQ R9, $0x10
- JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32
- JMP emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64
-
-emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8:
- MOVQ (DI), R10
- MOVQ R10, (AX)
- JMP memmove_end_copy_match_emit_encodeBlockAsm8B
-
-emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16:
- MOVQ (DI), R10
- MOVQ -8(DI)(R9*1), DI
- MOVQ R10, (AX)
- MOVQ DI, -8(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeBlockAsm8B
-
-emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32:
- MOVOU (DI), X0
- MOVOU -16(DI)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeBlockAsm8B
-
-emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64:
- MOVOU (DI), X0
- MOVOU 16(DI), X1
- MOVOU -32(DI)(R9*1), X2
- MOVOU -16(DI)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_match_emit_encodeBlockAsm8B:
- MOVQ R8, AX
- JMP emit_literal_done_match_emit_encodeBlockAsm8B
-
-memmove_long_match_emit_encodeBlockAsm8B:
- LEAQ (AX)(R9*1), R8
-
- // genMemMoveLong
- MOVOU (DI), X0
- MOVOU 16(DI), X1
- MOVOU -32(DI)(R9*1), X2
- MOVOU -16(DI)(R9*1), X3
- MOVQ R9, R11
- SHRQ $0x05, R11
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R12
- SUBQ R10, R12
- DECQ R11
- JA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
- LEAQ -32(DI)(R12*1), R10
- LEAQ -32(AX)(R12*1), R13
-
-emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R13)
- MOVOA X5, 16(R13)
- ADDQ $0x20, R13
- ADDQ $0x20, R10
- ADDQ $0x20, R12
- DECQ R11
- JNA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
- MOVOU -32(DI)(R12*1), X4
- MOVOU -16(DI)(R12*1), X5
- MOVOA X4, -32(AX)(R12*1)
- MOVOA X5, -16(AX)(R12*1)
- ADDQ $0x20, R12
- CMPQ R9, R12
- JAE emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ R8, AX
-
-emit_literal_done_match_emit_encodeBlockAsm8B:
-match_nolit_loop_encodeBlockAsm8B:
- MOVL CX, DI
- SUBL SI, DI
- MOVL DI, 16(SP)
- ADDL $0x04, CX
- ADDL $0x04, SI
- MOVQ src_len+32(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(SI*1), SI
-
- // matchLen
- XORL R10, R10
- CMPL DI, $0x08
- JL matchlen_match4_match_nolit_encodeBlockAsm8B
-
-matchlen_loopback_match_nolit_encodeBlockAsm8B:
- MOVQ (R8)(R10*1), R9
- XORQ (SI)(R10*1), R9
- TESTQ R9, R9
- JZ matchlen_loop_match_nolit_encodeBlockAsm8B
-
-#ifdef GOAMD64_v3
- TZCNTQ R9, R9
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R9, R9
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R9, R9
-
-#endif
- SARQ $0x03, R9
- LEAL (R10)(R9*1), R10
- JMP match_nolit_end_encodeBlockAsm8B
-
-matchlen_loop_match_nolit_encodeBlockAsm8B:
- LEAL -8(DI), DI
- LEAL 8(R10), R10
- CMPL DI, $0x08
- JGE matchlen_loopback_match_nolit_encodeBlockAsm8B
- JZ match_nolit_end_encodeBlockAsm8B
-
-matchlen_match4_match_nolit_encodeBlockAsm8B:
- CMPL DI, $0x04
- JL matchlen_match2_match_nolit_encodeBlockAsm8B
- MOVL (R8)(R10*1), R9
- CMPL (SI)(R10*1), R9
- JNE matchlen_match2_match_nolit_encodeBlockAsm8B
- SUBL $0x04, DI
- LEAL 4(R10), R10
-
-matchlen_match2_match_nolit_encodeBlockAsm8B:
- CMPL DI, $0x02
- JL matchlen_match1_match_nolit_encodeBlockAsm8B
- MOVW (R8)(R10*1), R9
- CMPW (SI)(R10*1), R9
- JNE matchlen_match1_match_nolit_encodeBlockAsm8B
- SUBL $0x02, DI
- LEAL 2(R10), R10
-
-matchlen_match1_match_nolit_encodeBlockAsm8B:
- CMPL DI, $0x01
- JL match_nolit_end_encodeBlockAsm8B
- MOVB (R8)(R10*1), R9
- CMPB (SI)(R10*1), R9
- JNE match_nolit_end_encodeBlockAsm8B
- LEAL 1(R10), R10
-
-match_nolit_end_encodeBlockAsm8B:
- ADDL R10, CX
- MOVL 16(SP), SI
- ADDL $0x04, R10
- MOVL CX, 12(SP)
-
- // emitCopy
-two_byte_offset_match_nolit_encodeBlockAsm8B:
- CMPL R10, $0x40
- JLE two_byte_offset_short_match_nolit_encodeBlockAsm8B
- MOVB $0xee, (AX)
- MOVW SI, 1(AX)
- LEAL -60(R10), R10
- ADDQ $0x03, AX
-
- // emitRepeat
- MOVL R10, SI
- LEAL -4(R10), R10
- CMPL SI, $0x08
- JLE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short
- CMPL SI, $0x0c
- JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short
-
-cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short:
- CMPL R10, $0x00000104
- JLT repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short
- LEAL -256(R10), R10
- MOVW $0x0019, (AX)
- MOVW R10, 2(AX)
- ADDQ $0x04, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm8B
-
-repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short:
- LEAL -4(R10), R10
- MOVW $0x0015, (AX)
- MOVB R10, 2(AX)
- ADDQ $0x03, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm8B
-
-repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short:
- SHLL $0x02, R10
- ORL $0x01, R10
- MOVW R10, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm8B
- XORQ DI, DI
- LEAL 1(DI)(R10*4), R10
- MOVB SI, 1(AX)
- SARL $0x08, SI
- SHLL $0x05, SI
- ORL SI, R10
- MOVB R10, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm8B
- JMP two_byte_offset_match_nolit_encodeBlockAsm8B
-
-two_byte_offset_short_match_nolit_encodeBlockAsm8B:
- CMPL R10, $0x0c
- JGE emit_copy_three_match_nolit_encodeBlockAsm8B
- MOVB $0x01, BL
- LEAL -16(BX)(R10*4), R10
- MOVB SI, 1(AX)
- SHRL $0x08, SI
- SHLL $0x05, SI
- ORL SI, R10
- MOVB R10, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBlockAsm8B
-
-emit_copy_three_match_nolit_encodeBlockAsm8B:
- MOVB $0x02, BL
- LEAL -4(BX)(R10*4), R10
- MOVB R10, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
-
-match_nolit_emitcopy_end_encodeBlockAsm8B:
- CMPL CX, 8(SP)
- JGE emit_remainder_encodeBlockAsm8B
- MOVQ -2(DX)(CX*1), DI
- CMPQ AX, (SP)
- JL match_nolit_dst_ok_encodeBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_nolit_dst_ok_encodeBlockAsm8B:
- MOVQ $0x9e3779b1, R9
- MOVQ DI, R8
- SHRQ $0x10, DI
- MOVQ DI, SI
- SHLQ $0x20, R8
- IMULQ R9, R8
- SHRQ $0x38, R8
- SHLQ $0x20, SI
- IMULQ R9, SI
- SHRQ $0x38, SI
- LEAL -2(CX), R9
- LEAQ 24(SP)(SI*4), R10
- MOVL (R10), SI
- MOVL R9, 24(SP)(R8*4)
- MOVL CX, (R10)
- CMPL (DX)(SI*1), DI
- JEQ match_nolit_loop_encodeBlockAsm8B
- INCL CX
- JMP search_loop_encodeBlockAsm8B
-
-emit_remainder_encodeBlockAsm8B:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
- JL emit_remainder_ok_encodeBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-emit_remainder_ok_encodeBlockAsm8B:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
- JEQ emit_literal_done_emit_remainder_encodeBlockAsm8B
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
- LEAL -1(SI), DX
- CMPL DX, $0x3c
- JLT one_byte_emit_remainder_encodeBlockAsm8B
- CMPL DX, $0x00000100
- JLT two_bytes_emit_remainder_encodeBlockAsm8B
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_emit_remainder_encodeBlockAsm8B
-
-two_bytes_emit_remainder_encodeBlockAsm8B:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
- CMPL DX, $0x40
- JL memmove_emit_remainder_encodeBlockAsm8B
- JMP memmove_long_emit_remainder_encodeBlockAsm8B
-
-one_byte_emit_remainder_encodeBlockAsm8B:
- SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
-
-memmove_emit_remainder_encodeBlockAsm8B:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveShort
- CMPQ BX, $0x08
- JLE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8
- CMPQ BX, $0x10
- JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16
- CMPQ BX, $0x20
- JBE emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32
- JMP emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64
-
-emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8:
- MOVQ (CX), SI
- MOVQ SI, (AX)
- JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
-
-emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
-
-emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B
-
-emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
-
-memmove_end_copy_emit_remainder_encodeBlockAsm8B:
- MOVQ DX, AX
- JMP emit_literal_done_emit_remainder_encodeBlockAsm8B
-
-memmove_long_emit_remainder_encodeBlockAsm8B:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVQ BX, DI
- SHRQ $0x05, DI
- MOVQ AX, SI
- ANDL $0x0000001f, SI
- MOVQ $0x00000040, R8
- SUBQ SI, R8
- DECQ DI
- JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
-
-emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back:
- MOVOU (SI), X4
- MOVOU 16(SI), X5
- MOVOA X4, (R9)
- MOVOA X5, 16(R9)
- ADDQ $0x20, R9
- ADDQ $0x20, SI
- ADDQ $0x20, R8
- DECQ DI
- JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back
-
-emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
- ADDQ $0x20, R8
- CMPQ BX, R8
- JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
-
-emit_literal_done_emit_remainder_encodeBlockAsm8B:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
- RET
-
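The // matchLen blocks above extend a candidate match eight bytes at a time: the XOR of two qwords is zero while the bytes agree, and once it is non-zero, TZCNT (under GOAMD64_v3/v4) or BSF gives the bit position of the first difference, which divided by eight is the number of extra matching bytes. A Go sketch of the same idea; matchLen here is just the generator's comment name, not an exported function:

```go
package main

import (
	"encoding/binary"
	"fmt"
	"math/bits"
)

// matchLen returns how many leading bytes of a and b are equal.
// It assumes len(b) >= len(a), matching how the assembly is called.
func matchLen(a, b []byte) int {
	n := 0
	for len(a) >= 8 {
		x := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
		if x != 0 {
			// Trailing zero bits of the XOR count the matching low-order
			// bytes (TZCNT/BSF then SARQ $3 in the assembly).
			return n + bits.TrailingZeros64(x)>>3
		}
		a, b = a[8:], b[8:]
		n += 8
	}
	for i := range a {
		if a[i] != b[i] {
			break
		}
		n++
	}
	return n
}

func main() {
	fmt.Println(matchLen([]byte("0123456789abXdef"), []byte("0123456789abYdef"))) // 12
}
```

The 4-, 2- and 1-byte tail comparisons in the assembly are folded into a plain byte loop in this sketch; the result is the same.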
-// func encodeBetterBlockAsm(dst []byte, src []byte) int
-// Requires: BMI, SSE2
-TEXT ·encodeBetterBlockAsm(SB), $327704-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000a00, CX
- LEAQ 24(SP), DX
- PXOR X0, X0
-
-zero_loop_encodeBetterBlockAsm:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
- JNZ zero_loop_encodeBetterBlockAsm
- MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -6(CX), DX
- LEAQ -8(CX), SI
- MOVL SI, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL $0x00000000, 16(SP)
- MOVQ src_base+24(FP), DX
-
-search_loop_encodeBetterBlockAsm:
- MOVL CX, SI
- SUBL 12(SP), SI
- SHRL $0x07, SI
- CMPL SI, $0x63
- JLE check_maxskip_ok_encodeBetterBlockAsm
- LEAL 100(CX), SI
- JMP check_maxskip_cont_encodeBetterBlockAsm
-
-check_maxskip_ok_encodeBetterBlockAsm:
- LEAL 1(CX)(SI*1), SI
-
-check_maxskip_cont_encodeBetterBlockAsm:
- CMPL SI, 8(SP)
- JGE emit_remainder_encodeBetterBlockAsm
- MOVQ (DX)(CX*1), DI
- MOVL SI, 20(SP)
- MOVQ $0x00cf1bbcdcbfa563, R9
- MOVQ $0x9e3779b1, SI
- MOVQ DI, R10
- MOVQ DI, R11
- SHLQ $0x08, R10
- IMULQ R9, R10
- SHRQ $0x30, R10
- SHLQ $0x20, R11
- IMULQ SI, R11
- SHRQ $0x32, R11
- MOVL 24(SP)(R10*4), SI
- MOVL 262168(SP)(R11*4), R8
- MOVL CX, 24(SP)(R10*4)
- MOVL CX, 262168(SP)(R11*4)
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeBetterBlockAsm
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeBetterBlockAsm
- MOVL 20(SP), CX
- JMP search_loop_encodeBetterBlockAsm
-
-candidateS_match_encodeBetterBlockAsm:
- SHRQ $0x08, DI
- MOVQ DI, R10
- SHLQ $0x08, R10
- IMULQ R9, R10
- SHRQ $0x30, R10
- MOVL 24(SP)(R10*4), SI
- INCL CX
- MOVL CX, 24(SP)(R10*4)
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeBetterBlockAsm
- DECL CX
- MOVL R8, SI
-
-candidate_match_encodeBetterBlockAsm:
- MOVL 12(SP), DI
- TESTL SI, SI
- JZ match_extend_back_end_encodeBetterBlockAsm
-
-match_extend_back_loop_encodeBetterBlockAsm:
- CMPL CX, DI
- JLE match_extend_back_end_encodeBetterBlockAsm
- MOVB -1(DX)(SI*1), BL
- MOVB -1(DX)(CX*1), R8
- CMPB BL, R8
- JNE match_extend_back_end_encodeBetterBlockAsm
- LEAL -1(CX), CX
- DECL SI
- JZ match_extend_back_end_encodeBetterBlockAsm
- JMP match_extend_back_loop_encodeBetterBlockAsm
-
-match_extend_back_end_encodeBetterBlockAsm:
- MOVL CX, DI
- SUBL 12(SP), DI
- LEAQ 5(AX)(DI*1), DI
- CMPQ DI, (SP)
- JL match_dst_size_check_encodeBetterBlockAsm
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_dst_size_check_encodeBetterBlockAsm:
- MOVL CX, DI
- ADDL $0x04, CX
- ADDL $0x04, SI
- MOVQ src_len+32(FP), R8
- SUBL CX, R8
- LEAQ (DX)(CX*1), R9
- LEAQ (DX)(SI*1), R10
-
- // matchLen
- XORL R12, R12
- CMPL R8, $0x08
- JL matchlen_match4_match_nolit_encodeBetterBlockAsm
-
-matchlen_loopback_match_nolit_encodeBetterBlockAsm:
- MOVQ (R9)(R12*1), R11
- XORQ (R10)(R12*1), R11
- TESTQ R11, R11
- JZ matchlen_loop_match_nolit_encodeBetterBlockAsm
-
-#ifdef GOAMD64_v3
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R11, R11
-
-#endif
- SARQ $0x03, R11
- LEAL (R12)(R11*1), R12
- JMP match_nolit_end_encodeBetterBlockAsm
-
-matchlen_loop_match_nolit_encodeBetterBlockAsm:
- LEAL -8(R8), R8
- LEAL 8(R12), R12
- CMPL R8, $0x08
- JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm
- JZ match_nolit_end_encodeBetterBlockAsm
-
-matchlen_match4_match_nolit_encodeBetterBlockAsm:
- CMPL R8, $0x04
- JL matchlen_match2_match_nolit_encodeBetterBlockAsm
- MOVL (R9)(R12*1), R11
- CMPL (R10)(R12*1), R11
- JNE matchlen_match2_match_nolit_encodeBetterBlockAsm
- SUBL $0x04, R8
- LEAL 4(R12), R12
-
-matchlen_match2_match_nolit_encodeBetterBlockAsm:
- CMPL R8, $0x02
- JL matchlen_match1_match_nolit_encodeBetterBlockAsm
- MOVW (R9)(R12*1), R11
- CMPW (R10)(R12*1), R11
- JNE matchlen_match1_match_nolit_encodeBetterBlockAsm
- SUBL $0x02, R8
- LEAL 2(R12), R12
-
-matchlen_match1_match_nolit_encodeBetterBlockAsm:
- CMPL R8, $0x01
- JL match_nolit_end_encodeBetterBlockAsm
- MOVB (R9)(R12*1), R11
- CMPB (R10)(R12*1), R11
- JNE match_nolit_end_encodeBetterBlockAsm
- LEAL 1(R12), R12
-
-match_nolit_end_encodeBetterBlockAsm:
- MOVL CX, R8
- SUBL SI, R8
-
- // Check if repeat
- CMPL 16(SP), R8
- JEQ match_is_repeat_encodeBetterBlockAsm
- CMPL R12, $0x01
- JG match_length_ok_encodeBetterBlockAsm
- CMPL R8, $0x0000ffff
- JLE match_length_ok_encodeBetterBlockAsm
- MOVL 20(SP), CX
- INCL CX
- JMP search_loop_encodeBetterBlockAsm
-
-match_length_ok_encodeBetterBlockAsm:
- MOVL R8, 16(SP)
- MOVL 12(SP), SI
- CMPL SI, DI
- JEQ emit_literal_done_match_emit_encodeBetterBlockAsm
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(SI*1), R10
- SUBL SI, R9
- LEAL -1(R9), SI
- CMPL SI, $0x3c
- JLT one_byte_match_emit_encodeBetterBlockAsm
- CMPL SI, $0x00000100
- JLT two_bytes_match_emit_encodeBetterBlockAsm
- CMPL SI, $0x00010000
- JLT three_bytes_match_emit_encodeBetterBlockAsm
- CMPL SI, $0x01000000
- JLT four_bytes_match_emit_encodeBetterBlockAsm
- MOVB $0xfc, (AX)
- MOVL SI, 1(AX)
- ADDQ $0x05, AX
- JMP memmove_long_match_emit_encodeBetterBlockAsm
-
-four_bytes_match_emit_encodeBetterBlockAsm:
- MOVL SI, R11
- SHRL $0x10, R11
- MOVB $0xf8, (AX)
- MOVW SI, 1(AX)
- MOVB R11, 3(AX)
- ADDQ $0x04, AX
- JMP memmove_long_match_emit_encodeBetterBlockAsm
-
-three_bytes_match_emit_encodeBetterBlockAsm:
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_match_emit_encodeBetterBlockAsm
-
-two_bytes_match_emit_encodeBetterBlockAsm:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_match_emit_encodeBetterBlockAsm
- JMP memmove_long_match_emit_encodeBetterBlockAsm
-
-one_byte_match_emit_encodeBetterBlockAsm:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, AX
-
-memmove_match_emit_encodeBetterBlockAsm:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveShort
- CMPQ R9, $0x04
- JLE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4
- CMPQ R9, $0x08
- JB emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7
- CMPQ R9, $0x10
- JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32
- JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64
-
-emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4:
- MOVL (R10), R11
- MOVL R11, (AX)
- JMP memmove_end_copy_match_emit_encodeBetterBlockAsm
-
-emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7:
- MOVL (R10), R11
- MOVL -4(R10)(R9*1), R10
- MOVL R11, (AX)
- MOVL R10, -4(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeBetterBlockAsm
-
-emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16:
- MOVQ (R10), R11
- MOVQ -8(R10)(R9*1), R10
- MOVQ R11, (AX)
- MOVQ R10, -8(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeBetterBlockAsm
-
-emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32:
- MOVOU (R10), X0
- MOVOU -16(R10)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeBetterBlockAsm
-
-emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64:
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_match_emit_encodeBetterBlockAsm:
- MOVQ SI, AX
- JMP emit_literal_done_match_emit_encodeBetterBlockAsm
-
-memmove_long_match_emit_encodeBetterBlockAsm:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveLong
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVQ R9, R13
- SHRQ $0x05, R13
- MOVQ AX, R11
- ANDL $0x0000001f, R11
- MOVQ $0x00000040, R14
- SUBQ R11, R14
- DECQ R13
- JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32
- LEAQ -32(R10)(R14*1), R11
- LEAQ -32(AX)(R14*1), R15
-
-emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back:
- MOVOU (R11), X4
- MOVOU 16(R11), X5
- MOVOA X4, (R15)
- MOVOA X5, 16(R15)
- ADDQ $0x20, R15
- ADDQ $0x20, R11
- ADDQ $0x20, R14
- DECQ R13
- JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32:
- MOVOU -32(R10)(R14*1), X4
- MOVOU -16(R10)(R14*1), X5
- MOVOA X4, -32(AX)(R14*1)
- MOVOA X5, -16(AX)(R14*1)
- ADDQ $0x20, R14
- CMPQ R9, R14
- JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ SI, AX
-
-emit_literal_done_match_emit_encodeBetterBlockAsm:
- ADDL R12, CX
- ADDL $0x04, R12
- MOVL CX, 12(SP)
-
- // emitCopy
- CMPL R8, $0x00010000
- JL two_byte_offset_match_nolit_encodeBetterBlockAsm
-
-four_bytes_loop_back_match_nolit_encodeBetterBlockAsm:
- CMPL R12, $0x40
- JLE four_bytes_remain_match_nolit_encodeBetterBlockAsm
- MOVB $0xff, (AX)
- MOVL R8, 1(AX)
- LEAL -64(R12), R12
- ADDQ $0x05, AX
- CMPL R12, $0x04
- JL four_bytes_remain_match_nolit_encodeBetterBlockAsm
-
- // emitRepeat
-emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy:
- MOVL R12, SI
- LEAL -4(R12), R12
- CMPL SI, $0x08
- JLE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy
- CMPL SI, $0x0c
- JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy
- CMPL R8, $0x00000800
- JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy
-
-cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy:
- CMPL R12, $0x00000104
- JLT repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy
- CMPL R12, $0x00010100
- JLT repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy
- CMPL R12, $0x0100ffff
- JLT repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy
- LEAL -16842747(R12), R12
- MOVW $0x001d, (AX)
- MOVW $0xfffb, 2(AX)
- MOVB $0xff, 4(AX)
- ADDQ $0x05, AX
- JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy
-
-repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy:
- LEAL -65536(R12), R12
- MOVL R12, R8
- MOVW $0x001d, (AX)
- MOVW R12, 2(AX)
- SARL $0x10, R8
- MOVB R8, 4(AX)
- ADDQ $0x05, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
-
-repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy:
- LEAL -256(R12), R12
- MOVW $0x0019, (AX)
- MOVW R12, 2(AX)
- ADDQ $0x04, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
-
-repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy:
- LEAL -4(R12), R12
- MOVW $0x0015, (AX)
- MOVB R12, 2(AX)
- ADDQ $0x03, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
-
-repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy:
- SHLL $0x02, R12
- ORL $0x01, R12
- MOVW R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
-
-repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy:
- XORQ SI, SI
- LEAL 1(SI)(R12*4), R12
- MOVB R8, 1(AX)
- SARL $0x08, R8
- SHLL $0x05, R8
- ORL R8, R12
- MOVB R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
- JMP four_bytes_loop_back_match_nolit_encodeBetterBlockAsm
-
-four_bytes_remain_match_nolit_encodeBetterBlockAsm:
- TESTL R12, R12
- JZ match_nolit_emitcopy_end_encodeBetterBlockAsm
- MOVB $0x03, BL
- LEAL -4(BX)(R12*4), R12
- MOVB R12, (AX)
- MOVL R8, 1(AX)
- ADDQ $0x05, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
-
-two_byte_offset_match_nolit_encodeBetterBlockAsm:
- CMPL R12, $0x40
- JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm
- MOVB $0xee, (AX)
- MOVW R8, 1(AX)
- LEAL -60(R12), R12
- ADDQ $0x03, AX
-
- // emitRepeat
-emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short:
- MOVL R12, SI
- LEAL -4(R12), R12
- CMPL SI, $0x08
- JLE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short
- CMPL SI, $0x0c
- JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short
- CMPL R8, $0x00000800
- JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short
-
-cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short:
- CMPL R12, $0x00000104
- JLT repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short
- CMPL R12, $0x00010100
- JLT repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short
- CMPL R12, $0x0100ffff
- JLT repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short
- LEAL -16842747(R12), R12
- MOVW $0x001d, (AX)
- MOVW $0xfffb, 2(AX)
- MOVB $0xff, 4(AX)
- ADDQ $0x05, AX
- JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short
-
-repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short:
- LEAL -65536(R12), R12
- MOVL R12, R8
- MOVW $0x001d, (AX)
- MOVW R12, 2(AX)
- SARL $0x10, R8
- MOVB R8, 4(AX)
- ADDQ $0x05, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
-
-repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short:
- LEAL -256(R12), R12
- MOVW $0x0019, (AX)
- MOVW R12, 2(AX)
- ADDQ $0x04, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
-
-repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short:
- LEAL -4(R12), R12
- MOVW $0x0015, (AX)
- MOVB R12, 2(AX)
- ADDQ $0x03, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
-
-repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short:
- SHLL $0x02, R12
- ORL $0x01, R12
- MOVW R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
-
-repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short:
- XORQ SI, SI
- LEAL 1(SI)(R12*4), R12
- MOVB R8, 1(AX)
- SARL $0x08, R8
- SHLL $0x05, R8
- ORL R8, R12
- MOVB R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
- JMP two_byte_offset_match_nolit_encodeBetterBlockAsm
-
-two_byte_offset_short_match_nolit_encodeBetterBlockAsm:
- CMPL R12, $0x0c
- JGE emit_copy_three_match_nolit_encodeBetterBlockAsm
- CMPL R8, $0x00000800
- JGE emit_copy_three_match_nolit_encodeBetterBlockAsm
- MOVB $0x01, BL
- LEAL -16(BX)(R12*4), R12
- MOVB R8, 1(AX)
- SHRL $0x08, R8
- SHLL $0x05, R8
- ORL R8, R12
- MOVB R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
-
-emit_copy_three_match_nolit_encodeBetterBlockAsm:
- MOVB $0x02, BL
- LEAL -4(BX)(R12*4), R12
- MOVB R12, (AX)
- MOVW R8, 1(AX)
- ADDQ $0x03, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
-
-match_is_repeat_encodeBetterBlockAsm:
- MOVL 12(SP), SI
- CMPL SI, DI
- JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(SI*1), R10
- SUBL SI, R9
- LEAL -1(R9), SI
- CMPL SI, $0x3c
- JLT one_byte_match_emit_repeat_encodeBetterBlockAsm
- CMPL SI, $0x00000100
- JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm
- CMPL SI, $0x00010000
- JLT three_bytes_match_emit_repeat_encodeBetterBlockAsm
- CMPL SI, $0x01000000
- JLT four_bytes_match_emit_repeat_encodeBetterBlockAsm
- MOVB $0xfc, (AX)
- MOVL SI, 1(AX)
- ADDQ $0x05, AX
- JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm
-
-four_bytes_match_emit_repeat_encodeBetterBlockAsm:
- MOVL SI, R11
- SHRL $0x10, R11
- MOVB $0xf8, (AX)
- MOVW SI, 1(AX)
- MOVB R11, 3(AX)
- ADDQ $0x04, AX
- JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm
-
-three_bytes_match_emit_repeat_encodeBetterBlockAsm:
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm
-
-two_bytes_match_emit_repeat_encodeBetterBlockAsm:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_match_emit_repeat_encodeBetterBlockAsm
- JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm
-
-one_byte_match_emit_repeat_encodeBetterBlockAsm:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, AX
-
-memmove_match_emit_repeat_encodeBetterBlockAsm:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveShort
- CMPQ R9, $0x04
- JLE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4
- CMPQ R9, $0x08
- JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7
- CMPQ R9, $0x10
- JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32
- JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64
-
-emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4:
- MOVL (R10), R11
- MOVL R11, (AX)
- JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
-
-emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7:
- MOVL (R10), R11
- MOVL -4(R10)(R9*1), R10
- MOVL R11, (AX)
- MOVL R10, -4(AX)(R9*1)
- JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
-
-emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16:
- MOVQ (R10), R11
- MOVQ -8(R10)(R9*1), R10
- MOVQ R11, (AX)
- MOVQ R10, -8(AX)(R9*1)
- JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
-
-emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32:
- MOVOU (R10), X0
- MOVOU -16(R10)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
-
-emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64:
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm:
- MOVQ SI, AX
- JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm
-
-memmove_long_match_emit_repeat_encodeBetterBlockAsm:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveLong
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVQ R9, R13
- SHRQ $0x05, R13
- MOVQ AX, R11
- ANDL $0x0000001f, R11
- MOVQ $0x00000040, R14
- SUBQ R11, R14
- DECQ R13
- JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32
- LEAQ -32(R10)(R14*1), R11
- LEAQ -32(AX)(R14*1), R15
-
-emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back:
- MOVOU (R11), X4
- MOVOU 16(R11), X5
- MOVOA X4, (R15)
- MOVOA X5, 16(R15)
- ADDQ $0x20, R15
- ADDQ $0x20, R11
- ADDQ $0x20, R14
- DECQ R13
- JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32:
- MOVOU -32(R10)(R14*1), X4
- MOVOU -16(R10)(R14*1), X5
- MOVOA X4, -32(AX)(R14*1)
- MOVOA X5, -16(AX)(R14*1)
- ADDQ $0x20, R14
- CMPQ R9, R14
- JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ SI, AX
-
-emit_literal_done_match_emit_repeat_encodeBetterBlockAsm:
- ADDL R12, CX
- ADDL $0x04, R12
- MOVL CX, 12(SP)
-
- // emitRepeat
-emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm:
- MOVL R12, SI
- LEAL -4(R12), R12
- CMPL SI, $0x08
- JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm
- CMPL SI, $0x0c
- JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm
- CMPL R8, $0x00000800
- JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm
-
-cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm:
- CMPL R12, $0x00000104
- JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm
- CMPL R12, $0x00010100
- JLT repeat_four_match_nolit_repeat_encodeBetterBlockAsm
- CMPL R12, $0x0100ffff
- JLT repeat_five_match_nolit_repeat_encodeBetterBlockAsm
- LEAL -16842747(R12), R12
- MOVW $0x001d, (AX)
- MOVW $0xfffb, 2(AX)
- MOVB $0xff, 4(AX)
- ADDQ $0x05, AX
- JMP emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm
-
-repeat_five_match_nolit_repeat_encodeBetterBlockAsm:
- LEAL -65536(R12), R12
- MOVL R12, R8
- MOVW $0x001d, (AX)
- MOVW R12, 2(AX)
- SARL $0x10, R8
- MOVB R8, 4(AX)
- ADDQ $0x05, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
-
-repeat_four_match_nolit_repeat_encodeBetterBlockAsm:
- LEAL -256(R12), R12
- MOVW $0x0019, (AX)
- MOVW R12, 2(AX)
- ADDQ $0x04, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
-
-repeat_three_match_nolit_repeat_encodeBetterBlockAsm:
- LEAL -4(R12), R12
- MOVW $0x0015, (AX)
- MOVB R12, 2(AX)
- ADDQ $0x03, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
-
-repeat_two_match_nolit_repeat_encodeBetterBlockAsm:
- SHLL $0x02, R12
- ORL $0x01, R12
- MOVW R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm
-
-repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm:
- XORQ SI, SI
- LEAL 1(SI)(R12*4), R12
- MOVB R8, 1(AX)
- SARL $0x08, R8
- SHLL $0x05, R8
- ORL R8, R12
- MOVB R12, (AX)
- ADDQ $0x02, AX
-
-match_nolit_emitcopy_end_encodeBetterBlockAsm:
- CMPL CX, 8(SP)
- JGE emit_remainder_encodeBetterBlockAsm
- CMPQ AX, (SP)
- JL match_nolit_dst_ok_encodeBetterBlockAsm
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_nolit_dst_ok_encodeBetterBlockAsm:
- MOVQ $0x00cf1bbcdcbfa563, SI
- MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
- SHLQ $0x08, R10
- IMULQ SI, R10
- SHRQ $0x30, R10
- SHLQ $0x08, R13
- IMULQ SI, R13
- SHRQ $0x30, R13
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x32, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x32, R12
- MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 262168(SP)(R11*4)
- MOVL R15, 262168(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
- SHLQ $0x08, R10
- IMULQ SI, R10
- SHRQ $0x30, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x32, R11
- SHLQ $0x08, R13
- IMULQ SI, R13
- SHRQ $0x30, R13
- MOVL R9, 24(SP)(R10*4)
- MOVL DI, 262168(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeBetterBlockAsm
-
-emit_remainder_encodeBetterBlockAsm:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 5(AX)(CX*1), CX
- CMPQ CX, (SP)
- JL emit_remainder_ok_encodeBetterBlockAsm
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-emit_remainder_ok_encodeBetterBlockAsm:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
- JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
- LEAL -1(SI), DX
- CMPL DX, $0x3c
- JLT one_byte_emit_remainder_encodeBetterBlockAsm
- CMPL DX, $0x00000100
- JLT two_bytes_emit_remainder_encodeBetterBlockAsm
- CMPL DX, $0x00010000
- JLT three_bytes_emit_remainder_encodeBetterBlockAsm
- CMPL DX, $0x01000000
- JLT four_bytes_emit_remainder_encodeBetterBlockAsm
- MOVB $0xfc, (AX)
- MOVL DX, 1(AX)
- ADDQ $0x05, AX
- JMP memmove_long_emit_remainder_encodeBetterBlockAsm
-
-four_bytes_emit_remainder_encodeBetterBlockAsm:
- MOVL DX, BX
- SHRL $0x10, BX
- MOVB $0xf8, (AX)
- MOVW DX, 1(AX)
- MOVB BL, 3(AX)
- ADDQ $0x04, AX
- JMP memmove_long_emit_remainder_encodeBetterBlockAsm
-
-three_bytes_emit_remainder_encodeBetterBlockAsm:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_emit_remainder_encodeBetterBlockAsm
-
-two_bytes_emit_remainder_encodeBetterBlockAsm:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
- CMPL DX, $0x40
- JL memmove_emit_remainder_encodeBetterBlockAsm
- JMP memmove_long_emit_remainder_encodeBetterBlockAsm
-
-one_byte_emit_remainder_encodeBetterBlockAsm:
- SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
-
-memmove_emit_remainder_encodeBetterBlockAsm:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveShort
- CMPQ BX, $0x04
- JLE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4
- CMPQ BX, $0x08
- JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7
- CMPQ BX, $0x10
- JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16
- CMPQ BX, $0x20
- JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32
- JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64
-
-emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4:
- MOVL (CX), SI
- MOVL SI, (AX)
- JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm
-
-emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(BX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm
-
-emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm
-
-emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm
-
-emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
-
-memmove_end_copy_emit_remainder_encodeBetterBlockAsm:
- MOVQ DX, AX
- JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm
-
-memmove_long_emit_remainder_encodeBetterBlockAsm:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVQ BX, DI
- SHRQ $0x05, DI
- MOVQ AX, SI
- ANDL $0x0000001f, SI
- MOVQ $0x00000040, R8
- SUBQ SI, R8
- DECQ DI
- JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
-
-emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back:
- MOVOU (SI), X4
- MOVOU 16(SI), X5
- MOVOA X4, (R9)
- MOVOA X5, 16(R9)
- ADDQ $0x20, R9
- ADDQ $0x20, SI
- ADDQ $0x20, R8
- DECQ DI
- JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back
-
-emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
- ADDQ $0x20, R8
- CMPQ BX, R8
- JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
-
-emit_literal_done_emit_remainder_encodeBetterBlockAsm:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
- RET
-
-// func encodeBetterBlockAsm4MB(dst []byte, src []byte) int
-// Requires: BMI, SSE2
-TEXT ·encodeBetterBlockAsm4MB(SB), $327704-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000a00, CX
- LEAQ 24(SP), DX
- PXOR X0, X0
-
-zero_loop_encodeBetterBlockAsm4MB:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
- JNZ zero_loop_encodeBetterBlockAsm4MB
- MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -6(CX), DX
- LEAQ -8(CX), SI
- MOVL SI, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL $0x00000000, 16(SP)
- MOVQ src_base+24(FP), DX
-
-search_loop_encodeBetterBlockAsm4MB:
- MOVL CX, SI
- SUBL 12(SP), SI
- SHRL $0x07, SI
- CMPL SI, $0x63
- JLE check_maxskip_ok_encodeBetterBlockAsm4MB
- LEAL 100(CX), SI
- JMP check_maxskip_cont_encodeBetterBlockAsm4MB
-
-check_maxskip_ok_encodeBetterBlockAsm4MB:
- LEAL 1(CX)(SI*1), SI
-
-check_maxskip_cont_encodeBetterBlockAsm4MB:
- CMPL SI, 8(SP)
- JGE emit_remainder_encodeBetterBlockAsm4MB
- MOVQ (DX)(CX*1), DI
- MOVL SI, 20(SP)
- MOVQ $0x00cf1bbcdcbfa563, R9
- MOVQ $0x9e3779b1, SI
- MOVQ DI, R10
- MOVQ DI, R11
- SHLQ $0x08, R10
- IMULQ R9, R10
- SHRQ $0x30, R10
- SHLQ $0x20, R11
- IMULQ SI, R11
- SHRQ $0x32, R11
- MOVL 24(SP)(R10*4), SI
- MOVL 262168(SP)(R11*4), R8
- MOVL CX, 24(SP)(R10*4)
- MOVL CX, 262168(SP)(R11*4)
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeBetterBlockAsm4MB
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeBetterBlockAsm4MB
- MOVL 20(SP), CX
- JMP search_loop_encodeBetterBlockAsm4MB
-
-candidateS_match_encodeBetterBlockAsm4MB:
- SHRQ $0x08, DI
- MOVQ DI, R10
- SHLQ $0x08, R10
- IMULQ R9, R10
- SHRQ $0x30, R10
- MOVL 24(SP)(R10*4), SI
- INCL CX
- MOVL CX, 24(SP)(R10*4)
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeBetterBlockAsm4MB
- DECL CX
- MOVL R8, SI
-
-candidate_match_encodeBetterBlockAsm4MB:
- MOVL 12(SP), DI
- TESTL SI, SI
- JZ match_extend_back_end_encodeBetterBlockAsm4MB
-
-match_extend_back_loop_encodeBetterBlockAsm4MB:
- CMPL CX, DI
- JLE match_extend_back_end_encodeBetterBlockAsm4MB
- MOVB -1(DX)(SI*1), BL
- MOVB -1(DX)(CX*1), R8
- CMPB BL, R8
- JNE match_extend_back_end_encodeBetterBlockAsm4MB
- LEAL -1(CX), CX
- DECL SI
- JZ match_extend_back_end_encodeBetterBlockAsm4MB
- JMP match_extend_back_loop_encodeBetterBlockAsm4MB
-
-match_extend_back_end_encodeBetterBlockAsm4MB:
- MOVL CX, DI
- SUBL 12(SP), DI
- LEAQ 4(AX)(DI*1), DI
- CMPQ DI, (SP)
- JL match_dst_size_check_encodeBetterBlockAsm4MB
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_dst_size_check_encodeBetterBlockAsm4MB:
- MOVL CX, DI
- ADDL $0x04, CX
- ADDL $0x04, SI
- MOVQ src_len+32(FP), R8
- SUBL CX, R8
- LEAQ (DX)(CX*1), R9
- LEAQ (DX)(SI*1), R10
-
- // matchLen
- XORL R12, R12
- CMPL R8, $0x08
- JL matchlen_match4_match_nolit_encodeBetterBlockAsm4MB
-
-matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB:
- MOVQ (R9)(R12*1), R11
- XORQ (R10)(R12*1), R11
- TESTQ R11, R11
- JZ matchlen_loop_match_nolit_encodeBetterBlockAsm4MB
-
-#ifdef GOAMD64_v3
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R11, R11
-
-#endif
- SARQ $0x03, R11
- LEAL (R12)(R11*1), R12
- JMP match_nolit_end_encodeBetterBlockAsm4MB
-
-matchlen_loop_match_nolit_encodeBetterBlockAsm4MB:
- LEAL -8(R8), R8
- LEAL 8(R12), R12
- CMPL R8, $0x08
- JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB
- JZ match_nolit_end_encodeBetterBlockAsm4MB
-
-matchlen_match4_match_nolit_encodeBetterBlockAsm4MB:
- CMPL R8, $0x04
- JL matchlen_match2_match_nolit_encodeBetterBlockAsm4MB
- MOVL (R9)(R12*1), R11
- CMPL (R10)(R12*1), R11
- JNE matchlen_match2_match_nolit_encodeBetterBlockAsm4MB
- SUBL $0x04, R8
- LEAL 4(R12), R12
-
-matchlen_match2_match_nolit_encodeBetterBlockAsm4MB:
- CMPL R8, $0x02
- JL matchlen_match1_match_nolit_encodeBetterBlockAsm4MB
- MOVW (R9)(R12*1), R11
- CMPW (R10)(R12*1), R11
- JNE matchlen_match1_match_nolit_encodeBetterBlockAsm4MB
- SUBL $0x02, R8
- LEAL 2(R12), R12
-
-matchlen_match1_match_nolit_encodeBetterBlockAsm4MB:
- CMPL R8, $0x01
- JL match_nolit_end_encodeBetterBlockAsm4MB
- MOVB (R9)(R12*1), R11
- CMPB (R10)(R12*1), R11
- JNE match_nolit_end_encodeBetterBlockAsm4MB
- LEAL 1(R12), R12
-
-match_nolit_end_encodeBetterBlockAsm4MB:
- MOVL CX, R8
- SUBL SI, R8
-
- // Check if repeat
- CMPL 16(SP), R8
- JEQ match_is_repeat_encodeBetterBlockAsm4MB
- CMPL R12, $0x01
- JG match_length_ok_encodeBetterBlockAsm4MB
- CMPL R8, $0x0000ffff
- JLE match_length_ok_encodeBetterBlockAsm4MB
- MOVL 20(SP), CX
- INCL CX
- JMP search_loop_encodeBetterBlockAsm4MB
-
-match_length_ok_encodeBetterBlockAsm4MB:
- MOVL R8, 16(SP)
- MOVL 12(SP), SI
- CMPL SI, DI
- JEQ emit_literal_done_match_emit_encodeBetterBlockAsm4MB
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(SI*1), R10
- SUBL SI, R9
- LEAL -1(R9), SI
- CMPL SI, $0x3c
- JLT one_byte_match_emit_encodeBetterBlockAsm4MB
- CMPL SI, $0x00000100
- JLT two_bytes_match_emit_encodeBetterBlockAsm4MB
- CMPL SI, $0x00010000
- JLT three_bytes_match_emit_encodeBetterBlockAsm4MB
- MOVL SI, R11
- SHRL $0x10, R11
- MOVB $0xf8, (AX)
- MOVW SI, 1(AX)
- MOVB R11, 3(AX)
- ADDQ $0x04, AX
- JMP memmove_long_match_emit_encodeBetterBlockAsm4MB
-
-three_bytes_match_emit_encodeBetterBlockAsm4MB:
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_match_emit_encodeBetterBlockAsm4MB
-
-two_bytes_match_emit_encodeBetterBlockAsm4MB:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_match_emit_encodeBetterBlockAsm4MB
- JMP memmove_long_match_emit_encodeBetterBlockAsm4MB
-
-one_byte_match_emit_encodeBetterBlockAsm4MB:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, AX
-
-memmove_match_emit_encodeBetterBlockAsm4MB:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveShort
- CMPQ R9, $0x04
- JLE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4
- CMPQ R9, $0x08
- JB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7
- CMPQ R9, $0x10
- JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32
- JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64
-
-emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4:
- MOVL (R10), R11
- MOVL R11, (AX)
- JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
-
-emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7:
- MOVL (R10), R11
- MOVL -4(R10)(R9*1), R10
- MOVL R11, (AX)
- MOVL R10, -4(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
-
-emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16:
- MOVQ (R10), R11
- MOVQ -8(R10)(R9*1), R10
- MOVQ R11, (AX)
- MOVQ R10, -8(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
-
-emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32:
- MOVOU (R10), X0
- MOVOU -16(R10)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
-
-emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64:
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_match_emit_encodeBetterBlockAsm4MB:
- MOVQ SI, AX
- JMP emit_literal_done_match_emit_encodeBetterBlockAsm4MB
-
-memmove_long_match_emit_encodeBetterBlockAsm4MB:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveLong
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVQ R9, R13
- SHRQ $0x05, R13
- MOVQ AX, R11
- ANDL $0x0000001f, R11
- MOVQ $0x00000040, R14
- SUBQ R11, R14
- DECQ R13
- JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
- LEAQ -32(R10)(R14*1), R11
- LEAQ -32(AX)(R14*1), R15
-
-emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back:
- MOVOU (R11), X4
- MOVOU 16(R11), X5
- MOVOA X4, (R15)
- MOVOA X5, 16(R15)
- ADDQ $0x20, R15
- ADDQ $0x20, R11
- ADDQ $0x20, R14
- DECQ R13
- JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
- MOVOU -32(R10)(R14*1), X4
- MOVOU -16(R10)(R14*1), X5
- MOVOA X4, -32(AX)(R14*1)
- MOVOA X5, -16(AX)(R14*1)
- ADDQ $0x20, R14
- CMPQ R9, R14
- JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ SI, AX
-
-emit_literal_done_match_emit_encodeBetterBlockAsm4MB:
- ADDL R12, CX
- ADDL $0x04, R12
- MOVL CX, 12(SP)
-
- // emitCopy
- CMPL R8, $0x00010000
- JL two_byte_offset_match_nolit_encodeBetterBlockAsm4MB
-
-four_bytes_loop_back_match_nolit_encodeBetterBlockAsm4MB:
- CMPL R12, $0x40
- JLE four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB
- MOVB $0xff, (AX)
- MOVL R8, 1(AX)
- LEAL -64(R12), R12
- ADDQ $0x05, AX
- CMPL R12, $0x04
- JL four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB
-
- // emitRepeat
- MOVL R12, SI
- LEAL -4(R12), R12
- CMPL SI, $0x08
- JLE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy
- CMPL SI, $0x0c
- JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy
- CMPL R8, $0x00000800
- JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy
-
-cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
- CMPL R12, $0x00000104
- JLT repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy
- CMPL R12, $0x00010100
- JLT repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy
- LEAL -65536(R12), R12
- MOVL R12, R8
- MOVW $0x001d, (AX)
- MOVW R12, 2(AX)
- SARL $0x10, R8
- MOVB R8, 4(AX)
- ADDQ $0x05, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
-
-repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
- LEAL -256(R12), R12
- MOVW $0x0019, (AX)
- MOVW R12, 2(AX)
- ADDQ $0x04, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
-
-repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
- LEAL -4(R12), R12
- MOVW $0x0015, (AX)
- MOVB R12, 2(AX)
- ADDQ $0x03, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
-
-repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
- SHLL $0x02, R12
- ORL $0x01, R12
- MOVW R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
-
-repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
- XORQ SI, SI
- LEAL 1(SI)(R12*4), R12
- MOVB R8, 1(AX)
- SARL $0x08, R8
- SHLL $0x05, R8
- ORL R8, R12
- MOVB R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
- JMP four_bytes_loop_back_match_nolit_encodeBetterBlockAsm4MB
-
-four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB:
- TESTL R12, R12
- JZ match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
- MOVB $0x03, BL
- LEAL -4(BX)(R12*4), R12
- MOVB R12, (AX)
- MOVL R8, 1(AX)
- ADDQ $0x05, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
-
-two_byte_offset_match_nolit_encodeBetterBlockAsm4MB:
- CMPL R12, $0x40
- JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB
- MOVB $0xee, (AX)
- MOVW R8, 1(AX)
- LEAL -60(R12), R12
- ADDQ $0x03, AX
-
- // emitRepeat
- MOVL R12, SI
- LEAL -4(R12), R12
- CMPL SI, $0x08
- JLE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
- CMPL SI, $0x0c
- JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
- CMPL R8, $0x00000800
- JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
-
-cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
- CMPL R12, $0x00000104
- JLT repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
- CMPL R12, $0x00010100
- JLT repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
- LEAL -65536(R12), R12
- MOVL R12, R8
- MOVW $0x001d, (AX)
- MOVW R12, 2(AX)
- SARL $0x10, R8
- MOVB R8, 4(AX)
- ADDQ $0x05, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
-
-repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
- LEAL -256(R12), R12
- MOVW $0x0019, (AX)
- MOVW R12, 2(AX)
- ADDQ $0x04, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
-
-repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
- LEAL -4(R12), R12
- MOVW $0x0015, (AX)
- MOVB R12, 2(AX)
- ADDQ $0x03, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
-
-repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
- SHLL $0x02, R12
- ORL $0x01, R12
- MOVW R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
-
-repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
- XORQ SI, SI
- LEAL 1(SI)(R12*4), R12
- MOVB R8, 1(AX)
- SARL $0x08, R8
- SHLL $0x05, R8
- ORL R8, R12
- MOVB R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
- JMP two_byte_offset_match_nolit_encodeBetterBlockAsm4MB
-
-two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB:
- CMPL R12, $0x0c
- JGE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB
- CMPL R8, $0x00000800
- JGE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB
- MOVB $0x01, BL
- LEAL -16(BX)(R12*4), R12
- MOVB R8, 1(AX)
- SHRL $0x08, R8
- SHLL $0x05, R8
- ORL R8, R12
- MOVB R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
-
-emit_copy_three_match_nolit_encodeBetterBlockAsm4MB:
- MOVB $0x02, BL
- LEAL -4(BX)(R12*4), R12
- MOVB R12, (AX)
- MOVW R8, 1(AX)
- ADDQ $0x03, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
-
-match_is_repeat_encodeBetterBlockAsm4MB:
- MOVL 12(SP), SI
- CMPL SI, DI
- JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(SI*1), R10
- SUBL SI, R9
- LEAL -1(R9), SI
- CMPL SI, $0x3c
- JLT one_byte_match_emit_repeat_encodeBetterBlockAsm4MB
- CMPL SI, $0x00000100
- JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB
- CMPL SI, $0x00010000
- JLT three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB
- MOVL SI, R11
- SHRL $0x10, R11
- MOVB $0xf8, (AX)
- MOVW SI, 1(AX)
- MOVB R11, 3(AX)
- ADDQ $0x04, AX
- JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
-
-three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB:
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
-
-two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_match_emit_repeat_encodeBetterBlockAsm4MB
- JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
-
-one_byte_match_emit_repeat_encodeBetterBlockAsm4MB:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, AX
-
-memmove_match_emit_repeat_encodeBetterBlockAsm4MB:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveShort
- CMPQ R9, $0x04
- JLE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4
- CMPQ R9, $0x08
- JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7
- CMPQ R9, $0x10
- JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32
- JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64
-
-emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4:
- MOVL (R10), R11
- MOVL R11, (AX)
- JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
-
-emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7:
- MOVL (R10), R11
- MOVL -4(R10)(R9*1), R10
- MOVL R11, (AX)
- MOVL R10, -4(AX)(R9*1)
- JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
-
-emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16:
- MOVQ (R10), R11
- MOVQ -8(R10)(R9*1), R10
- MOVQ R11, (AX)
- MOVQ R10, -8(AX)(R9*1)
- JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
-
-emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32:
- MOVOU (R10), X0
- MOVOU -16(R10)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
-
-emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64:
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB:
- MOVQ SI, AX
- JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB
-
-memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveLong
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVQ R9, R13
- SHRQ $0x05, R13
- MOVQ AX, R11
- ANDL $0x0000001f, R11
- MOVQ $0x00000040, R14
- SUBQ R11, R14
- DECQ R13
- JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
- LEAQ -32(R10)(R14*1), R11
- LEAQ -32(AX)(R14*1), R15
-
-emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back:
- MOVOU (R11), X4
- MOVOU 16(R11), X5
- MOVOA X4, (R15)
- MOVOA X5, 16(R15)
- ADDQ $0x20, R15
- ADDQ $0x20, R11
- ADDQ $0x20, R14
- DECQ R13
- JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
- MOVOU -32(R10)(R14*1), X4
- MOVOU -16(R10)(R14*1), X5
- MOVOA X4, -32(AX)(R14*1)
- MOVOA X5, -16(AX)(R14*1)
- ADDQ $0x20, R14
- CMPQ R9, R14
- JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ SI, AX
-
-emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB:
- ADDL R12, CX
- ADDL $0x04, R12
- MOVL CX, 12(SP)
-
- // emitRepeat
- MOVL R12, SI
- LEAL -4(R12), R12
- CMPL SI, $0x08
- JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB
- CMPL SI, $0x0c
- JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB
- CMPL R8, $0x00000800
- JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB
-
-cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB:
- CMPL R12, $0x00000104
- JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB
- CMPL R12, $0x00010100
- JLT repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB
- LEAL -65536(R12), R12
- MOVL R12, R8
- MOVW $0x001d, (AX)
- MOVW R12, 2(AX)
- SARL $0x10, R8
- MOVB R8, 4(AX)
- ADDQ $0x05, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
-
-repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB:
- LEAL -256(R12), R12
- MOVW $0x0019, (AX)
- MOVW R12, 2(AX)
- ADDQ $0x04, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
-
-repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB:
- LEAL -4(R12), R12
- MOVW $0x0015, (AX)
- MOVB R12, 2(AX)
- ADDQ $0x03, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
-
-repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB:
- SHLL $0x02, R12
- ORL $0x01, R12
- MOVW R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
-
-repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB:
- XORQ SI, SI
- LEAL 1(SI)(R12*4), R12
- MOVB R8, 1(AX)
- SARL $0x08, R8
- SHLL $0x05, R8
- ORL R8, R12
- MOVB R12, (AX)
- ADDQ $0x02, AX
-
-match_nolit_emitcopy_end_encodeBetterBlockAsm4MB:
- CMPL CX, 8(SP)
- JGE emit_remainder_encodeBetterBlockAsm4MB
- CMPQ AX, (SP)
- JL match_nolit_dst_ok_encodeBetterBlockAsm4MB
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_nolit_dst_ok_encodeBetterBlockAsm4MB:
- MOVQ $0x00cf1bbcdcbfa563, SI
- MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
- SHLQ $0x08, R10
- IMULQ SI, R10
- SHRQ $0x30, R10
- SHLQ $0x08, R13
- IMULQ SI, R13
- SHRQ $0x30, R13
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x32, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x32, R12
- MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 262168(SP)(R11*4)
- MOVL R15, 262168(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
- SHLQ $0x08, R10
- IMULQ SI, R10
- SHRQ $0x30, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x32, R11
- SHLQ $0x08, R13
- IMULQ SI, R13
- SHRQ $0x30, R13
- MOVL R9, 24(SP)(R10*4)
- MOVL DI, 262168(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeBetterBlockAsm4MB
-
-emit_remainder_encodeBetterBlockAsm4MB:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 4(AX)(CX*1), CX
- CMPQ CX, (SP)
- JL emit_remainder_ok_encodeBetterBlockAsm4MB
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-emit_remainder_ok_encodeBetterBlockAsm4MB:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
- JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
- LEAL -1(SI), DX
- CMPL DX, $0x3c
- JLT one_byte_emit_remainder_encodeBetterBlockAsm4MB
- CMPL DX, $0x00000100
- JLT two_bytes_emit_remainder_encodeBetterBlockAsm4MB
- CMPL DX, $0x00010000
- JLT three_bytes_emit_remainder_encodeBetterBlockAsm4MB
- MOVL DX, BX
- SHRL $0x10, BX
- MOVB $0xf8, (AX)
- MOVW DX, 1(AX)
- MOVB BL, 3(AX)
- ADDQ $0x04, AX
- JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB
-
-three_bytes_emit_remainder_encodeBetterBlockAsm4MB:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB
-
-two_bytes_emit_remainder_encodeBetterBlockAsm4MB:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
- CMPL DX, $0x40
- JL memmove_emit_remainder_encodeBetterBlockAsm4MB
- JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB
-
-one_byte_emit_remainder_encodeBetterBlockAsm4MB:
- SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
-
-memmove_emit_remainder_encodeBetterBlockAsm4MB:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveShort
- CMPQ BX, $0x04
- JLE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4
- CMPQ BX, $0x08
- JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7
- CMPQ BX, $0x10
- JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16
- CMPQ BX, $0x20
- JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32
- JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64
-
-emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4:
- MOVL (CX), SI
- MOVL SI, (AX)
- JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
-
-emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(BX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
-
-emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
-
-emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
-
-emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
-
-memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB:
- MOVQ DX, AX
- JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB
-
-memmove_long_emit_remainder_encodeBetterBlockAsm4MB:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVQ BX, DI
- SHRQ $0x05, DI
- MOVQ AX, SI
- ANDL $0x0000001f, SI
- MOVQ $0x00000040, R8
- SUBQ SI, R8
- DECQ DI
- JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
-
-emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back:
- MOVOU (SI), X4
- MOVOU 16(SI), X5
- MOVOA X4, (R9)
- MOVOA X5, 16(R9)
- ADDQ $0x20, R9
- ADDQ $0x20, SI
- ADDQ $0x20, R8
- DECQ DI
- JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back
-
-emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
- ADDQ $0x20, R8
- CMPQ BX, R8
- JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
-
-emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
- RET
-
-// func encodeBetterBlockAsm12B(dst []byte, src []byte) int
-// Requires: BMI, SSE2
-TEXT ·encodeBetterBlockAsm12B(SB), $81944-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000280, CX
- LEAQ 24(SP), DX
- PXOR X0, X0
-
-zero_loop_encodeBetterBlockAsm12B:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
- JNZ zero_loop_encodeBetterBlockAsm12B
- MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -6(CX), DX
- LEAQ -8(CX), SI
- MOVL SI, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL $0x00000000, 16(SP)
- MOVQ src_base+24(FP), DX
-
-search_loop_encodeBetterBlockAsm12B:
- MOVL CX, SI
- SUBL 12(SP), SI
- SHRL $0x06, SI
- LEAL 1(CX)(SI*1), SI
- CMPL SI, 8(SP)
- JGE emit_remainder_encodeBetterBlockAsm12B
- MOVQ (DX)(CX*1), DI
- MOVL SI, 20(SP)
- MOVQ $0x0000cf1bbcdcbf9b, R9
- MOVQ $0x9e3779b1, SI
- MOVQ DI, R10
- MOVQ DI, R11
- SHLQ $0x10, R10
- IMULQ R9, R10
- SHRQ $0x32, R10
- SHLQ $0x20, R11
- IMULQ SI, R11
- SHRQ $0x34, R11
- MOVL 24(SP)(R10*4), SI
- MOVL 65560(SP)(R11*4), R8
- MOVL CX, 24(SP)(R10*4)
- MOVL CX, 65560(SP)(R11*4)
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeBetterBlockAsm12B
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeBetterBlockAsm12B
- MOVL 20(SP), CX
- JMP search_loop_encodeBetterBlockAsm12B
-
-candidateS_match_encodeBetterBlockAsm12B:
- SHRQ $0x08, DI
- MOVQ DI, R10
- SHLQ $0x10, R10
- IMULQ R9, R10
- SHRQ $0x32, R10
- MOVL 24(SP)(R10*4), SI
- INCL CX
- MOVL CX, 24(SP)(R10*4)
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeBetterBlockAsm12B
- DECL CX
- MOVL R8, SI
-
-candidate_match_encodeBetterBlockAsm12B:
- MOVL 12(SP), DI
- TESTL SI, SI
- JZ match_extend_back_end_encodeBetterBlockAsm12B
-
-match_extend_back_loop_encodeBetterBlockAsm12B:
- CMPL CX, DI
- JLE match_extend_back_end_encodeBetterBlockAsm12B
- MOVB -1(DX)(SI*1), BL
- MOVB -1(DX)(CX*1), R8
- CMPB BL, R8
- JNE match_extend_back_end_encodeBetterBlockAsm12B
- LEAL -1(CX), CX
- DECL SI
- JZ match_extend_back_end_encodeBetterBlockAsm12B
- JMP match_extend_back_loop_encodeBetterBlockAsm12B
-
-match_extend_back_end_encodeBetterBlockAsm12B:
- MOVL CX, DI
- SUBL 12(SP), DI
- LEAQ 3(AX)(DI*1), DI
- CMPQ DI, (SP)
- JL match_dst_size_check_encodeBetterBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_dst_size_check_encodeBetterBlockAsm12B:
- MOVL CX, DI
- ADDL $0x04, CX
- ADDL $0x04, SI
- MOVQ src_len+32(FP), R8
- SUBL CX, R8
- LEAQ (DX)(CX*1), R9
- LEAQ (DX)(SI*1), R10
-
- // matchLen
- XORL R12, R12
- CMPL R8, $0x08
- JL matchlen_match4_match_nolit_encodeBetterBlockAsm12B
-
-matchlen_loopback_match_nolit_encodeBetterBlockAsm12B:
- MOVQ (R9)(R12*1), R11
- XORQ (R10)(R12*1), R11
- TESTQ R11, R11
- JZ matchlen_loop_match_nolit_encodeBetterBlockAsm12B
-
-#ifdef GOAMD64_v3
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R11, R11
-
-#endif
- SARQ $0x03, R11
- LEAL (R12)(R11*1), R12
- JMP match_nolit_end_encodeBetterBlockAsm12B
-
-matchlen_loop_match_nolit_encodeBetterBlockAsm12B:
- LEAL -8(R8), R8
- LEAL 8(R12), R12
- CMPL R8, $0x08
- JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm12B
- JZ match_nolit_end_encodeBetterBlockAsm12B
-
-matchlen_match4_match_nolit_encodeBetterBlockAsm12B:
- CMPL R8, $0x04
- JL matchlen_match2_match_nolit_encodeBetterBlockAsm12B
- MOVL (R9)(R12*1), R11
- CMPL (R10)(R12*1), R11
- JNE matchlen_match2_match_nolit_encodeBetterBlockAsm12B
- SUBL $0x04, R8
- LEAL 4(R12), R12
-
-matchlen_match2_match_nolit_encodeBetterBlockAsm12B:
- CMPL R8, $0x02
- JL matchlen_match1_match_nolit_encodeBetterBlockAsm12B
- MOVW (R9)(R12*1), R11
- CMPW (R10)(R12*1), R11
- JNE matchlen_match1_match_nolit_encodeBetterBlockAsm12B
- SUBL $0x02, R8
- LEAL 2(R12), R12
-
-matchlen_match1_match_nolit_encodeBetterBlockAsm12B:
- CMPL R8, $0x01
- JL match_nolit_end_encodeBetterBlockAsm12B
- MOVB (R9)(R12*1), R11
- CMPB (R10)(R12*1), R11
- JNE match_nolit_end_encodeBetterBlockAsm12B
- LEAL 1(R12), R12
-
-match_nolit_end_encodeBetterBlockAsm12B:
- MOVL CX, R8
- SUBL SI, R8
-
- // Check if repeat
- CMPL 16(SP), R8
- JEQ match_is_repeat_encodeBetterBlockAsm12B
- MOVL R8, 16(SP)
- MOVL 12(SP), SI
- CMPL SI, DI
- JEQ emit_literal_done_match_emit_encodeBetterBlockAsm12B
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(SI*1), R10
- SUBL SI, R9
- LEAL -1(R9), SI
- CMPL SI, $0x3c
- JLT one_byte_match_emit_encodeBetterBlockAsm12B
- CMPL SI, $0x00000100
- JLT two_bytes_match_emit_encodeBetterBlockAsm12B
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_match_emit_encodeBetterBlockAsm12B
-
-two_bytes_match_emit_encodeBetterBlockAsm12B:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_match_emit_encodeBetterBlockAsm12B
- JMP memmove_long_match_emit_encodeBetterBlockAsm12B
-
-one_byte_match_emit_encodeBetterBlockAsm12B:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, AX
-
-memmove_match_emit_encodeBetterBlockAsm12B:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveShort
- CMPQ R9, $0x04
- JLE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4
- CMPQ R9, $0x08
- JB emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7
- CMPQ R9, $0x10
- JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32
- JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64
-
-emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4:
- MOVL (R10), R11
- MOVL R11, (AX)
- JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B
-
-emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7:
- MOVL (R10), R11
- MOVL -4(R10)(R9*1), R10
- MOVL R11, (AX)
- MOVL R10, -4(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B
-
-emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16:
- MOVQ (R10), R11
- MOVQ -8(R10)(R9*1), R10
- MOVQ R11, (AX)
- MOVQ R10, -8(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B
-
-emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32:
- MOVOU (R10), X0
- MOVOU -16(R10)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B
-
-emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64:
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_match_emit_encodeBetterBlockAsm12B:
- MOVQ SI, AX
- JMP emit_literal_done_match_emit_encodeBetterBlockAsm12B
-
-memmove_long_match_emit_encodeBetterBlockAsm12B:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveLong
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVQ R9, R13
- SHRQ $0x05, R13
- MOVQ AX, R11
- ANDL $0x0000001f, R11
- MOVQ $0x00000040, R14
- SUBQ R11, R14
- DECQ R13
- JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
- LEAQ -32(R10)(R14*1), R11
- LEAQ -32(AX)(R14*1), R15
-
-emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back:
- MOVOU (R11), X4
- MOVOU 16(R11), X5
- MOVOA X4, (R15)
- MOVOA X5, 16(R15)
- ADDQ $0x20, R15
- ADDQ $0x20, R11
- ADDQ $0x20, R14
- DECQ R13
- JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
- MOVOU -32(R10)(R14*1), X4
- MOVOU -16(R10)(R14*1), X5
- MOVOA X4, -32(AX)(R14*1)
- MOVOA X5, -16(AX)(R14*1)
- ADDQ $0x20, R14
- CMPQ R9, R14
- JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ SI, AX
-
-emit_literal_done_match_emit_encodeBetterBlockAsm12B:
- ADDL R12, CX
- ADDL $0x04, R12
- MOVL CX, 12(SP)
-
- // emitCopy
-two_byte_offset_match_nolit_encodeBetterBlockAsm12B:
- CMPL R12, $0x40
- JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B
- MOVB $0xee, (AX)
- MOVW R8, 1(AX)
- LEAL -60(R12), R12
- ADDQ $0x03, AX
-
- // emitRepeat
- MOVL R12, SI
- LEAL -4(R12), R12
- CMPL SI, $0x08
- JLE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
- CMPL SI, $0x0c
- JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
- CMPL R8, $0x00000800
- JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
-
-cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
- CMPL R12, $0x00000104
- JLT repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
- LEAL -256(R12), R12
- MOVW $0x0019, (AX)
- MOVW R12, 2(AX)
- ADDQ $0x04, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
-
-repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
- LEAL -4(R12), R12
- MOVW $0x0015, (AX)
- MOVB R12, 2(AX)
- ADDQ $0x03, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
-
-repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
- SHLL $0x02, R12
- ORL $0x01, R12
- MOVW R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
-
-repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
- XORQ SI, SI
- LEAL 1(SI)(R12*4), R12
- MOVB R8, 1(AX)
- SARL $0x08, R8
- SHLL $0x05, R8
- ORL R8, R12
- MOVB R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
- JMP two_byte_offset_match_nolit_encodeBetterBlockAsm12B
-
-two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B:
- CMPL R12, $0x0c
- JGE emit_copy_three_match_nolit_encodeBetterBlockAsm12B
- CMPL R8, $0x00000800
- JGE emit_copy_three_match_nolit_encodeBetterBlockAsm12B
- MOVB $0x01, BL
- LEAL -16(BX)(R12*4), R12
- MOVB R8, 1(AX)
- SHRL $0x08, R8
- SHLL $0x05, R8
- ORL R8, R12
- MOVB R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
-
-emit_copy_three_match_nolit_encodeBetterBlockAsm12B:
- MOVB $0x02, BL
- LEAL -4(BX)(R12*4), R12
- MOVB R12, (AX)
- MOVW R8, 1(AX)
- ADDQ $0x03, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
-
-match_is_repeat_encodeBetterBlockAsm12B:
- MOVL 12(SP), SI
- CMPL SI, DI
- JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(SI*1), R10
- SUBL SI, R9
- LEAL -1(R9), SI
- CMPL SI, $0x3c
- JLT one_byte_match_emit_repeat_encodeBetterBlockAsm12B
- CMPL SI, $0x00000100
- JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm12B
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B
-
-two_bytes_match_emit_repeat_encodeBetterBlockAsm12B:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_match_emit_repeat_encodeBetterBlockAsm12B
- JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B
-
-one_byte_match_emit_repeat_encodeBetterBlockAsm12B:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, AX
-
-memmove_match_emit_repeat_encodeBetterBlockAsm12B:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveShort
- CMPQ R9, $0x04
- JLE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4
- CMPQ R9, $0x08
- JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7
- CMPQ R9, $0x10
- JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32
- JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64
-
-emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4:
- MOVL (R10), R11
- MOVL R11, (AX)
- JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
-
-emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7:
- MOVL (R10), R11
- MOVL -4(R10)(R9*1), R10
- MOVL R11, (AX)
- MOVL R10, -4(AX)(R9*1)
- JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
-
-emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16:
- MOVQ (R10), R11
- MOVQ -8(R10)(R9*1), R10
- MOVQ R11, (AX)
- MOVQ R10, -8(AX)(R9*1)
- JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
-
-emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32:
- MOVOU (R10), X0
- MOVOU -16(R10)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
-
-emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64:
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B:
- MOVQ SI, AX
- JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B
-
-memmove_long_match_emit_repeat_encodeBetterBlockAsm12B:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveLong
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVQ R9, R13
- SHRQ $0x05, R13
- MOVQ AX, R11
- ANDL $0x0000001f, R11
- MOVQ $0x00000040, R14
- SUBQ R11, R14
- DECQ R13
- JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
- LEAQ -32(R10)(R14*1), R11
- LEAQ -32(AX)(R14*1), R15
-
-emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back:
- MOVOU (R11), X4
- MOVOU 16(R11), X5
- MOVOA X4, (R15)
- MOVOA X5, 16(R15)
- ADDQ $0x20, R15
- ADDQ $0x20, R11
- ADDQ $0x20, R14
- DECQ R13
- JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
- MOVOU -32(R10)(R14*1), X4
- MOVOU -16(R10)(R14*1), X5
- MOVOA X4, -32(AX)(R14*1)
- MOVOA X5, -16(AX)(R14*1)
- ADDQ $0x20, R14
- CMPQ R9, R14
- JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ SI, AX
-
-emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B:
- ADDL R12, CX
- ADDL $0x04, R12
- MOVL CX, 12(SP)
-
- // emitRepeat
- MOVL R12, SI
- LEAL -4(R12), R12
- CMPL SI, $0x08
- JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B
- CMPL SI, $0x0c
- JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B
- CMPL R8, $0x00000800
- JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B
-
-cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B:
- CMPL R12, $0x00000104
- JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B
- LEAL -256(R12), R12
- MOVW $0x0019, (AX)
- MOVW R12, 2(AX)
- ADDQ $0x04, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
-
-repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B:
- LEAL -4(R12), R12
- MOVW $0x0015, (AX)
- MOVB R12, 2(AX)
- ADDQ $0x03, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
-
-repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B:
- SHLL $0x02, R12
- ORL $0x01, R12
- MOVW R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B
-
-repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B:
- XORQ SI, SI
- LEAL 1(SI)(R12*4), R12
- MOVB R8, 1(AX)
- SARL $0x08, R8
- SHLL $0x05, R8
- ORL R8, R12
- MOVB R12, (AX)
- ADDQ $0x02, AX
-
-match_nolit_emitcopy_end_encodeBetterBlockAsm12B:
- CMPL CX, 8(SP)
- JGE emit_remainder_encodeBetterBlockAsm12B
- CMPQ AX, (SP)
- JL match_nolit_dst_ok_encodeBetterBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_nolit_dst_ok_encodeBetterBlockAsm12B:
- MOVQ $0x0000cf1bbcdcbf9b, SI
- MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
- SHLQ $0x10, R10
- IMULQ SI, R10
- SHRQ $0x32, R10
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x32, R13
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x34, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x34, R12
- MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 65560(SP)(R11*4)
- MOVL R15, 65560(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
- SHLQ $0x10, R10
- IMULQ SI, R10
- SHRQ $0x32, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x34, R11
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x32, R13
- MOVL R9, 24(SP)(R10*4)
- MOVL DI, 65560(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeBetterBlockAsm12B
-
-emit_remainder_encodeBetterBlockAsm12B:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
- JL emit_remainder_ok_encodeBetterBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-emit_remainder_ok_encodeBetterBlockAsm12B:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
- JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm12B
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
- LEAL -1(SI), DX
- CMPL DX, $0x3c
- JLT one_byte_emit_remainder_encodeBetterBlockAsm12B
- CMPL DX, $0x00000100
- JLT two_bytes_emit_remainder_encodeBetterBlockAsm12B
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B
-
-two_bytes_emit_remainder_encodeBetterBlockAsm12B:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
- CMPL DX, $0x40
- JL memmove_emit_remainder_encodeBetterBlockAsm12B
- JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B
-
-one_byte_emit_remainder_encodeBetterBlockAsm12B:
- SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
-
-memmove_emit_remainder_encodeBetterBlockAsm12B:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveShort
- CMPQ BX, $0x04
- JLE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4
- CMPQ BX, $0x08
- JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7
- CMPQ BX, $0x10
- JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16
- CMPQ BX, $0x20
- JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32
- JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64
-
-emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4:
- MOVL (CX), SI
- MOVL SI, (AX)
- JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
-
-emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(BX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
-
-emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
-
-emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
-
-emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
-
-memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B:
- MOVQ DX, AX
- JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm12B
-
-memmove_long_emit_remainder_encodeBetterBlockAsm12B:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVQ BX, DI
- SHRQ $0x05, DI
- MOVQ AX, SI
- ANDL $0x0000001f, SI
- MOVQ $0x00000040, R8
- SUBQ SI, R8
- DECQ DI
- JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
-
-emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back:
- MOVOU (SI), X4
- MOVOU 16(SI), X5
- MOVOA X4, (R9)
- MOVOA X5, 16(R9)
- ADDQ $0x20, R9
- ADDQ $0x20, SI
- ADDQ $0x20, R8
- DECQ DI
- JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back
-
-emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
- ADDQ $0x20, R8
- CMPQ BX, R8
- JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
-
-emit_literal_done_emit_remainder_encodeBetterBlockAsm12B:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
- RET
-
-// func encodeBetterBlockAsm10B(dst []byte, src []byte) int
-// Requires: BMI, SSE2
-TEXT ·encodeBetterBlockAsm10B(SB), $20504-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x000000a0, CX
- LEAQ 24(SP), DX
- PXOR X0, X0
-
-zero_loop_encodeBetterBlockAsm10B:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
- JNZ zero_loop_encodeBetterBlockAsm10B
- MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -6(CX), DX
- LEAQ -8(CX), SI
- MOVL SI, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL $0x00000000, 16(SP)
- MOVQ src_base+24(FP), DX
-
-search_loop_encodeBetterBlockAsm10B:
- MOVL CX, SI
- SUBL 12(SP), SI
- SHRL $0x05, SI
- LEAL 1(CX)(SI*1), SI
- CMPL SI, 8(SP)
- JGE emit_remainder_encodeBetterBlockAsm10B
- MOVQ (DX)(CX*1), DI
- MOVL SI, 20(SP)
- MOVQ $0x0000cf1bbcdcbf9b, R9
- MOVQ $0x9e3779b1, SI
- MOVQ DI, R10
- MOVQ DI, R11
- SHLQ $0x10, R10
- IMULQ R9, R10
- SHRQ $0x34, R10
- SHLQ $0x20, R11
- IMULQ SI, R11
- SHRQ $0x36, R11
- MOVL 24(SP)(R10*4), SI
- MOVL 16408(SP)(R11*4), R8
- MOVL CX, 24(SP)(R10*4)
- MOVL CX, 16408(SP)(R11*4)
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeBetterBlockAsm10B
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeBetterBlockAsm10B
- MOVL 20(SP), CX
- JMP search_loop_encodeBetterBlockAsm10B
-
-candidateS_match_encodeBetterBlockAsm10B:
- SHRQ $0x08, DI
- MOVQ DI, R10
- SHLQ $0x10, R10
- IMULQ R9, R10
- SHRQ $0x34, R10
- MOVL 24(SP)(R10*4), SI
- INCL CX
- MOVL CX, 24(SP)(R10*4)
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeBetterBlockAsm10B
- DECL CX
- MOVL R8, SI
-
-candidate_match_encodeBetterBlockAsm10B:
- MOVL 12(SP), DI
- TESTL SI, SI
- JZ match_extend_back_end_encodeBetterBlockAsm10B
-
-match_extend_back_loop_encodeBetterBlockAsm10B:
- CMPL CX, DI
- JLE match_extend_back_end_encodeBetterBlockAsm10B
- MOVB -1(DX)(SI*1), BL
- MOVB -1(DX)(CX*1), R8
- CMPB BL, R8
- JNE match_extend_back_end_encodeBetterBlockAsm10B
- LEAL -1(CX), CX
- DECL SI
- JZ match_extend_back_end_encodeBetterBlockAsm10B
- JMP match_extend_back_loop_encodeBetterBlockAsm10B
-
-match_extend_back_end_encodeBetterBlockAsm10B:
- MOVL CX, DI
- SUBL 12(SP), DI
- LEAQ 3(AX)(DI*1), DI
- CMPQ DI, (SP)
- JL match_dst_size_check_encodeBetterBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_dst_size_check_encodeBetterBlockAsm10B:
- MOVL CX, DI
- ADDL $0x04, CX
- ADDL $0x04, SI
- MOVQ src_len+32(FP), R8
- SUBL CX, R8
- LEAQ (DX)(CX*1), R9
- LEAQ (DX)(SI*1), R10
-
- // matchLen
- XORL R12, R12
- CMPL R8, $0x08
- JL matchlen_match4_match_nolit_encodeBetterBlockAsm10B
-
-matchlen_loopback_match_nolit_encodeBetterBlockAsm10B:
- MOVQ (R9)(R12*1), R11
- XORQ (R10)(R12*1), R11
- TESTQ R11, R11
- JZ matchlen_loop_match_nolit_encodeBetterBlockAsm10B
-
-#ifdef GOAMD64_v3
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R11, R11
-
-#endif
- SARQ $0x03, R11
- LEAL (R12)(R11*1), R12
- JMP match_nolit_end_encodeBetterBlockAsm10B
-
-matchlen_loop_match_nolit_encodeBetterBlockAsm10B:
- LEAL -8(R8), R8
- LEAL 8(R12), R12
- CMPL R8, $0x08
- JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm10B
- JZ match_nolit_end_encodeBetterBlockAsm10B
-
-matchlen_match4_match_nolit_encodeBetterBlockAsm10B:
- CMPL R8, $0x04
- JL matchlen_match2_match_nolit_encodeBetterBlockAsm10B
- MOVL (R9)(R12*1), R11
- CMPL (R10)(R12*1), R11
- JNE matchlen_match2_match_nolit_encodeBetterBlockAsm10B
- SUBL $0x04, R8
- LEAL 4(R12), R12
-
-matchlen_match2_match_nolit_encodeBetterBlockAsm10B:
- CMPL R8, $0x02
- JL matchlen_match1_match_nolit_encodeBetterBlockAsm10B
- MOVW (R9)(R12*1), R11
- CMPW (R10)(R12*1), R11
- JNE matchlen_match1_match_nolit_encodeBetterBlockAsm10B
- SUBL $0x02, R8
- LEAL 2(R12), R12
-
-matchlen_match1_match_nolit_encodeBetterBlockAsm10B:
- CMPL R8, $0x01
- JL match_nolit_end_encodeBetterBlockAsm10B
- MOVB (R9)(R12*1), R11
- CMPB (R10)(R12*1), R11
- JNE match_nolit_end_encodeBetterBlockAsm10B
- LEAL 1(R12), R12
-
-match_nolit_end_encodeBetterBlockAsm10B:
- MOVL CX, R8
- SUBL SI, R8
-
- // Check if repeat
- CMPL 16(SP), R8
- JEQ match_is_repeat_encodeBetterBlockAsm10B
- MOVL R8, 16(SP)
- MOVL 12(SP), SI
- CMPL SI, DI
- JEQ emit_literal_done_match_emit_encodeBetterBlockAsm10B
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(SI*1), R10
- SUBL SI, R9
- LEAL -1(R9), SI
- CMPL SI, $0x3c
- JLT one_byte_match_emit_encodeBetterBlockAsm10B
- CMPL SI, $0x00000100
- JLT two_bytes_match_emit_encodeBetterBlockAsm10B
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_match_emit_encodeBetterBlockAsm10B
-
-two_bytes_match_emit_encodeBetterBlockAsm10B:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_match_emit_encodeBetterBlockAsm10B
- JMP memmove_long_match_emit_encodeBetterBlockAsm10B
-
-one_byte_match_emit_encodeBetterBlockAsm10B:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, AX
-
-memmove_match_emit_encodeBetterBlockAsm10B:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveShort
- CMPQ R9, $0x04
- JLE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4
- CMPQ R9, $0x08
- JB emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7
- CMPQ R9, $0x10
- JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32
- JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64
-
-emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4:
- MOVL (R10), R11
- MOVL R11, (AX)
- JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B
-
-emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7:
- MOVL (R10), R11
- MOVL -4(R10)(R9*1), R10
- MOVL R11, (AX)
- MOVL R10, -4(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B
-
-emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16:
- MOVQ (R10), R11
- MOVQ -8(R10)(R9*1), R10
- MOVQ R11, (AX)
- MOVQ R10, -8(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B
-
-emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32:
- MOVOU (R10), X0
- MOVOU -16(R10)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B
-
-emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64:
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_match_emit_encodeBetterBlockAsm10B:
- MOVQ SI, AX
- JMP emit_literal_done_match_emit_encodeBetterBlockAsm10B
-
-memmove_long_match_emit_encodeBetterBlockAsm10B:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveLong
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVQ R9, R13
- SHRQ $0x05, R13
- MOVQ AX, R11
- ANDL $0x0000001f, R11
- MOVQ $0x00000040, R14
- SUBQ R11, R14
- DECQ R13
- JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
- LEAQ -32(R10)(R14*1), R11
- LEAQ -32(AX)(R14*1), R15
-
-emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back:
- MOVOU (R11), X4
- MOVOU 16(R11), X5
- MOVOA X4, (R15)
- MOVOA X5, 16(R15)
- ADDQ $0x20, R15
- ADDQ $0x20, R11
- ADDQ $0x20, R14
- DECQ R13
- JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
- MOVOU -32(R10)(R14*1), X4
- MOVOU -16(R10)(R14*1), X5
- MOVOA X4, -32(AX)(R14*1)
- MOVOA X5, -16(AX)(R14*1)
- ADDQ $0x20, R14
- CMPQ R9, R14
- JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ SI, AX
-
-emit_literal_done_match_emit_encodeBetterBlockAsm10B:
- ADDL R12, CX
- ADDL $0x04, R12
- MOVL CX, 12(SP)
-
- // emitCopy
-two_byte_offset_match_nolit_encodeBetterBlockAsm10B:
- CMPL R12, $0x40
- JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B
- MOVB $0xee, (AX)
- MOVW R8, 1(AX)
- LEAL -60(R12), R12
- ADDQ $0x03, AX
-
- // emitRepeat
- MOVL R12, SI
- LEAL -4(R12), R12
- CMPL SI, $0x08
- JLE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
- CMPL SI, $0x0c
- JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
- CMPL R8, $0x00000800
- JLT repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
-
-cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
- CMPL R12, $0x00000104
- JLT repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
- LEAL -256(R12), R12
- MOVW $0x0019, (AX)
- MOVW R12, 2(AX)
- ADDQ $0x04, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
-
-repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
- LEAL -4(R12), R12
- MOVW $0x0015, (AX)
- MOVB R12, 2(AX)
- ADDQ $0x03, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
-
-repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
- SHLL $0x02, R12
- ORL $0x01, R12
- MOVW R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
-
-repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
- XORQ SI, SI
- LEAL 1(SI)(R12*4), R12
- MOVB R8, 1(AX)
- SARL $0x08, R8
- SHLL $0x05, R8
- ORL R8, R12
- MOVB R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
- JMP two_byte_offset_match_nolit_encodeBetterBlockAsm10B
-
-two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B:
- CMPL R12, $0x0c
- JGE emit_copy_three_match_nolit_encodeBetterBlockAsm10B
- CMPL R8, $0x00000800
- JGE emit_copy_three_match_nolit_encodeBetterBlockAsm10B
- MOVB $0x01, BL
- LEAL -16(BX)(R12*4), R12
- MOVB R8, 1(AX)
- SHRL $0x08, R8
- SHLL $0x05, R8
- ORL R8, R12
- MOVB R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
-
-emit_copy_three_match_nolit_encodeBetterBlockAsm10B:
- MOVB $0x02, BL
- LEAL -4(BX)(R12*4), R12
- MOVB R12, (AX)
- MOVW R8, 1(AX)
- ADDQ $0x03, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
-
-match_is_repeat_encodeBetterBlockAsm10B:
- MOVL 12(SP), SI
- CMPL SI, DI
- JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(SI*1), R10
- SUBL SI, R9
- LEAL -1(R9), SI
- CMPL SI, $0x3c
- JLT one_byte_match_emit_repeat_encodeBetterBlockAsm10B
- CMPL SI, $0x00000100
- JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm10B
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B
-
-two_bytes_match_emit_repeat_encodeBetterBlockAsm10B:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_match_emit_repeat_encodeBetterBlockAsm10B
- JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B
-
-one_byte_match_emit_repeat_encodeBetterBlockAsm10B:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, AX
-
-memmove_match_emit_repeat_encodeBetterBlockAsm10B:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveShort
- CMPQ R9, $0x04
- JLE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4
- CMPQ R9, $0x08
- JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7
- CMPQ R9, $0x10
- JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32
- JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64
-
-emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4:
- MOVL (R10), R11
- MOVL R11, (AX)
- JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
-
-emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7:
- MOVL (R10), R11
- MOVL -4(R10)(R9*1), R10
- MOVL R11, (AX)
- MOVL R10, -4(AX)(R9*1)
- JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
-
-emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16:
- MOVQ (R10), R11
- MOVQ -8(R10)(R9*1), R10
- MOVQ R11, (AX)
- MOVQ R10, -8(AX)(R9*1)
- JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
-
-emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32:
- MOVOU (R10), X0
- MOVOU -16(R10)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
-
-emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64:
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B:
- MOVQ SI, AX
- JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B
-
-memmove_long_match_emit_repeat_encodeBetterBlockAsm10B:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveLong
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVQ R9, R13
- SHRQ $0x05, R13
- MOVQ AX, R11
- ANDL $0x0000001f, R11
- MOVQ $0x00000040, R14
- SUBQ R11, R14
- DECQ R13
- JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
- LEAQ -32(R10)(R14*1), R11
- LEAQ -32(AX)(R14*1), R15
-
-emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back:
- MOVOU (R11), X4
- MOVOU 16(R11), X5
- MOVOA X4, (R15)
- MOVOA X5, 16(R15)
- ADDQ $0x20, R15
- ADDQ $0x20, R11
- ADDQ $0x20, R14
- DECQ R13
- JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
- MOVOU -32(R10)(R14*1), X4
- MOVOU -16(R10)(R14*1), X5
- MOVOA X4, -32(AX)(R14*1)
- MOVOA X5, -16(AX)(R14*1)
- ADDQ $0x20, R14
- CMPQ R9, R14
- JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ SI, AX
-
-emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B:
- ADDL R12, CX
- ADDL $0x04, R12
- MOVL CX, 12(SP)
-
- // emitRepeat
- MOVL R12, SI
- LEAL -4(R12), R12
- CMPL SI, $0x08
- JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B
- CMPL SI, $0x0c
- JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B
- CMPL R8, $0x00000800
- JLT repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B
-
-cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B:
- CMPL R12, $0x00000104
- JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B
- LEAL -256(R12), R12
- MOVW $0x0019, (AX)
- MOVW R12, 2(AX)
- ADDQ $0x04, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
-
-repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B:
- LEAL -4(R12), R12
- MOVW $0x0015, (AX)
- MOVB R12, 2(AX)
- ADDQ $0x03, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
-
-repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B:
- SHLL $0x02, R12
- ORL $0x01, R12
- MOVW R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B
-
-repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B:
- XORQ SI, SI
- LEAL 1(SI)(R12*4), R12
- MOVB R8, 1(AX)
- SARL $0x08, R8
- SHLL $0x05, R8
- ORL R8, R12
- MOVB R12, (AX)
- ADDQ $0x02, AX
-
-match_nolit_emitcopy_end_encodeBetterBlockAsm10B:
- CMPL CX, 8(SP)
- JGE emit_remainder_encodeBetterBlockAsm10B
- CMPQ AX, (SP)
- JL match_nolit_dst_ok_encodeBetterBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_nolit_dst_ok_encodeBetterBlockAsm10B:
- MOVQ $0x0000cf1bbcdcbf9b, SI
- MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
- SHLQ $0x10, R10
- IMULQ SI, R10
- SHRQ $0x34, R10
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x34, R13
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x36, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x36, R12
- MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 16408(SP)(R11*4)
- MOVL R15, 16408(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
- SHLQ $0x10, R10
- IMULQ SI, R10
- SHRQ $0x34, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x36, R11
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x34, R13
- MOVL R9, 24(SP)(R10*4)
- MOVL DI, 16408(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeBetterBlockAsm10B
-
-emit_remainder_encodeBetterBlockAsm10B:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
- JL emit_remainder_ok_encodeBetterBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-emit_remainder_ok_encodeBetterBlockAsm10B:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
- JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm10B
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
- LEAL -1(SI), DX
- CMPL DX, $0x3c
- JLT one_byte_emit_remainder_encodeBetterBlockAsm10B
- CMPL DX, $0x00000100
- JLT two_bytes_emit_remainder_encodeBetterBlockAsm10B
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B
-
-two_bytes_emit_remainder_encodeBetterBlockAsm10B:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
- CMPL DX, $0x40
- JL memmove_emit_remainder_encodeBetterBlockAsm10B
- JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B
-
-one_byte_emit_remainder_encodeBetterBlockAsm10B:
- SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
-
-memmove_emit_remainder_encodeBetterBlockAsm10B:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveShort
- CMPQ BX, $0x04
- JLE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4
- CMPQ BX, $0x08
- JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7
- CMPQ BX, $0x10
- JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16
- CMPQ BX, $0x20
- JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32
- JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64
-
-emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4:
- MOVL (CX), SI
- MOVL SI, (AX)
- JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
-
-emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(BX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
-
-emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
-
-emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
-
-emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
-
-memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B:
- MOVQ DX, AX
- JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm10B
-
-memmove_long_emit_remainder_encodeBetterBlockAsm10B:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVQ BX, DI
- SHRQ $0x05, DI
- MOVQ AX, SI
- ANDL $0x0000001f, SI
- MOVQ $0x00000040, R8
- SUBQ SI, R8
- DECQ DI
- JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
-
-emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back:
- MOVOU (SI), X4
- MOVOU 16(SI), X5
- MOVOA X4, (R9)
- MOVOA X5, 16(R9)
- ADDQ $0x20, R9
- ADDQ $0x20, SI
- ADDQ $0x20, R8
- DECQ DI
- JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back
-
-emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
- ADDQ $0x20, R8
- CMPQ BX, R8
- JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
-
-emit_literal_done_emit_remainder_encodeBetterBlockAsm10B:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
- RET
-
-// func encodeBetterBlockAsm8B(dst []byte, src []byte) int
-// Requires: BMI, SSE2
-TEXT ·encodeBetterBlockAsm8B(SB), $5144-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000028, CX
- LEAQ 24(SP), DX
- PXOR X0, X0
-
-zero_loop_encodeBetterBlockAsm8B:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
- JNZ zero_loop_encodeBetterBlockAsm8B
- MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -6(CX), DX
- LEAQ -8(CX), SI
- MOVL SI, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL $0x00000000, 16(SP)
- MOVQ src_base+24(FP), DX
-
-search_loop_encodeBetterBlockAsm8B:
- MOVL CX, SI
- SUBL 12(SP), SI
- SHRL $0x04, SI
- LEAL 1(CX)(SI*1), SI
- CMPL SI, 8(SP)
- JGE emit_remainder_encodeBetterBlockAsm8B
- MOVQ (DX)(CX*1), DI
- MOVL SI, 20(SP)
- MOVQ $0x0000cf1bbcdcbf9b, R9
- MOVQ $0x9e3779b1, SI
- MOVQ DI, R10
- MOVQ DI, R11
- SHLQ $0x10, R10
- IMULQ R9, R10
- SHRQ $0x36, R10
- SHLQ $0x20, R11
- IMULQ SI, R11
- SHRQ $0x38, R11
- MOVL 24(SP)(R10*4), SI
- MOVL 4120(SP)(R11*4), R8
- MOVL CX, 24(SP)(R10*4)
- MOVL CX, 4120(SP)(R11*4)
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeBetterBlockAsm8B
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeBetterBlockAsm8B
- MOVL 20(SP), CX
- JMP search_loop_encodeBetterBlockAsm8B
-
-candidateS_match_encodeBetterBlockAsm8B:
- SHRQ $0x08, DI
- MOVQ DI, R10
- SHLQ $0x10, R10
- IMULQ R9, R10
- SHRQ $0x36, R10
- MOVL 24(SP)(R10*4), SI
- INCL CX
- MOVL CX, 24(SP)(R10*4)
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeBetterBlockAsm8B
- DECL CX
- MOVL R8, SI
-
-candidate_match_encodeBetterBlockAsm8B:
- MOVL 12(SP), DI
- TESTL SI, SI
- JZ match_extend_back_end_encodeBetterBlockAsm8B
-
-match_extend_back_loop_encodeBetterBlockAsm8B:
- CMPL CX, DI
- JLE match_extend_back_end_encodeBetterBlockAsm8B
- MOVB -1(DX)(SI*1), BL
- MOVB -1(DX)(CX*1), R8
- CMPB BL, R8
- JNE match_extend_back_end_encodeBetterBlockAsm8B
- LEAL -1(CX), CX
- DECL SI
- JZ match_extend_back_end_encodeBetterBlockAsm8B
- JMP match_extend_back_loop_encodeBetterBlockAsm8B
-
-match_extend_back_end_encodeBetterBlockAsm8B:
- MOVL CX, DI
- SUBL 12(SP), DI
- LEAQ 3(AX)(DI*1), DI
- CMPQ DI, (SP)
- JL match_dst_size_check_encodeBetterBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_dst_size_check_encodeBetterBlockAsm8B:
- MOVL CX, DI
- ADDL $0x04, CX
- ADDL $0x04, SI
- MOVQ src_len+32(FP), R8
- SUBL CX, R8
- LEAQ (DX)(CX*1), R9
- LEAQ (DX)(SI*1), R10
-
- // matchLen
- XORL R12, R12
- CMPL R8, $0x08
- JL matchlen_match4_match_nolit_encodeBetterBlockAsm8B
-
-matchlen_loopback_match_nolit_encodeBetterBlockAsm8B:
- MOVQ (R9)(R12*1), R11
- XORQ (R10)(R12*1), R11
- TESTQ R11, R11
- JZ matchlen_loop_match_nolit_encodeBetterBlockAsm8B
-
-#ifdef GOAMD64_v3
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R11, R11
-
-#endif
- SARQ $0x03, R11
- LEAL (R12)(R11*1), R12
- JMP match_nolit_end_encodeBetterBlockAsm8B
-
-matchlen_loop_match_nolit_encodeBetterBlockAsm8B:
- LEAL -8(R8), R8
- LEAL 8(R12), R12
- CMPL R8, $0x08
- JGE matchlen_loopback_match_nolit_encodeBetterBlockAsm8B
- JZ match_nolit_end_encodeBetterBlockAsm8B
-
-matchlen_match4_match_nolit_encodeBetterBlockAsm8B:
- CMPL R8, $0x04
- JL matchlen_match2_match_nolit_encodeBetterBlockAsm8B
- MOVL (R9)(R12*1), R11
- CMPL (R10)(R12*1), R11
- JNE matchlen_match2_match_nolit_encodeBetterBlockAsm8B
- SUBL $0x04, R8
- LEAL 4(R12), R12
-
-matchlen_match2_match_nolit_encodeBetterBlockAsm8B:
- CMPL R8, $0x02
- JL matchlen_match1_match_nolit_encodeBetterBlockAsm8B
- MOVW (R9)(R12*1), R11
- CMPW (R10)(R12*1), R11
- JNE matchlen_match1_match_nolit_encodeBetterBlockAsm8B
- SUBL $0x02, R8
- LEAL 2(R12), R12
-
-matchlen_match1_match_nolit_encodeBetterBlockAsm8B:
- CMPL R8, $0x01
- JL match_nolit_end_encodeBetterBlockAsm8B
- MOVB (R9)(R12*1), R11
- CMPB (R10)(R12*1), R11
- JNE match_nolit_end_encodeBetterBlockAsm8B
- LEAL 1(R12), R12
-
-match_nolit_end_encodeBetterBlockAsm8B:
- MOVL CX, R8
- SUBL SI, R8
-
- // Check if repeat
- CMPL 16(SP), R8
- JEQ match_is_repeat_encodeBetterBlockAsm8B
- MOVL R8, 16(SP)
- MOVL 12(SP), SI
- CMPL SI, DI
- JEQ emit_literal_done_match_emit_encodeBetterBlockAsm8B
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(SI*1), R10
- SUBL SI, R9
- LEAL -1(R9), SI
- CMPL SI, $0x3c
- JLT one_byte_match_emit_encodeBetterBlockAsm8B
- CMPL SI, $0x00000100
- JLT two_bytes_match_emit_encodeBetterBlockAsm8B
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_match_emit_encodeBetterBlockAsm8B
-
-two_bytes_match_emit_encodeBetterBlockAsm8B:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_match_emit_encodeBetterBlockAsm8B
- JMP memmove_long_match_emit_encodeBetterBlockAsm8B
-
-one_byte_match_emit_encodeBetterBlockAsm8B:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, AX
-
-memmove_match_emit_encodeBetterBlockAsm8B:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveShort
- CMPQ R9, $0x04
- JLE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4
- CMPQ R9, $0x08
- JB emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7
- CMPQ R9, $0x10
- JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32
- JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64
-
-emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4:
- MOVL (R10), R11
- MOVL R11, (AX)
- JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B
-
-emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7:
- MOVL (R10), R11
- MOVL -4(R10)(R9*1), R10
- MOVL R11, (AX)
- MOVL R10, -4(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B
-
-emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16:
- MOVQ (R10), R11
- MOVQ -8(R10)(R9*1), R10
- MOVQ R11, (AX)
- MOVQ R10, -8(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B
-
-emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32:
- MOVOU (R10), X0
- MOVOU -16(R10)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B
-
-emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64:
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_match_emit_encodeBetterBlockAsm8B:
- MOVQ SI, AX
- JMP emit_literal_done_match_emit_encodeBetterBlockAsm8B
-
-memmove_long_match_emit_encodeBetterBlockAsm8B:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveLong
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVQ R9, R13
- SHRQ $0x05, R13
- MOVQ AX, R11
- ANDL $0x0000001f, R11
- MOVQ $0x00000040, R14
- SUBQ R11, R14
- DECQ R13
- JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
- LEAQ -32(R10)(R14*1), R11
- LEAQ -32(AX)(R14*1), R15
-
-emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back:
- MOVOU (R11), X4
- MOVOU 16(R11), X5
- MOVOA X4, (R15)
- MOVOA X5, 16(R15)
- ADDQ $0x20, R15
- ADDQ $0x20, R11
- ADDQ $0x20, R14
- DECQ R13
- JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
- MOVOU -32(R10)(R14*1), X4
- MOVOU -16(R10)(R14*1), X5
- MOVOA X4, -32(AX)(R14*1)
- MOVOA X5, -16(AX)(R14*1)
- ADDQ $0x20, R14
- CMPQ R9, R14
- JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ SI, AX
-
-emit_literal_done_match_emit_encodeBetterBlockAsm8B:
- ADDL R12, CX
- ADDL $0x04, R12
- MOVL CX, 12(SP)
-
- // emitCopy
-two_byte_offset_match_nolit_encodeBetterBlockAsm8B:
- CMPL R12, $0x40
- JLE two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B
- MOVB $0xee, (AX)
- MOVW R8, 1(AX)
- LEAL -60(R12), R12
- ADDQ $0x03, AX
-
- // emitRepeat
- MOVL R12, SI
- LEAL -4(R12), R12
- CMPL SI, $0x08
- JLE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
- CMPL SI, $0x0c
- JGE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
-
-cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
- CMPL R12, $0x00000104
- JLT repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
- LEAL -256(R12), R12
- MOVW $0x0019, (AX)
- MOVW R12, 2(AX)
- ADDQ $0x04, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
-
-repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
- LEAL -4(R12), R12
- MOVW $0x0015, (AX)
- MOVB R12, 2(AX)
- ADDQ $0x03, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
-
-repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
- SHLL $0x02, R12
- ORL $0x01, R12
- MOVW R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
- XORQ SI, SI
- LEAL 1(SI)(R12*4), R12
- MOVB R8, 1(AX)
- SARL $0x08, R8
- SHLL $0x05, R8
- ORL R8, R12
- MOVB R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
- JMP two_byte_offset_match_nolit_encodeBetterBlockAsm8B
-
-two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B:
- CMPL R12, $0x0c
- JGE emit_copy_three_match_nolit_encodeBetterBlockAsm8B
- MOVB $0x01, BL
- LEAL -16(BX)(R12*4), R12
- MOVB R8, 1(AX)
- SHRL $0x08, R8
- SHLL $0x05, R8
- ORL R8, R12
- MOVB R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
-
-emit_copy_three_match_nolit_encodeBetterBlockAsm8B:
- MOVB $0x02, BL
- LEAL -4(BX)(R12*4), R12
- MOVB R12, (AX)
- MOVW R8, 1(AX)
- ADDQ $0x03, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
-
-match_is_repeat_encodeBetterBlockAsm8B:
- MOVL 12(SP), SI
- CMPL SI, DI
- JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B
- MOVL DI, R8
- MOVL DI, 12(SP)
- LEAQ (DX)(SI*1), R9
- SUBL SI, R8
- LEAL -1(R8), SI
- CMPL SI, $0x3c
- JLT one_byte_match_emit_repeat_encodeBetterBlockAsm8B
- CMPL SI, $0x00000100
- JLT two_bytes_match_emit_repeat_encodeBetterBlockAsm8B
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B
-
-two_bytes_match_emit_repeat_encodeBetterBlockAsm8B:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_match_emit_repeat_encodeBetterBlockAsm8B
- JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B
-
-one_byte_match_emit_repeat_encodeBetterBlockAsm8B:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, AX
-
-memmove_match_emit_repeat_encodeBetterBlockAsm8B:
- LEAQ (AX)(R8*1), SI
-
- // genMemMoveShort
- CMPQ R8, $0x04
- JLE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4
- CMPQ R8, $0x08
- JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7
- CMPQ R8, $0x10
- JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16
- CMPQ R8, $0x20
- JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32
- JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64
-
-emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4:
- MOVL (R9), R10
- MOVL R10, (AX)
- JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
-
-emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7:
- MOVL (R9), R10
- MOVL -4(R9)(R8*1), R9
- MOVL R10, (AX)
- MOVL R9, -4(AX)(R8*1)
- JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
-
-emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16:
- MOVQ (R9), R10
- MOVQ -8(R9)(R8*1), R9
- MOVQ R10, (AX)
- MOVQ R9, -8(AX)(R8*1)
- JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
-
-emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32:
- MOVOU (R9), X0
- MOVOU -16(R9)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
- JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
-
-emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64:
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
-
-memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B:
- MOVQ SI, AX
- JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B
-
-memmove_long_match_emit_repeat_encodeBetterBlockAsm8B:
- LEAQ (AX)(R8*1), SI
-
- // genMemMoveLong
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVQ R8, R11
- SHRQ $0x05, R11
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R13
- SUBQ R10, R13
- DECQ R11
- JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
- LEAQ -32(R9)(R13*1), R10
- LEAQ -32(AX)(R13*1), R14
-
-emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R14)
- MOVOA X5, 16(R14)
- ADDQ $0x20, R14
- ADDQ $0x20, R10
- ADDQ $0x20, R13
- DECQ R11
- JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
- MOVOU -32(R9)(R13*1), X4
- MOVOU -16(R9)(R13*1), X5
- MOVOA X4, -32(AX)(R13*1)
- MOVOA X5, -16(AX)(R13*1)
- ADDQ $0x20, R13
- CMPQ R8, R13
- JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ SI, AX
-
-emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B:
- ADDL R12, CX
- ADDL $0x04, R12
- MOVL CX, 12(SP)
-
- // emitRepeat
- MOVL R12, SI
- LEAL -4(R12), R12
- CMPL SI, $0x08
- JLE repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B
- CMPL SI, $0x0c
- JGE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B
-
-cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B:
- CMPL R12, $0x00000104
- JLT repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B
- LEAL -256(R12), R12
- MOVW $0x0019, (AX)
- MOVW R12, 2(AX)
- ADDQ $0x04, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
-
-repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B:
- LEAL -4(R12), R12
- MOVW $0x0015, (AX)
- MOVB R12, 2(AX)
- ADDQ $0x03, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
-
-repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B:
- SHLL $0x02, R12
- ORL $0x01, R12
- MOVW R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B
- XORQ SI, SI
- LEAL 1(SI)(R12*4), R12
- MOVB R8, 1(AX)
- SARL $0x08, R8
- SHLL $0x05, R8
- ORL R8, R12
- MOVB R12, (AX)
- ADDQ $0x02, AX
-
-match_nolit_emitcopy_end_encodeBetterBlockAsm8B:
- CMPL CX, 8(SP)
- JGE emit_remainder_encodeBetterBlockAsm8B
- CMPQ AX, (SP)
- JL match_nolit_dst_ok_encodeBetterBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_nolit_dst_ok_encodeBetterBlockAsm8B:
- MOVQ $0x0000cf1bbcdcbf9b, SI
- MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
- SHLQ $0x10, R10
- IMULQ SI, R10
- SHRQ $0x36, R10
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x36, R13
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x38, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x38, R12
- MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 4120(SP)(R11*4)
- MOVL R15, 4120(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
- SHLQ $0x10, R10
- IMULQ SI, R10
- SHRQ $0x36, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x38, R11
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x36, R13
- MOVL R9, 24(SP)(R10*4)
- MOVL DI, 4120(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeBetterBlockAsm8B
-
-emit_remainder_encodeBetterBlockAsm8B:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
- JL emit_remainder_ok_encodeBetterBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-emit_remainder_ok_encodeBetterBlockAsm8B:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
- JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm8B
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
- LEAL -1(SI), DX
- CMPL DX, $0x3c
- JLT one_byte_emit_remainder_encodeBetterBlockAsm8B
- CMPL DX, $0x00000100
- JLT two_bytes_emit_remainder_encodeBetterBlockAsm8B
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B
-
-two_bytes_emit_remainder_encodeBetterBlockAsm8B:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
- CMPL DX, $0x40
- JL memmove_emit_remainder_encodeBetterBlockAsm8B
- JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B
-
-one_byte_emit_remainder_encodeBetterBlockAsm8B:
- SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
-
-memmove_emit_remainder_encodeBetterBlockAsm8B:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveShort
- CMPQ BX, $0x04
- JLE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4
- CMPQ BX, $0x08
- JB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7
- CMPQ BX, $0x10
- JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16
- CMPQ BX, $0x20
- JBE emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32
- JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64
-
-emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4:
- MOVL (CX), SI
- MOVL SI, (AX)
- JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
-
-emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(BX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
-
-emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
-
-emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
-
-emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
-
-memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B:
- MOVQ DX, AX
- JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm8B
-
-memmove_long_emit_remainder_encodeBetterBlockAsm8B:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVQ BX, DI
- SHRQ $0x05, DI
- MOVQ AX, SI
- ANDL $0x0000001f, SI
- MOVQ $0x00000040, R8
- SUBQ SI, R8
- DECQ DI
- JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
-
-emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back:
- MOVOU (SI), X4
- MOVOU 16(SI), X5
- MOVOA X4, (R9)
- MOVOA X5, 16(R9)
- ADDQ $0x20, R9
- ADDQ $0x20, SI
- ADDQ $0x20, R8
- DECQ DI
- JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back
-
-emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
- ADDQ $0x20, R8
- CMPQ BX, R8
- JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
-
-emit_literal_done_emit_remainder_encodeBetterBlockAsm8B:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
- RET
-
-// func encodeSnappyBlockAsm(dst []byte, src []byte) int
-// Requires: BMI, SSE2
-TEXT ·encodeSnappyBlockAsm(SB), $65560-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000200, CX
- LEAQ 24(SP), DX
- PXOR X0, X0
-
-zero_loop_encodeSnappyBlockAsm:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
- JNZ zero_loop_encodeSnappyBlockAsm
- MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), SI
- MOVL SI, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL CX, 16(SP)
- MOVQ src_base+24(FP), DX
-
-search_loop_encodeSnappyBlockAsm:
- MOVL CX, SI
- SUBL 12(SP), SI
- SHRL $0x06, SI
- LEAL 4(CX)(SI*1), SI
- CMPL SI, 8(SP)
- JGE emit_remainder_encodeSnappyBlockAsm
- MOVQ (DX)(CX*1), DI
- MOVL SI, 20(SP)
- MOVQ $0x0000cf1bbcdcbf9b, R9
- MOVQ DI, R10
- MOVQ DI, R11
- SHRQ $0x08, R11
- SHLQ $0x10, R10
- IMULQ R9, R10
- SHRQ $0x32, R10
- SHLQ $0x10, R11
- IMULQ R9, R11
- SHRQ $0x32, R11
- MOVL 24(SP)(R10*4), SI
- MOVL 24(SP)(R11*4), R8
- MOVL CX, 24(SP)(R10*4)
- LEAL 1(CX), R10
- MOVL R10, 24(SP)(R11*4)
- MOVQ DI, R10
- SHRQ $0x10, R10
- SHLQ $0x10, R10
- IMULQ R9, R10
- SHRQ $0x32, R10
- MOVL CX, R9
- SUBL 16(SP), R9
- MOVL 1(DX)(R9*1), R11
- MOVQ DI, R9
- SHRQ $0x08, R9
- CMPL R9, R11
- JNE no_repeat_found_encodeSnappyBlockAsm
- LEAL 1(CX), DI
- MOVL 12(SP), SI
- MOVL DI, R8
- SUBL 16(SP), R8
- JZ repeat_extend_back_end_encodeSnappyBlockAsm
-
-repeat_extend_back_loop_encodeSnappyBlockAsm:
- CMPL DI, SI
- JLE repeat_extend_back_end_encodeSnappyBlockAsm
- MOVB -1(DX)(R8*1), BL
- MOVB -1(DX)(DI*1), R9
- CMPB BL, R9
- JNE repeat_extend_back_end_encodeSnappyBlockAsm
- LEAL -1(DI), DI
- DECL R8
- JNZ repeat_extend_back_loop_encodeSnappyBlockAsm
-
-repeat_extend_back_end_encodeSnappyBlockAsm:
- MOVL 12(SP), SI
- CMPL SI, DI
- JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm
- MOVL DI, R8
- MOVL DI, 12(SP)
- LEAQ (DX)(SI*1), R9
- SUBL SI, R8
- LEAL -1(R8), SI
- CMPL SI, $0x3c
- JLT one_byte_repeat_emit_encodeSnappyBlockAsm
- CMPL SI, $0x00000100
- JLT two_bytes_repeat_emit_encodeSnappyBlockAsm
- CMPL SI, $0x00010000
- JLT three_bytes_repeat_emit_encodeSnappyBlockAsm
- CMPL SI, $0x01000000
- JLT four_bytes_repeat_emit_encodeSnappyBlockAsm
- MOVB $0xfc, (AX)
- MOVL SI, 1(AX)
- ADDQ $0x05, AX
- JMP memmove_long_repeat_emit_encodeSnappyBlockAsm
-
-four_bytes_repeat_emit_encodeSnappyBlockAsm:
- MOVL SI, R10
- SHRL $0x10, R10
- MOVB $0xf8, (AX)
- MOVW SI, 1(AX)
- MOVB R10, 3(AX)
- ADDQ $0x04, AX
- JMP memmove_long_repeat_emit_encodeSnappyBlockAsm
-
-three_bytes_repeat_emit_encodeSnappyBlockAsm:
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_repeat_emit_encodeSnappyBlockAsm
-
-two_bytes_repeat_emit_encodeSnappyBlockAsm:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_repeat_emit_encodeSnappyBlockAsm
- JMP memmove_long_repeat_emit_encodeSnappyBlockAsm
-
-one_byte_repeat_emit_encodeSnappyBlockAsm:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, AX
-
-memmove_repeat_emit_encodeSnappyBlockAsm:
- LEAQ (AX)(R8*1), SI
-
- // genMemMoveShort
- CMPQ R8, $0x08
- JLE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8
- CMPQ R8, $0x10
- JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16
- CMPQ R8, $0x20
- JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32
- JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64
-
-emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8:
- MOVQ (R9), R10
- MOVQ R10, (AX)
- JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
-
-emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16:
- MOVQ (R9), R10
- MOVQ -8(R9)(R8*1), R9
- MOVQ R10, (AX)
- MOVQ R9, -8(AX)(R8*1)
- JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
-
-emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32:
- MOVOU (R9), X0
- MOVOU -16(R9)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
- JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
-
-emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64:
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
-
-memmove_end_copy_repeat_emit_encodeSnappyBlockAsm:
- MOVQ SI, AX
- JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm
-
-memmove_long_repeat_emit_encodeSnappyBlockAsm:
- LEAQ (AX)(R8*1), SI
-
- // genMemMoveLong
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVQ R8, R11
- SHRQ $0x05, R11
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R12
- SUBQ R10, R12
- DECQ R11
- JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
- LEAQ -32(R9)(R12*1), R10
- LEAQ -32(AX)(R12*1), R13
-
-emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R13)
- MOVOA X5, 16(R13)
- ADDQ $0x20, R13
- ADDQ $0x20, R10
- ADDQ $0x20, R12
- DECQ R11
- JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back
-
-emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
- MOVOU -32(R9)(R12*1), X4
- MOVOU -16(R9)(R12*1), X5
- MOVOA X4, -32(AX)(R12*1)
- MOVOA X5, -16(AX)(R12*1)
- ADDQ $0x20, R12
- CMPQ R8, R12
- JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ SI, AX
-
-emit_literal_done_repeat_emit_encodeSnappyBlockAsm:
- ADDL $0x05, CX
- MOVL CX, SI
- SUBL 16(SP), SI
- MOVQ src_len+32(FP), R8
- SUBL CX, R8
- LEAQ (DX)(CX*1), R9
- LEAQ (DX)(SI*1), SI
-
- // matchLen
- XORL R11, R11
- CMPL R8, $0x08
- JL matchlen_match4_repeat_extend_encodeSnappyBlockAsm
-
-matchlen_loopback_repeat_extend_encodeSnappyBlockAsm:
- MOVQ (R9)(R11*1), R10
- XORQ (SI)(R11*1), R10
- TESTQ R10, R10
- JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm
-
-#ifdef GOAMD64_v3
- TZCNTQ R10, R10
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R10, R10
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R10, R10
-
-#endif
- SARQ $0x03, R10
- LEAL (R11)(R10*1), R11
- JMP repeat_extend_forward_end_encodeSnappyBlockAsm
-
-matchlen_loop_repeat_extend_encodeSnappyBlockAsm:
- LEAL -8(R8), R8
- LEAL 8(R11), R11
- CMPL R8, $0x08
- JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm
- JZ repeat_extend_forward_end_encodeSnappyBlockAsm
-
-matchlen_match4_repeat_extend_encodeSnappyBlockAsm:
- CMPL R8, $0x04
- JL matchlen_match2_repeat_extend_encodeSnappyBlockAsm
- MOVL (R9)(R11*1), R10
- CMPL (SI)(R11*1), R10
- JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm
- SUBL $0x04, R8
- LEAL 4(R11), R11
-
-matchlen_match2_repeat_extend_encodeSnappyBlockAsm:
- CMPL R8, $0x02
- JL matchlen_match1_repeat_extend_encodeSnappyBlockAsm
- MOVW (R9)(R11*1), R10
- CMPW (SI)(R11*1), R10
- JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm
- SUBL $0x02, R8
- LEAL 2(R11), R11
-
-matchlen_match1_repeat_extend_encodeSnappyBlockAsm:
- CMPL R8, $0x01
- JL repeat_extend_forward_end_encodeSnappyBlockAsm
- MOVB (R9)(R11*1), R10
- CMPB (SI)(R11*1), R10
- JNE repeat_extend_forward_end_encodeSnappyBlockAsm
- LEAL 1(R11), R11
-
-repeat_extend_forward_end_encodeSnappyBlockAsm:
- ADDL R11, CX
- MOVL CX, SI
- SUBL DI, SI
- MOVL 16(SP), DI
-
- // emitCopy
- CMPL DI, $0x00010000
- JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm
-
-four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm:
- CMPL SI, $0x40
- JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm
- MOVB $0xff, (AX)
- MOVL DI, 1(AX)
- LEAL -64(SI), SI
- ADDQ $0x05, AX
- CMPL SI, $0x04
- JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm
- JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm
-
-four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm:
- TESTL SI, SI
- JZ repeat_end_emit_encodeSnappyBlockAsm
- MOVB $0x03, BL
- LEAL -4(BX)(SI*4), SI
- MOVB SI, (AX)
- MOVL DI, 1(AX)
- ADDQ $0x05, AX
- JMP repeat_end_emit_encodeSnappyBlockAsm
-
-two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm:
- CMPL SI, $0x40
- JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm
- MOVB $0xee, (AX)
- MOVW DI, 1(AX)
- LEAL -60(SI), SI
- ADDQ $0x03, AX
- JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm
-
-two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm:
- CMPL SI, $0x0c
- JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm
- CMPL DI, $0x00000800
- JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm
- MOVB $0x01, BL
- LEAL -16(BX)(SI*4), SI
- MOVB DI, 1(AX)
- SHRL $0x08, DI
- SHLL $0x05, DI
- ORL DI, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeSnappyBlockAsm
-
-emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm:
- MOVB $0x02, BL
- LEAL -4(BX)(SI*4), SI
- MOVB SI, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
-
-repeat_end_emit_encodeSnappyBlockAsm:
- MOVL CX, 12(SP)
- JMP search_loop_encodeSnappyBlockAsm
-
-no_repeat_found_encodeSnappyBlockAsm:
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeSnappyBlockAsm
- SHRQ $0x08, DI
- MOVL 24(SP)(R10*4), SI
- LEAL 2(CX), R9
- CMPL (DX)(R8*1), DI
- JEQ candidate2_match_encodeSnappyBlockAsm
- MOVL R9, 24(SP)(R10*4)
- SHRQ $0x08, DI
- CMPL (DX)(SI*1), DI
- JEQ candidate3_match_encodeSnappyBlockAsm
- MOVL 20(SP), CX
- JMP search_loop_encodeSnappyBlockAsm
-
-candidate3_match_encodeSnappyBlockAsm:
- ADDL $0x02, CX
- JMP candidate_match_encodeSnappyBlockAsm
-
-candidate2_match_encodeSnappyBlockAsm:
- MOVL R9, 24(SP)(R10*4)
- INCL CX
- MOVL R8, SI
-
-candidate_match_encodeSnappyBlockAsm:
- MOVL 12(SP), DI
- TESTL SI, SI
- JZ match_extend_back_end_encodeSnappyBlockAsm
-
-match_extend_back_loop_encodeSnappyBlockAsm:
- CMPL CX, DI
- JLE match_extend_back_end_encodeSnappyBlockAsm
- MOVB -1(DX)(SI*1), BL
- MOVB -1(DX)(CX*1), R8
- CMPB BL, R8
- JNE match_extend_back_end_encodeSnappyBlockAsm
- LEAL -1(CX), CX
- DECL SI
- JZ match_extend_back_end_encodeSnappyBlockAsm
- JMP match_extend_back_loop_encodeSnappyBlockAsm
-
-match_extend_back_end_encodeSnappyBlockAsm:
- MOVL CX, DI
- SUBL 12(SP), DI
- LEAQ 5(AX)(DI*1), DI
- CMPQ DI, (SP)
- JL match_dst_size_check_encodeSnappyBlockAsm
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_dst_size_check_encodeSnappyBlockAsm:
- MOVL CX, DI
- MOVL 12(SP), R8
- CMPL R8, DI
- JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(R8*1), DI
- SUBL R8, R9
- LEAL -1(R9), R8
- CMPL R8, $0x3c
- JLT one_byte_match_emit_encodeSnappyBlockAsm
- CMPL R8, $0x00000100
- JLT two_bytes_match_emit_encodeSnappyBlockAsm
- CMPL R8, $0x00010000
- JLT three_bytes_match_emit_encodeSnappyBlockAsm
- CMPL R8, $0x01000000
- JLT four_bytes_match_emit_encodeSnappyBlockAsm
- MOVB $0xfc, (AX)
- MOVL R8, 1(AX)
- ADDQ $0x05, AX
- JMP memmove_long_match_emit_encodeSnappyBlockAsm
-
-four_bytes_match_emit_encodeSnappyBlockAsm:
- MOVL R8, R10
- SHRL $0x10, R10
- MOVB $0xf8, (AX)
- MOVW R8, 1(AX)
- MOVB R10, 3(AX)
- ADDQ $0x04, AX
- JMP memmove_long_match_emit_encodeSnappyBlockAsm
-
-three_bytes_match_emit_encodeSnappyBlockAsm:
- MOVB $0xf4, (AX)
- MOVW R8, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_match_emit_encodeSnappyBlockAsm
-
-two_bytes_match_emit_encodeSnappyBlockAsm:
- MOVB $0xf0, (AX)
- MOVB R8, 1(AX)
- ADDQ $0x02, AX
- CMPL R8, $0x40
- JL memmove_match_emit_encodeSnappyBlockAsm
- JMP memmove_long_match_emit_encodeSnappyBlockAsm
-
-one_byte_match_emit_encodeSnappyBlockAsm:
- SHLB $0x02, R8
- MOVB R8, (AX)
- ADDQ $0x01, AX
-
-memmove_match_emit_encodeSnappyBlockAsm:
- LEAQ (AX)(R9*1), R8
-
- // genMemMoveShort
- CMPQ R9, $0x08
- JLE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8
- CMPQ R9, $0x10
- JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32
- JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64
-
-emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8:
- MOVQ (DI), R10
- MOVQ R10, (AX)
- JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm
-
-emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16:
- MOVQ (DI), R10
- MOVQ -8(DI)(R9*1), DI
- MOVQ R10, (AX)
- MOVQ DI, -8(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm
-
-emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32:
- MOVOU (DI), X0
- MOVOU -16(DI)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm
-
-emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64:
- MOVOU (DI), X0
- MOVOU 16(DI), X1
- MOVOU -32(DI)(R9*1), X2
- MOVOU -16(DI)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_match_emit_encodeSnappyBlockAsm:
- MOVQ R8, AX
- JMP emit_literal_done_match_emit_encodeSnappyBlockAsm
-
-memmove_long_match_emit_encodeSnappyBlockAsm:
- LEAQ (AX)(R9*1), R8
-
- // genMemMoveLong
- MOVOU (DI), X0
- MOVOU 16(DI), X1
- MOVOU -32(DI)(R9*1), X2
- MOVOU -16(DI)(R9*1), X3
- MOVQ R9, R11
- SHRQ $0x05, R11
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R12
- SUBQ R10, R12
- DECQ R11
- JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
- LEAQ -32(DI)(R12*1), R10
- LEAQ -32(AX)(R12*1), R13
-
-emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R13)
- MOVOA X5, 16(R13)
- ADDQ $0x20, R13
- ADDQ $0x20, R10
- ADDQ $0x20, R12
- DECQ R11
- JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
- MOVOU -32(DI)(R12*1), X4
- MOVOU -16(DI)(R12*1), X5
- MOVOA X4, -32(AX)(R12*1)
- MOVOA X5, -16(AX)(R12*1)
- ADDQ $0x20, R12
- CMPQ R9, R12
- JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ R8, AX
-
-emit_literal_done_match_emit_encodeSnappyBlockAsm:
-match_nolit_loop_encodeSnappyBlockAsm:
- MOVL CX, DI
- SUBL SI, DI
- MOVL DI, 16(SP)
- ADDL $0x04, CX
- ADDL $0x04, SI
- MOVQ src_len+32(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(SI*1), SI
-
- // matchLen
- XORL R10, R10
- CMPL DI, $0x08
- JL matchlen_match4_match_nolit_encodeSnappyBlockAsm
-
-matchlen_loopback_match_nolit_encodeSnappyBlockAsm:
- MOVQ (R8)(R10*1), R9
- XORQ (SI)(R10*1), R9
- TESTQ R9, R9
- JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm
-
-#ifdef GOAMD64_v3
- TZCNTQ R9, R9
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R9, R9
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R9, R9
-
-#endif
- SARQ $0x03, R9
- LEAL (R10)(R9*1), R10
- JMP match_nolit_end_encodeSnappyBlockAsm
-
-matchlen_loop_match_nolit_encodeSnappyBlockAsm:
- LEAL -8(DI), DI
- LEAL 8(R10), R10
- CMPL DI, $0x08
- JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm
- JZ match_nolit_end_encodeSnappyBlockAsm
-
-matchlen_match4_match_nolit_encodeSnappyBlockAsm:
- CMPL DI, $0x04
- JL matchlen_match2_match_nolit_encodeSnappyBlockAsm
- MOVL (R8)(R10*1), R9
- CMPL (SI)(R10*1), R9
- JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm
- SUBL $0x04, DI
- LEAL 4(R10), R10
-
-matchlen_match2_match_nolit_encodeSnappyBlockAsm:
- CMPL DI, $0x02
- JL matchlen_match1_match_nolit_encodeSnappyBlockAsm
- MOVW (R8)(R10*1), R9
- CMPW (SI)(R10*1), R9
- JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm
- SUBL $0x02, DI
- LEAL 2(R10), R10
-
-matchlen_match1_match_nolit_encodeSnappyBlockAsm:
- CMPL DI, $0x01
- JL match_nolit_end_encodeSnappyBlockAsm
- MOVB (R8)(R10*1), R9
- CMPB (SI)(R10*1), R9
- JNE match_nolit_end_encodeSnappyBlockAsm
- LEAL 1(R10), R10
-
-match_nolit_end_encodeSnappyBlockAsm:
- ADDL R10, CX
- MOVL 16(SP), SI
- ADDL $0x04, R10
- MOVL CX, 12(SP)
-
- // emitCopy
- CMPL SI, $0x00010000
- JL two_byte_offset_match_nolit_encodeSnappyBlockAsm
-
-four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm:
- CMPL R10, $0x40
- JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsm
- MOVB $0xff, (AX)
- MOVL SI, 1(AX)
- LEAL -64(R10), R10
- ADDQ $0x05, AX
- CMPL R10, $0x04
- JL four_bytes_remain_match_nolit_encodeSnappyBlockAsm
- JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm
-
-four_bytes_remain_match_nolit_encodeSnappyBlockAsm:
- TESTL R10, R10
- JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm
- MOVB $0x03, BL
- LEAL -4(BX)(R10*4), R10
- MOVB R10, (AX)
- MOVL SI, 1(AX)
- ADDQ $0x05, AX
- JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm
-
-two_byte_offset_match_nolit_encodeSnappyBlockAsm:
- CMPL R10, $0x40
- JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm
- MOVB $0xee, (AX)
- MOVW SI, 1(AX)
- LEAL -60(R10), R10
- ADDQ $0x03, AX
- JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm
-
-two_byte_offset_short_match_nolit_encodeSnappyBlockAsm:
- CMPL R10, $0x0c
- JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm
- CMPL SI, $0x00000800
- JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm
- MOVB $0x01, BL
- LEAL -16(BX)(R10*4), R10
- MOVB SI, 1(AX)
- SHRL $0x08, SI
- SHLL $0x05, SI
- ORL SI, R10
- MOVB R10, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm
-
-emit_copy_three_match_nolit_encodeSnappyBlockAsm:
- MOVB $0x02, BL
- LEAL -4(BX)(R10*4), R10
- MOVB R10, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
-
-match_nolit_emitcopy_end_encodeSnappyBlockAsm:
- CMPL CX, 8(SP)
- JGE emit_remainder_encodeSnappyBlockAsm
- MOVQ -2(DX)(CX*1), DI
- CMPQ AX, (SP)
- JL match_nolit_dst_ok_encodeSnappyBlockAsm
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_nolit_dst_ok_encodeSnappyBlockAsm:
- MOVQ $0x0000cf1bbcdcbf9b, R9
- MOVQ DI, R8
- SHRQ $0x10, DI
- MOVQ DI, SI
- SHLQ $0x10, R8
- IMULQ R9, R8
- SHRQ $0x32, R8
- SHLQ $0x10, SI
- IMULQ R9, SI
- SHRQ $0x32, SI
- LEAL -2(CX), R9
- LEAQ 24(SP)(SI*4), R10
- MOVL (R10), SI
- MOVL R9, 24(SP)(R8*4)
- MOVL CX, (R10)
- CMPL (DX)(SI*1), DI
- JEQ match_nolit_loop_encodeSnappyBlockAsm
- INCL CX
- JMP search_loop_encodeSnappyBlockAsm
-
-emit_remainder_encodeSnappyBlockAsm:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 5(AX)(CX*1), CX
- CMPQ CX, (SP)
- JL emit_remainder_ok_encodeSnappyBlockAsm
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-emit_remainder_ok_encodeSnappyBlockAsm:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
- JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
- LEAL -1(SI), DX
- CMPL DX, $0x3c
- JLT one_byte_emit_remainder_encodeSnappyBlockAsm
- CMPL DX, $0x00000100
- JLT two_bytes_emit_remainder_encodeSnappyBlockAsm
- CMPL DX, $0x00010000
- JLT three_bytes_emit_remainder_encodeSnappyBlockAsm
- CMPL DX, $0x01000000
- JLT four_bytes_emit_remainder_encodeSnappyBlockAsm
- MOVB $0xfc, (AX)
- MOVL DX, 1(AX)
- ADDQ $0x05, AX
- JMP memmove_long_emit_remainder_encodeSnappyBlockAsm
-
-four_bytes_emit_remainder_encodeSnappyBlockAsm:
- MOVL DX, BX
- SHRL $0x10, BX
- MOVB $0xf8, (AX)
- MOVW DX, 1(AX)
- MOVB BL, 3(AX)
- ADDQ $0x04, AX
- JMP memmove_long_emit_remainder_encodeSnappyBlockAsm
-
-three_bytes_emit_remainder_encodeSnappyBlockAsm:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_emit_remainder_encodeSnappyBlockAsm
-
-two_bytes_emit_remainder_encodeSnappyBlockAsm:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
- CMPL DX, $0x40
- JL memmove_emit_remainder_encodeSnappyBlockAsm
- JMP memmove_long_emit_remainder_encodeSnappyBlockAsm
-
-one_byte_emit_remainder_encodeSnappyBlockAsm:
- SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
-
-memmove_emit_remainder_encodeSnappyBlockAsm:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveShort
- CMPQ BX, $0x08
- JLE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8
- CMPQ BX, $0x10
- JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16
- CMPQ BX, $0x20
- JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32
- JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64
-
-emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8:
- MOVQ (CX), SI
- MOVQ SI, (AX)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
-
-emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
-
-emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
-
-emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
-
-memmove_end_copy_emit_remainder_encodeSnappyBlockAsm:
- MOVQ DX, AX
- JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm
-
-memmove_long_emit_remainder_encodeSnappyBlockAsm:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVQ BX, DI
- SHRQ $0x05, DI
- MOVQ AX, SI
- ANDL $0x0000001f, SI
- MOVQ $0x00000040, R8
- SUBQ SI, R8
- DECQ DI
- JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
-
-emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back:
- MOVOU (SI), X4
- MOVOU 16(SI), X5
- MOVOA X4, (R9)
- MOVOA X5, 16(R9)
- ADDQ $0x20, R9
- ADDQ $0x20, SI
- ADDQ $0x20, R8
- DECQ DI
- JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back
-
-emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
- ADDQ $0x20, R8
- CMPQ BX, R8
- JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
-
-emit_literal_done_emit_remainder_encodeSnappyBlockAsm:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
- RET
-
-// func encodeSnappyBlockAsm64K(dst []byte, src []byte) int
-// Requires: BMI, SSE2
-TEXT ·encodeSnappyBlockAsm64K(SB), $65560-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000200, CX
- LEAQ 24(SP), DX
- PXOR X0, X0
-
-zero_loop_encodeSnappyBlockAsm64K:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
- JNZ zero_loop_encodeSnappyBlockAsm64K
- MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), SI
- MOVL SI, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL CX, 16(SP)
- MOVQ src_base+24(FP), DX
-
-search_loop_encodeSnappyBlockAsm64K:
- MOVL CX, SI
- SUBL 12(SP), SI
- SHRL $0x06, SI
- LEAL 4(CX)(SI*1), SI
- CMPL SI, 8(SP)
- JGE emit_remainder_encodeSnappyBlockAsm64K
- MOVQ (DX)(CX*1), DI
- MOVL SI, 20(SP)
- MOVQ $0x0000cf1bbcdcbf9b, R9
- MOVQ DI, R10
- MOVQ DI, R11
- SHRQ $0x08, R11
- SHLQ $0x10, R10
- IMULQ R9, R10
- SHRQ $0x32, R10
- SHLQ $0x10, R11
- IMULQ R9, R11
- SHRQ $0x32, R11
- MOVL 24(SP)(R10*4), SI
- MOVL 24(SP)(R11*4), R8
- MOVL CX, 24(SP)(R10*4)
- LEAL 1(CX), R10
- MOVL R10, 24(SP)(R11*4)
- MOVQ DI, R10
- SHRQ $0x10, R10
- SHLQ $0x10, R10
- IMULQ R9, R10
- SHRQ $0x32, R10
- MOVL CX, R9
- SUBL 16(SP), R9
- MOVL 1(DX)(R9*1), R11
- MOVQ DI, R9
- SHRQ $0x08, R9
- CMPL R9, R11
- JNE no_repeat_found_encodeSnappyBlockAsm64K
- LEAL 1(CX), DI
- MOVL 12(SP), SI
- MOVL DI, R8
- SUBL 16(SP), R8
- JZ repeat_extend_back_end_encodeSnappyBlockAsm64K
-
-repeat_extend_back_loop_encodeSnappyBlockAsm64K:
- CMPL DI, SI
- JLE repeat_extend_back_end_encodeSnappyBlockAsm64K
- MOVB -1(DX)(R8*1), BL
- MOVB -1(DX)(DI*1), R9
- CMPB BL, R9
- JNE repeat_extend_back_end_encodeSnappyBlockAsm64K
- LEAL -1(DI), DI
- DECL R8
- JNZ repeat_extend_back_loop_encodeSnappyBlockAsm64K
-
-repeat_extend_back_end_encodeSnappyBlockAsm64K:
- MOVL 12(SP), SI
- CMPL SI, DI
- JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K
- MOVL DI, R8
- MOVL DI, 12(SP)
- LEAQ (DX)(SI*1), R9
- SUBL SI, R8
- LEAL -1(R8), SI
- CMPL SI, $0x3c
- JLT one_byte_repeat_emit_encodeSnappyBlockAsm64K
- CMPL SI, $0x00000100
- JLT two_bytes_repeat_emit_encodeSnappyBlockAsm64K
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K
-
-two_bytes_repeat_emit_encodeSnappyBlockAsm64K:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_repeat_emit_encodeSnappyBlockAsm64K
- JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K
-
-one_byte_repeat_emit_encodeSnappyBlockAsm64K:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, AX
-
-memmove_repeat_emit_encodeSnappyBlockAsm64K:
- LEAQ (AX)(R8*1), SI
-
- // genMemMoveShort
- CMPQ R8, $0x08
- JLE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8
- CMPQ R8, $0x10
- JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16
- CMPQ R8, $0x20
- JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32
- JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64
-
-emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8:
- MOVQ (R9), R10
- MOVQ R10, (AX)
- JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
-
-emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16:
- MOVQ (R9), R10
- MOVQ -8(R9)(R8*1), R9
- MOVQ R10, (AX)
- MOVQ R9, -8(AX)(R8*1)
- JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
-
-emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32:
- MOVOU (R9), X0
- MOVOU -16(R9)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
- JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
-
-emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64:
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
-
-memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K:
- MOVQ SI, AX
- JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K
-
-memmove_long_repeat_emit_encodeSnappyBlockAsm64K:
- LEAQ (AX)(R8*1), SI
-
- // genMemMoveLong
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVQ R8, R11
- SHRQ $0x05, R11
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R12
- SUBQ R10, R12
- DECQ R11
- JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
- LEAQ -32(R9)(R12*1), R10
- LEAQ -32(AX)(R12*1), R13
-
-emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R13)
- MOVOA X5, 16(R13)
- ADDQ $0x20, R13
- ADDQ $0x20, R10
- ADDQ $0x20, R12
- DECQ R11
- JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back
-
-emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
- MOVOU -32(R9)(R12*1), X4
- MOVOU -16(R9)(R12*1), X5
- MOVOA X4, -32(AX)(R12*1)
- MOVOA X5, -16(AX)(R12*1)
- ADDQ $0x20, R12
- CMPQ R8, R12
- JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ SI, AX
-
-emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K:
- ADDL $0x05, CX
- MOVL CX, SI
- SUBL 16(SP), SI
- MOVQ src_len+32(FP), R8
- SUBL CX, R8
- LEAQ (DX)(CX*1), R9
- LEAQ (DX)(SI*1), SI
-
- // matchLen
- XORL R11, R11
- CMPL R8, $0x08
- JL matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K
-
-matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K:
- MOVQ (R9)(R11*1), R10
- XORQ (SI)(R11*1), R10
- TESTQ R10, R10
- JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm64K
-
-#ifdef GOAMD64_v3
- TZCNTQ R10, R10
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R10, R10
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R10, R10
-
-#endif
- SARQ $0x03, R10
- LEAL (R11)(R10*1), R11
- JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K
-
-matchlen_loop_repeat_extend_encodeSnappyBlockAsm64K:
- LEAL -8(R8), R8
- LEAL 8(R11), R11
- CMPL R8, $0x08
- JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K
- JZ repeat_extend_forward_end_encodeSnappyBlockAsm64K
-
-matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K:
- CMPL R8, $0x04
- JL matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K
- MOVL (R9)(R11*1), R10
- CMPL (SI)(R11*1), R10
- JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K
- SUBL $0x04, R8
- LEAL 4(R11), R11
-
-matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K:
- CMPL R8, $0x02
- JL matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K
- MOVW (R9)(R11*1), R10
- CMPW (SI)(R11*1), R10
- JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K
- SUBL $0x02, R8
- LEAL 2(R11), R11
-
-matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K:
- CMPL R8, $0x01
- JL repeat_extend_forward_end_encodeSnappyBlockAsm64K
- MOVB (R9)(R11*1), R10
- CMPB (SI)(R11*1), R10
- JNE repeat_extend_forward_end_encodeSnappyBlockAsm64K
- LEAL 1(R11), R11
-
-repeat_extend_forward_end_encodeSnappyBlockAsm64K:
- ADDL R11, CX
- MOVL CX, SI
- SUBL DI, SI
- MOVL 16(SP), DI
-
- // emitCopy
-two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K:
- CMPL SI, $0x40
- JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K
- MOVB $0xee, (AX)
- MOVW DI, 1(AX)
- LEAL -60(SI), SI
- ADDQ $0x03, AX
- JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K
-
-two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K:
- CMPL SI, $0x0c
- JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K
- CMPL DI, $0x00000800
- JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K
- MOVB $0x01, BL
- LEAL -16(BX)(SI*4), SI
- MOVB DI, 1(AX)
- SHRL $0x08, DI
- SHLL $0x05, DI
- ORL DI, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeSnappyBlockAsm64K
-
-emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K:
- MOVB $0x02, BL
- LEAL -4(BX)(SI*4), SI
- MOVB SI, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
-
-repeat_end_emit_encodeSnappyBlockAsm64K:
- MOVL CX, 12(SP)
- JMP search_loop_encodeSnappyBlockAsm64K
-
-no_repeat_found_encodeSnappyBlockAsm64K:
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeSnappyBlockAsm64K
- SHRQ $0x08, DI
- MOVL 24(SP)(R10*4), SI
- LEAL 2(CX), R9
- CMPL (DX)(R8*1), DI
- JEQ candidate2_match_encodeSnappyBlockAsm64K
- MOVL R9, 24(SP)(R10*4)
- SHRQ $0x08, DI
- CMPL (DX)(SI*1), DI
- JEQ candidate3_match_encodeSnappyBlockAsm64K
- MOVL 20(SP), CX
- JMP search_loop_encodeSnappyBlockAsm64K
-
-candidate3_match_encodeSnappyBlockAsm64K:
- ADDL $0x02, CX
- JMP candidate_match_encodeSnappyBlockAsm64K
-
-candidate2_match_encodeSnappyBlockAsm64K:
- MOVL R9, 24(SP)(R10*4)
- INCL CX
- MOVL R8, SI
-
-candidate_match_encodeSnappyBlockAsm64K:
- MOVL 12(SP), DI
- TESTL SI, SI
- JZ match_extend_back_end_encodeSnappyBlockAsm64K
-
-match_extend_back_loop_encodeSnappyBlockAsm64K:
- CMPL CX, DI
- JLE match_extend_back_end_encodeSnappyBlockAsm64K
- MOVB -1(DX)(SI*1), BL
- MOVB -1(DX)(CX*1), R8
- CMPB BL, R8
- JNE match_extend_back_end_encodeSnappyBlockAsm64K
- LEAL -1(CX), CX
- DECL SI
- JZ match_extend_back_end_encodeSnappyBlockAsm64K
- JMP match_extend_back_loop_encodeSnappyBlockAsm64K
-
-match_extend_back_end_encodeSnappyBlockAsm64K:
- MOVL CX, DI
- SUBL 12(SP), DI
- LEAQ 3(AX)(DI*1), DI
- CMPQ DI, (SP)
- JL match_dst_size_check_encodeSnappyBlockAsm64K
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_dst_size_check_encodeSnappyBlockAsm64K:
- MOVL CX, DI
- MOVL 12(SP), R8
- CMPL R8, DI
- JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm64K
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(R8*1), DI
- SUBL R8, R9
- LEAL -1(R9), R8
- CMPL R8, $0x3c
- JLT one_byte_match_emit_encodeSnappyBlockAsm64K
- CMPL R8, $0x00000100
- JLT two_bytes_match_emit_encodeSnappyBlockAsm64K
- MOVB $0xf4, (AX)
- MOVW R8, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_match_emit_encodeSnappyBlockAsm64K
-
-two_bytes_match_emit_encodeSnappyBlockAsm64K:
- MOVB $0xf0, (AX)
- MOVB R8, 1(AX)
- ADDQ $0x02, AX
- CMPL R8, $0x40
- JL memmove_match_emit_encodeSnappyBlockAsm64K
- JMP memmove_long_match_emit_encodeSnappyBlockAsm64K
-
-one_byte_match_emit_encodeSnappyBlockAsm64K:
- SHLB $0x02, R8
- MOVB R8, (AX)
- ADDQ $0x01, AX
-
-memmove_match_emit_encodeSnappyBlockAsm64K:
- LEAQ (AX)(R9*1), R8
-
- // genMemMoveShort
- CMPQ R9, $0x08
- JLE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8
- CMPQ R9, $0x10
- JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32
- JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64
-
-emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8:
- MOVQ (DI), R10
- MOVQ R10, (AX)
- JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
-
-emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16:
- MOVQ (DI), R10
- MOVQ -8(DI)(R9*1), DI
- MOVQ R10, (AX)
- MOVQ DI, -8(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
-
-emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32:
- MOVOU (DI), X0
- MOVOU -16(DI)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
-
-emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64:
- MOVOU (DI), X0
- MOVOU 16(DI), X1
- MOVOU -32(DI)(R9*1), X2
- MOVOU -16(DI)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_match_emit_encodeSnappyBlockAsm64K:
- MOVQ R8, AX
- JMP emit_literal_done_match_emit_encodeSnappyBlockAsm64K
-
-memmove_long_match_emit_encodeSnappyBlockAsm64K:
- LEAQ (AX)(R9*1), R8
-
- // genMemMoveLong
- MOVOU (DI), X0
- MOVOU 16(DI), X1
- MOVOU -32(DI)(R9*1), X2
- MOVOU -16(DI)(R9*1), X3
- MOVQ R9, R11
- SHRQ $0x05, R11
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R12
- SUBQ R10, R12
- DECQ R11
- JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
- LEAQ -32(DI)(R12*1), R10
- LEAQ -32(AX)(R12*1), R13
-
-emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R13)
- MOVOA X5, 16(R13)
- ADDQ $0x20, R13
- ADDQ $0x20, R10
- ADDQ $0x20, R12
- DECQ R11
- JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
- MOVOU -32(DI)(R12*1), X4
- MOVOU -16(DI)(R12*1), X5
- MOVOA X4, -32(AX)(R12*1)
- MOVOA X5, -16(AX)(R12*1)
- ADDQ $0x20, R12
- CMPQ R9, R12
- JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ R8, AX
-
-emit_literal_done_match_emit_encodeSnappyBlockAsm64K:
-match_nolit_loop_encodeSnappyBlockAsm64K:
- MOVL CX, DI
- SUBL SI, DI
- MOVL DI, 16(SP)
- ADDL $0x04, CX
- ADDL $0x04, SI
- MOVQ src_len+32(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(SI*1), SI
-
- // matchLen
- XORL R10, R10
- CMPL DI, $0x08
- JL matchlen_match4_match_nolit_encodeSnappyBlockAsm64K
-
-matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K:
- MOVQ (R8)(R10*1), R9
- XORQ (SI)(R10*1), R9
- TESTQ R9, R9
- JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm64K
-
-#ifdef GOAMD64_v3
- TZCNTQ R9, R9
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R9, R9
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R9, R9
-
-#endif
- SARQ $0x03, R9
- LEAL (R10)(R9*1), R10
- JMP match_nolit_end_encodeSnappyBlockAsm64K
-
-matchlen_loop_match_nolit_encodeSnappyBlockAsm64K:
- LEAL -8(DI), DI
- LEAL 8(R10), R10
- CMPL DI, $0x08
- JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K
- JZ match_nolit_end_encodeSnappyBlockAsm64K
-
-matchlen_match4_match_nolit_encodeSnappyBlockAsm64K:
- CMPL DI, $0x04
- JL matchlen_match2_match_nolit_encodeSnappyBlockAsm64K
- MOVL (R8)(R10*1), R9
- CMPL (SI)(R10*1), R9
- JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm64K
- SUBL $0x04, DI
- LEAL 4(R10), R10
-
-matchlen_match2_match_nolit_encodeSnappyBlockAsm64K:
- CMPL DI, $0x02
- JL matchlen_match1_match_nolit_encodeSnappyBlockAsm64K
- MOVW (R8)(R10*1), R9
- CMPW (SI)(R10*1), R9
- JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm64K
- SUBL $0x02, DI
- LEAL 2(R10), R10
-
-matchlen_match1_match_nolit_encodeSnappyBlockAsm64K:
- CMPL DI, $0x01
- JL match_nolit_end_encodeSnappyBlockAsm64K
- MOVB (R8)(R10*1), R9
- CMPB (SI)(R10*1), R9
- JNE match_nolit_end_encodeSnappyBlockAsm64K
- LEAL 1(R10), R10
-
-match_nolit_end_encodeSnappyBlockAsm64K:
- ADDL R10, CX
- MOVL 16(SP), SI
- ADDL $0x04, R10
- MOVL CX, 12(SP)
-
- // emitCopy
-two_byte_offset_match_nolit_encodeSnappyBlockAsm64K:
- CMPL R10, $0x40
- JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K
- MOVB $0xee, (AX)
- MOVW SI, 1(AX)
- LEAL -60(R10), R10
- ADDQ $0x03, AX
- JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm64K
-
-two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K:
- CMPL R10, $0x0c
- JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K
- CMPL SI, $0x00000800
- JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K
- MOVB $0x01, BL
- LEAL -16(BX)(R10*4), R10
- MOVB SI, 1(AX)
- SHRL $0x08, SI
- SHLL $0x05, SI
- ORL SI, R10
- MOVB R10, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm64K
-
-emit_copy_three_match_nolit_encodeSnappyBlockAsm64K:
- MOVB $0x02, BL
- LEAL -4(BX)(R10*4), R10
- MOVB R10, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
-
-match_nolit_emitcopy_end_encodeSnappyBlockAsm64K:
- CMPL CX, 8(SP)
- JGE emit_remainder_encodeSnappyBlockAsm64K
- MOVQ -2(DX)(CX*1), DI
- CMPQ AX, (SP)
- JL match_nolit_dst_ok_encodeSnappyBlockAsm64K
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_nolit_dst_ok_encodeSnappyBlockAsm64K:
- MOVQ $0x0000cf1bbcdcbf9b, R9
- MOVQ DI, R8
- SHRQ $0x10, DI
- MOVQ DI, SI
- SHLQ $0x10, R8
- IMULQ R9, R8
- SHRQ $0x32, R8
- SHLQ $0x10, SI
- IMULQ R9, SI
- SHRQ $0x32, SI
- LEAL -2(CX), R9
- LEAQ 24(SP)(SI*4), R10
- MOVL (R10), SI
- MOVL R9, 24(SP)(R8*4)
- MOVL CX, (R10)
- CMPL (DX)(SI*1), DI
- JEQ match_nolit_loop_encodeSnappyBlockAsm64K
- INCL CX
- JMP search_loop_encodeSnappyBlockAsm64K
-
-emit_remainder_encodeSnappyBlockAsm64K:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
- JL emit_remainder_ok_encodeSnappyBlockAsm64K
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-emit_remainder_ok_encodeSnappyBlockAsm64K:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
- JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
- LEAL -1(SI), DX
- CMPL DX, $0x3c
- JLT one_byte_emit_remainder_encodeSnappyBlockAsm64K
- CMPL DX, $0x00000100
- JLT two_bytes_emit_remainder_encodeSnappyBlockAsm64K
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_emit_remainder_encodeSnappyBlockAsm64K
-
-two_bytes_emit_remainder_encodeSnappyBlockAsm64K:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
- CMPL DX, $0x40
- JL memmove_emit_remainder_encodeSnappyBlockAsm64K
- JMP memmove_long_emit_remainder_encodeSnappyBlockAsm64K
-
-one_byte_emit_remainder_encodeSnappyBlockAsm64K:
- SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
-
-memmove_emit_remainder_encodeSnappyBlockAsm64K:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveShort
- CMPQ BX, $0x08
- JLE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8
- CMPQ BX, $0x10
- JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16
- CMPQ BX, $0x20
- JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32
- JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64
-
-emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8:
- MOVQ (CX), SI
- MOVQ SI, (AX)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
-
-emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
-
-emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
-
-emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
-
-memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K:
- MOVQ DX, AX
- JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K
-
-memmove_long_emit_remainder_encodeSnappyBlockAsm64K:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVQ BX, DI
- SHRQ $0x05, DI
- MOVQ AX, SI
- ANDL $0x0000001f, SI
- MOVQ $0x00000040, R8
- SUBQ SI, R8
- DECQ DI
- JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
-
-emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back:
- MOVOU (SI), X4
- MOVOU 16(SI), X5
- MOVOA X4, (R9)
- MOVOA X5, 16(R9)
- ADDQ $0x20, R9
- ADDQ $0x20, SI
- ADDQ $0x20, R8
- DECQ DI
- JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back
-
-emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
- ADDQ $0x20, R8
- CMPQ BX, R8
- JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
-
-emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
- RET
-
-// func encodeSnappyBlockAsm12B(dst []byte, src []byte) int
-// Requires: BMI, SSE2
-TEXT ·encodeSnappyBlockAsm12B(SB), $16408-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000080, CX
- LEAQ 24(SP), DX
- PXOR X0, X0
-
-zero_loop_encodeSnappyBlockAsm12B:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
- JNZ zero_loop_encodeSnappyBlockAsm12B
- MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), SI
- MOVL SI, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL CX, 16(SP)
- MOVQ src_base+24(FP), DX
-
-search_loop_encodeSnappyBlockAsm12B:
- MOVL CX, SI
- SUBL 12(SP), SI
- SHRL $0x05, SI
- LEAL 4(CX)(SI*1), SI
- CMPL SI, 8(SP)
- JGE emit_remainder_encodeSnappyBlockAsm12B
- MOVQ (DX)(CX*1), DI
- MOVL SI, 20(SP)
- MOVQ $0x000000cf1bbcdcbb, R9
- MOVQ DI, R10
- MOVQ DI, R11
- SHRQ $0x08, R11
- SHLQ $0x18, R10
- IMULQ R9, R10
- SHRQ $0x34, R10
- SHLQ $0x18, R11
- IMULQ R9, R11
- SHRQ $0x34, R11
- MOVL 24(SP)(R10*4), SI
- MOVL 24(SP)(R11*4), R8
- MOVL CX, 24(SP)(R10*4)
- LEAL 1(CX), R10
- MOVL R10, 24(SP)(R11*4)
- MOVQ DI, R10
- SHRQ $0x10, R10
- SHLQ $0x18, R10
- IMULQ R9, R10
- SHRQ $0x34, R10
- MOVL CX, R9
- SUBL 16(SP), R9
- MOVL 1(DX)(R9*1), R11
- MOVQ DI, R9
- SHRQ $0x08, R9
- CMPL R9, R11
- JNE no_repeat_found_encodeSnappyBlockAsm12B
- LEAL 1(CX), DI
- MOVL 12(SP), SI
- MOVL DI, R8
- SUBL 16(SP), R8
- JZ repeat_extend_back_end_encodeSnappyBlockAsm12B
-
-repeat_extend_back_loop_encodeSnappyBlockAsm12B:
- CMPL DI, SI
- JLE repeat_extend_back_end_encodeSnappyBlockAsm12B
- MOVB -1(DX)(R8*1), BL
- MOVB -1(DX)(DI*1), R9
- CMPB BL, R9
- JNE repeat_extend_back_end_encodeSnappyBlockAsm12B
- LEAL -1(DI), DI
- DECL R8
- JNZ repeat_extend_back_loop_encodeSnappyBlockAsm12B
-
-repeat_extend_back_end_encodeSnappyBlockAsm12B:
- MOVL 12(SP), SI
- CMPL SI, DI
- JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B
- MOVL DI, R8
- MOVL DI, 12(SP)
- LEAQ (DX)(SI*1), R9
- SUBL SI, R8
- LEAL -1(R8), SI
- CMPL SI, $0x3c
- JLT one_byte_repeat_emit_encodeSnappyBlockAsm12B
- CMPL SI, $0x00000100
- JLT two_bytes_repeat_emit_encodeSnappyBlockAsm12B
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B
-
-two_bytes_repeat_emit_encodeSnappyBlockAsm12B:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_repeat_emit_encodeSnappyBlockAsm12B
- JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B
-
-one_byte_repeat_emit_encodeSnappyBlockAsm12B:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, AX
-
-memmove_repeat_emit_encodeSnappyBlockAsm12B:
- LEAQ (AX)(R8*1), SI
-
- // genMemMoveShort
- CMPQ R8, $0x08
- JLE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8
- CMPQ R8, $0x10
- JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16
- CMPQ R8, $0x20
- JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32
- JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64
-
-emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8:
- MOVQ (R9), R10
- MOVQ R10, (AX)
- JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
-
-emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16:
- MOVQ (R9), R10
- MOVQ -8(R9)(R8*1), R9
- MOVQ R10, (AX)
- MOVQ R9, -8(AX)(R8*1)
- JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
-
-emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32:
- MOVOU (R9), X0
- MOVOU -16(R9)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
- JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
-
-emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64:
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
-
-memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B:
- MOVQ SI, AX
- JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B
-
-memmove_long_repeat_emit_encodeSnappyBlockAsm12B:
- LEAQ (AX)(R8*1), SI
-
- // genMemMoveLong
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVQ R8, R11
- SHRQ $0x05, R11
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R12
- SUBQ R10, R12
- DECQ R11
- JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
- LEAQ -32(R9)(R12*1), R10
- LEAQ -32(AX)(R12*1), R13
-
-emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R13)
- MOVOA X5, 16(R13)
- ADDQ $0x20, R13
- ADDQ $0x20, R10
- ADDQ $0x20, R12
- DECQ R11
- JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back
-
-emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
- MOVOU -32(R9)(R12*1), X4
- MOVOU -16(R9)(R12*1), X5
- MOVOA X4, -32(AX)(R12*1)
- MOVOA X5, -16(AX)(R12*1)
- ADDQ $0x20, R12
- CMPQ R8, R12
- JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ SI, AX
-
-emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B:
- ADDL $0x05, CX
- MOVL CX, SI
- SUBL 16(SP), SI
- MOVQ src_len+32(FP), R8
- SUBL CX, R8
- LEAQ (DX)(CX*1), R9
- LEAQ (DX)(SI*1), SI
-
- // matchLen
- XORL R11, R11
- CMPL R8, $0x08
- JL matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B
-
-matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B:
- MOVQ (R9)(R11*1), R10
- XORQ (SI)(R11*1), R10
- TESTQ R10, R10
- JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B
-
-#ifdef GOAMD64_v3
- TZCNTQ R10, R10
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R10, R10
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R10, R10
-
-#endif
- SARQ $0x03, R10
- LEAL (R11)(R10*1), R11
- JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B
-
-matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B:
- LEAL -8(R8), R8
- LEAL 8(R11), R11
- CMPL R8, $0x08
- JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B
- JZ repeat_extend_forward_end_encodeSnappyBlockAsm12B
-
-matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B:
- CMPL R8, $0x04
- JL matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B
- MOVL (R9)(R11*1), R10
- CMPL (SI)(R11*1), R10
- JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B
- SUBL $0x04, R8
- LEAL 4(R11), R11
-
-matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B:
- CMPL R8, $0x02
- JL matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B
- MOVW (R9)(R11*1), R10
- CMPW (SI)(R11*1), R10
- JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B
- SUBL $0x02, R8
- LEAL 2(R11), R11
-
-matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B:
- CMPL R8, $0x01
- JL repeat_extend_forward_end_encodeSnappyBlockAsm12B
- MOVB (R9)(R11*1), R10
- CMPB (SI)(R11*1), R10
- JNE repeat_extend_forward_end_encodeSnappyBlockAsm12B
- LEAL 1(R11), R11
-
-repeat_extend_forward_end_encodeSnappyBlockAsm12B:
- ADDL R11, CX
- MOVL CX, SI
- SUBL DI, SI
- MOVL 16(SP), DI
-
- // emitCopy
-two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B:
- CMPL SI, $0x40
- JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B
- MOVB $0xee, (AX)
- MOVW DI, 1(AX)
- LEAL -60(SI), SI
- ADDQ $0x03, AX
- JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B
-
-two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B:
- CMPL SI, $0x0c
- JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B
- CMPL DI, $0x00000800
- JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B
- MOVB $0x01, BL
- LEAL -16(BX)(SI*4), SI
- MOVB DI, 1(AX)
- SHRL $0x08, DI
- SHLL $0x05, DI
- ORL DI, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeSnappyBlockAsm12B
-
-emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B:
- MOVB $0x02, BL
- LEAL -4(BX)(SI*4), SI
- MOVB SI, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
-
-repeat_end_emit_encodeSnappyBlockAsm12B:
- MOVL CX, 12(SP)
- JMP search_loop_encodeSnappyBlockAsm12B
-
-no_repeat_found_encodeSnappyBlockAsm12B:
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeSnappyBlockAsm12B
- SHRQ $0x08, DI
- MOVL 24(SP)(R10*4), SI
- LEAL 2(CX), R9
- CMPL (DX)(R8*1), DI
- JEQ candidate2_match_encodeSnappyBlockAsm12B
- MOVL R9, 24(SP)(R10*4)
- SHRQ $0x08, DI
- CMPL (DX)(SI*1), DI
- JEQ candidate3_match_encodeSnappyBlockAsm12B
- MOVL 20(SP), CX
- JMP search_loop_encodeSnappyBlockAsm12B
-
-candidate3_match_encodeSnappyBlockAsm12B:
- ADDL $0x02, CX
- JMP candidate_match_encodeSnappyBlockAsm12B
-
-candidate2_match_encodeSnappyBlockAsm12B:
- MOVL R9, 24(SP)(R10*4)
- INCL CX
- MOVL R8, SI
-
-candidate_match_encodeSnappyBlockAsm12B:
- MOVL 12(SP), DI
- TESTL SI, SI
- JZ match_extend_back_end_encodeSnappyBlockAsm12B
-
-match_extend_back_loop_encodeSnappyBlockAsm12B:
- CMPL CX, DI
- JLE match_extend_back_end_encodeSnappyBlockAsm12B
- MOVB -1(DX)(SI*1), BL
- MOVB -1(DX)(CX*1), R8
- CMPB BL, R8
- JNE match_extend_back_end_encodeSnappyBlockAsm12B
- LEAL -1(CX), CX
- DECL SI
- JZ match_extend_back_end_encodeSnappyBlockAsm12B
- JMP match_extend_back_loop_encodeSnappyBlockAsm12B
-
-match_extend_back_end_encodeSnappyBlockAsm12B:
- MOVL CX, DI
- SUBL 12(SP), DI
- LEAQ 3(AX)(DI*1), DI
- CMPQ DI, (SP)
- JL match_dst_size_check_encodeSnappyBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_dst_size_check_encodeSnappyBlockAsm12B:
- MOVL CX, DI
- MOVL 12(SP), R8
- CMPL R8, DI
- JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm12B
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(R8*1), DI
- SUBL R8, R9
- LEAL -1(R9), R8
- CMPL R8, $0x3c
- JLT one_byte_match_emit_encodeSnappyBlockAsm12B
- CMPL R8, $0x00000100
- JLT two_bytes_match_emit_encodeSnappyBlockAsm12B
- MOVB $0xf4, (AX)
- MOVW R8, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_match_emit_encodeSnappyBlockAsm12B
-
-two_bytes_match_emit_encodeSnappyBlockAsm12B:
- MOVB $0xf0, (AX)
- MOVB R8, 1(AX)
- ADDQ $0x02, AX
- CMPL R8, $0x40
- JL memmove_match_emit_encodeSnappyBlockAsm12B
- JMP memmove_long_match_emit_encodeSnappyBlockAsm12B
-
-one_byte_match_emit_encodeSnappyBlockAsm12B:
- SHLB $0x02, R8
- MOVB R8, (AX)
- ADDQ $0x01, AX
-
-memmove_match_emit_encodeSnappyBlockAsm12B:
- LEAQ (AX)(R9*1), R8
-
- // genMemMoveShort
- CMPQ R9, $0x08
- JLE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8
- CMPQ R9, $0x10
- JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32
- JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64
-
-emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8:
- MOVQ (DI), R10
- MOVQ R10, (AX)
- JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
-
-emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16:
- MOVQ (DI), R10
- MOVQ -8(DI)(R9*1), DI
- MOVQ R10, (AX)
- MOVQ DI, -8(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
-
-emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32:
- MOVOU (DI), X0
- MOVOU -16(DI)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
-
-emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64:
- MOVOU (DI), X0
- MOVOU 16(DI), X1
- MOVOU -32(DI)(R9*1), X2
- MOVOU -16(DI)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_match_emit_encodeSnappyBlockAsm12B:
- MOVQ R8, AX
- JMP emit_literal_done_match_emit_encodeSnappyBlockAsm12B
-
-memmove_long_match_emit_encodeSnappyBlockAsm12B:
- LEAQ (AX)(R9*1), R8
-
- // genMemMoveLong
- MOVOU (DI), X0
- MOVOU 16(DI), X1
- MOVOU -32(DI)(R9*1), X2
- MOVOU -16(DI)(R9*1), X3
- MOVQ R9, R11
- SHRQ $0x05, R11
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R12
- SUBQ R10, R12
- DECQ R11
- JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
- LEAQ -32(DI)(R12*1), R10
- LEAQ -32(AX)(R12*1), R13
-
-emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R13)
- MOVOA X5, 16(R13)
- ADDQ $0x20, R13
- ADDQ $0x20, R10
- ADDQ $0x20, R12
- DECQ R11
- JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
- MOVOU -32(DI)(R12*1), X4
- MOVOU -16(DI)(R12*1), X5
- MOVOA X4, -32(AX)(R12*1)
- MOVOA X5, -16(AX)(R12*1)
- ADDQ $0x20, R12
- CMPQ R9, R12
- JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ R8, AX
-
-emit_literal_done_match_emit_encodeSnappyBlockAsm12B:
-match_nolit_loop_encodeSnappyBlockAsm12B:
- MOVL CX, DI
- SUBL SI, DI
- MOVL DI, 16(SP)
- ADDL $0x04, CX
- ADDL $0x04, SI
- MOVQ src_len+32(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(SI*1), SI
-
- // matchLen
- XORL R10, R10
- CMPL DI, $0x08
- JL matchlen_match4_match_nolit_encodeSnappyBlockAsm12B
-
-matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B:
- MOVQ (R8)(R10*1), R9
- XORQ (SI)(R10*1), R9
- TESTQ R9, R9
- JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm12B
-
-#ifdef GOAMD64_v3
- TZCNTQ R9, R9
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R9, R9
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R9, R9
-
-#endif
- SARQ $0x03, R9
- LEAL (R10)(R9*1), R10
- JMP match_nolit_end_encodeSnappyBlockAsm12B
-
-matchlen_loop_match_nolit_encodeSnappyBlockAsm12B:
- LEAL -8(DI), DI
- LEAL 8(R10), R10
- CMPL DI, $0x08
- JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B
- JZ match_nolit_end_encodeSnappyBlockAsm12B
-
-matchlen_match4_match_nolit_encodeSnappyBlockAsm12B:
- CMPL DI, $0x04
- JL matchlen_match2_match_nolit_encodeSnappyBlockAsm12B
- MOVL (R8)(R10*1), R9
- CMPL (SI)(R10*1), R9
- JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm12B
- SUBL $0x04, DI
- LEAL 4(R10), R10
-
-matchlen_match2_match_nolit_encodeSnappyBlockAsm12B:
- CMPL DI, $0x02
- JL matchlen_match1_match_nolit_encodeSnappyBlockAsm12B
- MOVW (R8)(R10*1), R9
- CMPW (SI)(R10*1), R9
- JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm12B
- SUBL $0x02, DI
- LEAL 2(R10), R10
-
-matchlen_match1_match_nolit_encodeSnappyBlockAsm12B:
- CMPL DI, $0x01
- JL match_nolit_end_encodeSnappyBlockAsm12B
- MOVB (R8)(R10*1), R9
- CMPB (SI)(R10*1), R9
- JNE match_nolit_end_encodeSnappyBlockAsm12B
- LEAL 1(R10), R10
-
-match_nolit_end_encodeSnappyBlockAsm12B:
- ADDL R10, CX
- MOVL 16(SP), SI
- ADDL $0x04, R10
- MOVL CX, 12(SP)
-
- // emitCopy
-two_byte_offset_match_nolit_encodeSnappyBlockAsm12B:
- CMPL R10, $0x40
- JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B
- MOVB $0xee, (AX)
- MOVW SI, 1(AX)
- LEAL -60(R10), R10
- ADDQ $0x03, AX
- JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm12B
-
-two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B:
- CMPL R10, $0x0c
- JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B
- CMPL SI, $0x00000800
- JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B
- MOVB $0x01, BL
- LEAL -16(BX)(R10*4), R10
- MOVB SI, 1(AX)
- SHRL $0x08, SI
- SHLL $0x05, SI
- ORL SI, R10
- MOVB R10, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm12B
-
-emit_copy_three_match_nolit_encodeSnappyBlockAsm12B:
- MOVB $0x02, BL
- LEAL -4(BX)(R10*4), R10
- MOVB R10, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
-
-match_nolit_emitcopy_end_encodeSnappyBlockAsm12B:
- CMPL CX, 8(SP)
- JGE emit_remainder_encodeSnappyBlockAsm12B
- MOVQ -2(DX)(CX*1), DI
- CMPQ AX, (SP)
- JL match_nolit_dst_ok_encodeSnappyBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_nolit_dst_ok_encodeSnappyBlockAsm12B:
- MOVQ $0x000000cf1bbcdcbb, R9
- MOVQ DI, R8
- SHRQ $0x10, DI
- MOVQ DI, SI
- SHLQ $0x18, R8
- IMULQ R9, R8
- SHRQ $0x34, R8
- SHLQ $0x18, SI
- IMULQ R9, SI
- SHRQ $0x34, SI
- LEAL -2(CX), R9
- LEAQ 24(SP)(SI*4), R10
- MOVL (R10), SI
- MOVL R9, 24(SP)(R8*4)
- MOVL CX, (R10)
- CMPL (DX)(SI*1), DI
- JEQ match_nolit_loop_encodeSnappyBlockAsm12B
- INCL CX
- JMP search_loop_encodeSnappyBlockAsm12B
-
-emit_remainder_encodeSnappyBlockAsm12B:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
- JL emit_remainder_ok_encodeSnappyBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-emit_remainder_ok_encodeSnappyBlockAsm12B:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
- JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
- LEAL -1(SI), DX
- CMPL DX, $0x3c
- JLT one_byte_emit_remainder_encodeSnappyBlockAsm12B
- CMPL DX, $0x00000100
- JLT two_bytes_emit_remainder_encodeSnappyBlockAsm12B
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B
-
-two_bytes_emit_remainder_encodeSnappyBlockAsm12B:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
- CMPL DX, $0x40
- JL memmove_emit_remainder_encodeSnappyBlockAsm12B
- JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B
-
-one_byte_emit_remainder_encodeSnappyBlockAsm12B:
- SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
-
-memmove_emit_remainder_encodeSnappyBlockAsm12B:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveShort
- CMPQ BX, $0x08
- JLE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8
- CMPQ BX, $0x10
- JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16
- CMPQ BX, $0x20
- JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32
- JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64
-
-emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8:
- MOVQ (CX), SI
- MOVQ SI, (AX)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
-
-emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
-
-emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
-
-emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
-
-memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B:
- MOVQ DX, AX
- JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B
-
-memmove_long_emit_remainder_encodeSnappyBlockAsm12B:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVQ BX, DI
- SHRQ $0x05, DI
- MOVQ AX, SI
- ANDL $0x0000001f, SI
- MOVQ $0x00000040, R8
- SUBQ SI, R8
- DECQ DI
- JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
-
-emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back:
- MOVOU (SI), X4
- MOVOU 16(SI), X5
- MOVOA X4, (R9)
- MOVOA X5, 16(R9)
- ADDQ $0x20, R9
- ADDQ $0x20, SI
- ADDQ $0x20, R8
- DECQ DI
- JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back
-
-emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
- ADDQ $0x20, R8
- CMPQ BX, R8
- JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
-
-emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
- RET
-
-// func encodeSnappyBlockAsm10B(dst []byte, src []byte) int
-// Requires: BMI, SSE2
-TEXT ·encodeSnappyBlockAsm10B(SB), $4120-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000020, CX
- LEAQ 24(SP), DX
- PXOR X0, X0
-
-zero_loop_encodeSnappyBlockAsm10B:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
- JNZ zero_loop_encodeSnappyBlockAsm10B
- MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), SI
- MOVL SI, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL CX, 16(SP)
- MOVQ src_base+24(FP), DX
-
-search_loop_encodeSnappyBlockAsm10B:
- MOVL CX, SI
- SUBL 12(SP), SI
- SHRL $0x05, SI
- LEAL 4(CX)(SI*1), SI
- CMPL SI, 8(SP)
- JGE emit_remainder_encodeSnappyBlockAsm10B
- MOVQ (DX)(CX*1), DI
- MOVL SI, 20(SP)
- MOVQ $0x9e3779b1, R9
- MOVQ DI, R10
- MOVQ DI, R11
- SHRQ $0x08, R11
- SHLQ $0x20, R10
- IMULQ R9, R10
- SHRQ $0x36, R10
- SHLQ $0x20, R11
- IMULQ R9, R11
- SHRQ $0x36, R11
- MOVL 24(SP)(R10*4), SI
- MOVL 24(SP)(R11*4), R8
- MOVL CX, 24(SP)(R10*4)
- LEAL 1(CX), R10
- MOVL R10, 24(SP)(R11*4)
- MOVQ DI, R10
- SHRQ $0x10, R10
- SHLQ $0x20, R10
- IMULQ R9, R10
- SHRQ $0x36, R10
- MOVL CX, R9
- SUBL 16(SP), R9
- MOVL 1(DX)(R9*1), R11
- MOVQ DI, R9
- SHRQ $0x08, R9
- CMPL R9, R11
- JNE no_repeat_found_encodeSnappyBlockAsm10B
- LEAL 1(CX), DI
- MOVL 12(SP), SI
- MOVL DI, R8
- SUBL 16(SP), R8
- JZ repeat_extend_back_end_encodeSnappyBlockAsm10B
-
-repeat_extend_back_loop_encodeSnappyBlockAsm10B:
- CMPL DI, SI
- JLE repeat_extend_back_end_encodeSnappyBlockAsm10B
- MOVB -1(DX)(R8*1), BL
- MOVB -1(DX)(DI*1), R9
- CMPB BL, R9
- JNE repeat_extend_back_end_encodeSnappyBlockAsm10B
- LEAL -1(DI), DI
- DECL R8
- JNZ repeat_extend_back_loop_encodeSnappyBlockAsm10B
-
-repeat_extend_back_end_encodeSnappyBlockAsm10B:
- MOVL 12(SP), SI
- CMPL SI, DI
- JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B
- MOVL DI, R8
- MOVL DI, 12(SP)
- LEAQ (DX)(SI*1), R9
- SUBL SI, R8
- LEAL -1(R8), SI
- CMPL SI, $0x3c
- JLT one_byte_repeat_emit_encodeSnappyBlockAsm10B
- CMPL SI, $0x00000100
- JLT two_bytes_repeat_emit_encodeSnappyBlockAsm10B
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B
-
-two_bytes_repeat_emit_encodeSnappyBlockAsm10B:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_repeat_emit_encodeSnappyBlockAsm10B
- JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B
-
-one_byte_repeat_emit_encodeSnappyBlockAsm10B:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, AX
-
-memmove_repeat_emit_encodeSnappyBlockAsm10B:
- LEAQ (AX)(R8*1), SI
-
- // genMemMoveShort
- CMPQ R8, $0x08
- JLE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8
- CMPQ R8, $0x10
- JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16
- CMPQ R8, $0x20
- JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
- JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64
-
-emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8:
- MOVQ (R9), R10
- MOVQ R10, (AX)
- JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
-
-emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16:
- MOVQ (R9), R10
- MOVQ -8(R9)(R8*1), R9
- MOVQ R10, (AX)
- MOVQ R9, -8(AX)(R8*1)
- JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
-
-emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
- MOVOU (R9), X0
- MOVOU -16(R9)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
- JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
-
-emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
-
-memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B:
- MOVQ SI, AX
- JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B
-
-memmove_long_repeat_emit_encodeSnappyBlockAsm10B:
- LEAQ (AX)(R8*1), SI
-
- // genMemMoveLong
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVQ R8, R11
- SHRQ $0x05, R11
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R12
- SUBQ R10, R12
- DECQ R11
- JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
- LEAQ -32(R9)(R12*1), R10
- LEAQ -32(AX)(R12*1), R13
-
-emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R13)
- MOVOA X5, 16(R13)
- ADDQ $0x20, R13
- ADDQ $0x20, R10
- ADDQ $0x20, R12
- DECQ R11
- JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back
-
-emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
- MOVOU -32(R9)(R12*1), X4
- MOVOU -16(R9)(R12*1), X5
- MOVOA X4, -32(AX)(R12*1)
- MOVOA X5, -16(AX)(R12*1)
- ADDQ $0x20, R12
- CMPQ R8, R12
- JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ SI, AX
-
-emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B:
- ADDL $0x05, CX
- MOVL CX, SI
- SUBL 16(SP), SI
- MOVQ src_len+32(FP), R8
- SUBL CX, R8
- LEAQ (DX)(CX*1), R9
- LEAQ (DX)(SI*1), SI
-
- // matchLen
- XORL R11, R11
- CMPL R8, $0x08
- JL matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B
-
-matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B:
- MOVQ (R9)(R11*1), R10
- XORQ (SI)(R11*1), R10
- TESTQ R10, R10
- JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B
-
-#ifdef GOAMD64_v3
- TZCNTQ R10, R10
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R10, R10
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R10, R10
-
-#endif
- SARQ $0x03, R10
- LEAL (R11)(R10*1), R11
- JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B
-
-matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B:
- LEAL -8(R8), R8
- LEAL 8(R11), R11
- CMPL R8, $0x08
- JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B
- JZ repeat_extend_forward_end_encodeSnappyBlockAsm10B
-
-matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B:
- CMPL R8, $0x04
- JL matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B
- MOVL (R9)(R11*1), R10
- CMPL (SI)(R11*1), R10
- JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B
- SUBL $0x04, R8
- LEAL 4(R11), R11
-
-matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B:
- CMPL R8, $0x02
- JL matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B
- MOVW (R9)(R11*1), R10
- CMPW (SI)(R11*1), R10
- JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B
- SUBL $0x02, R8
- LEAL 2(R11), R11
-
-matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B:
- CMPL R8, $0x01
- JL repeat_extend_forward_end_encodeSnappyBlockAsm10B
- MOVB (R9)(R11*1), R10
- CMPB (SI)(R11*1), R10
- JNE repeat_extend_forward_end_encodeSnappyBlockAsm10B
- LEAL 1(R11), R11
-
-repeat_extend_forward_end_encodeSnappyBlockAsm10B:
- ADDL R11, CX
- MOVL CX, SI
- SUBL DI, SI
- MOVL 16(SP), DI
-
- // emitCopy
-two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B:
- CMPL SI, $0x40
- JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B
- MOVB $0xee, (AX)
- MOVW DI, 1(AX)
- LEAL -60(SI), SI
- ADDQ $0x03, AX
- JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B
-
-two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B:
- CMPL SI, $0x0c
- JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
- CMPL DI, $0x00000800
- JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
- MOVB $0x01, BL
- LEAL -16(BX)(SI*4), SI
- MOVB DI, 1(AX)
- SHRL $0x08, DI
- SHLL $0x05, DI
- ORL DI, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeSnappyBlockAsm10B
-
-emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B:
- MOVB $0x02, BL
- LEAL -4(BX)(SI*4), SI
- MOVB SI, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
-
-repeat_end_emit_encodeSnappyBlockAsm10B:
- MOVL CX, 12(SP)
- JMP search_loop_encodeSnappyBlockAsm10B
-
-no_repeat_found_encodeSnappyBlockAsm10B:
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeSnappyBlockAsm10B
- SHRQ $0x08, DI
- MOVL 24(SP)(R10*4), SI
- LEAL 2(CX), R9
- CMPL (DX)(R8*1), DI
- JEQ candidate2_match_encodeSnappyBlockAsm10B
- MOVL R9, 24(SP)(R10*4)
- SHRQ $0x08, DI
- CMPL (DX)(SI*1), DI
- JEQ candidate3_match_encodeSnappyBlockAsm10B
- MOVL 20(SP), CX
- JMP search_loop_encodeSnappyBlockAsm10B
-
-candidate3_match_encodeSnappyBlockAsm10B:
- ADDL $0x02, CX
- JMP candidate_match_encodeSnappyBlockAsm10B
-
-candidate2_match_encodeSnappyBlockAsm10B:
- MOVL R9, 24(SP)(R10*4)
- INCL CX
- MOVL R8, SI
-
-candidate_match_encodeSnappyBlockAsm10B:
- MOVL 12(SP), DI
- TESTL SI, SI
- JZ match_extend_back_end_encodeSnappyBlockAsm10B
-
-match_extend_back_loop_encodeSnappyBlockAsm10B:
- CMPL CX, DI
- JLE match_extend_back_end_encodeSnappyBlockAsm10B
- MOVB -1(DX)(SI*1), BL
- MOVB -1(DX)(CX*1), R8
- CMPB BL, R8
- JNE match_extend_back_end_encodeSnappyBlockAsm10B
- LEAL -1(CX), CX
- DECL SI
- JZ match_extend_back_end_encodeSnappyBlockAsm10B
- JMP match_extend_back_loop_encodeSnappyBlockAsm10B
-
-match_extend_back_end_encodeSnappyBlockAsm10B:
- MOVL CX, DI
- SUBL 12(SP), DI
- LEAQ 3(AX)(DI*1), DI
- CMPQ DI, (SP)
- JL match_dst_size_check_encodeSnappyBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_dst_size_check_encodeSnappyBlockAsm10B:
- MOVL CX, DI
- MOVL 12(SP), R8
- CMPL R8, DI
- JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm10B
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(R8*1), DI
- SUBL R8, R9
- LEAL -1(R9), R8
- CMPL R8, $0x3c
- JLT one_byte_match_emit_encodeSnappyBlockAsm10B
- CMPL R8, $0x00000100
- JLT two_bytes_match_emit_encodeSnappyBlockAsm10B
- MOVB $0xf4, (AX)
- MOVW R8, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_match_emit_encodeSnappyBlockAsm10B
-
-two_bytes_match_emit_encodeSnappyBlockAsm10B:
- MOVB $0xf0, (AX)
- MOVB R8, 1(AX)
- ADDQ $0x02, AX
- CMPL R8, $0x40
- JL memmove_match_emit_encodeSnappyBlockAsm10B
- JMP memmove_long_match_emit_encodeSnappyBlockAsm10B
-
-one_byte_match_emit_encodeSnappyBlockAsm10B:
- SHLB $0x02, R8
- MOVB R8, (AX)
- ADDQ $0x01, AX
-
-memmove_match_emit_encodeSnappyBlockAsm10B:
- LEAQ (AX)(R9*1), R8
-
- // genMemMoveShort
- CMPQ R9, $0x08
- JLE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8
- CMPQ R9, $0x10
- JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
- JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64
-
-emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8:
- MOVQ (DI), R10
- MOVQ R10, (AX)
- JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
-
-emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16:
- MOVQ (DI), R10
- MOVQ -8(DI)(R9*1), DI
- MOVQ R10, (AX)
- MOVQ DI, -8(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
-
-emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
- MOVOU (DI), X0
- MOVOU -16(DI)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
-
-emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
- MOVOU (DI), X0
- MOVOU 16(DI), X1
- MOVOU -32(DI)(R9*1), X2
- MOVOU -16(DI)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_match_emit_encodeSnappyBlockAsm10B:
- MOVQ R8, AX
- JMP emit_literal_done_match_emit_encodeSnappyBlockAsm10B
-
-memmove_long_match_emit_encodeSnappyBlockAsm10B:
- LEAQ (AX)(R9*1), R8
-
- // genMemMoveLong
- MOVOU (DI), X0
- MOVOU 16(DI), X1
- MOVOU -32(DI)(R9*1), X2
- MOVOU -16(DI)(R9*1), X3
- MOVQ R9, R11
- SHRQ $0x05, R11
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R12
- SUBQ R10, R12
- DECQ R11
- JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
- LEAQ -32(DI)(R12*1), R10
- LEAQ -32(AX)(R12*1), R13
-
-emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R13)
- MOVOA X5, 16(R13)
- ADDQ $0x20, R13
- ADDQ $0x20, R10
- ADDQ $0x20, R12
- DECQ R11
- JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
- MOVOU -32(DI)(R12*1), X4
- MOVOU -16(DI)(R12*1), X5
- MOVOA X4, -32(AX)(R12*1)
- MOVOA X5, -16(AX)(R12*1)
- ADDQ $0x20, R12
- CMPQ R9, R12
- JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ R8, AX
-
-emit_literal_done_match_emit_encodeSnappyBlockAsm10B:
-match_nolit_loop_encodeSnappyBlockAsm10B:
- MOVL CX, DI
- SUBL SI, DI
- MOVL DI, 16(SP)
- ADDL $0x04, CX
- ADDL $0x04, SI
- MOVQ src_len+32(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(SI*1), SI
-
- // matchLen
- XORL R10, R10
- CMPL DI, $0x08
- JL matchlen_match4_match_nolit_encodeSnappyBlockAsm10B
-
-matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B:
- MOVQ (R8)(R10*1), R9
- XORQ (SI)(R10*1), R9
- TESTQ R9, R9
- JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm10B
-
-#ifdef GOAMD64_v3
- TZCNTQ R9, R9
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R9, R9
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R9, R9
-
-#endif
- SARQ $0x03, R9
- LEAL (R10)(R9*1), R10
- JMP match_nolit_end_encodeSnappyBlockAsm10B
-
-matchlen_loop_match_nolit_encodeSnappyBlockAsm10B:
- LEAL -8(DI), DI
- LEAL 8(R10), R10
- CMPL DI, $0x08
- JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B
- JZ match_nolit_end_encodeSnappyBlockAsm10B
-
-matchlen_match4_match_nolit_encodeSnappyBlockAsm10B:
- CMPL DI, $0x04
- JL matchlen_match2_match_nolit_encodeSnappyBlockAsm10B
- MOVL (R8)(R10*1), R9
- CMPL (SI)(R10*1), R9
- JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm10B
- SUBL $0x04, DI
- LEAL 4(R10), R10
-
-matchlen_match2_match_nolit_encodeSnappyBlockAsm10B:
- CMPL DI, $0x02
- JL matchlen_match1_match_nolit_encodeSnappyBlockAsm10B
- MOVW (R8)(R10*1), R9
- CMPW (SI)(R10*1), R9
- JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm10B
- SUBL $0x02, DI
- LEAL 2(R10), R10
-
-matchlen_match1_match_nolit_encodeSnappyBlockAsm10B:
- CMPL DI, $0x01
- JL match_nolit_end_encodeSnappyBlockAsm10B
- MOVB (R8)(R10*1), R9
- CMPB (SI)(R10*1), R9
- JNE match_nolit_end_encodeSnappyBlockAsm10B
- LEAL 1(R10), R10
-
-match_nolit_end_encodeSnappyBlockAsm10B:
- ADDL R10, CX
- MOVL 16(SP), SI
- ADDL $0x04, R10
- MOVL CX, 12(SP)
-
- // emitCopy
-two_byte_offset_match_nolit_encodeSnappyBlockAsm10B:
- CMPL R10, $0x40
- JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B
- MOVB $0xee, (AX)
- MOVW SI, 1(AX)
- LEAL -60(R10), R10
- ADDQ $0x03, AX
- JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm10B
-
-two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B:
- CMPL R10, $0x0c
- JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
- CMPL SI, $0x00000800
- JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
- MOVB $0x01, BL
- LEAL -16(BX)(R10*4), R10
- MOVB SI, 1(AX)
- SHRL $0x08, SI
- SHLL $0x05, SI
- ORL SI, R10
- MOVB R10, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm10B
-
-emit_copy_three_match_nolit_encodeSnappyBlockAsm10B:
- MOVB $0x02, BL
- LEAL -4(BX)(R10*4), R10
- MOVB R10, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
-
-match_nolit_emitcopy_end_encodeSnappyBlockAsm10B:
- CMPL CX, 8(SP)
- JGE emit_remainder_encodeSnappyBlockAsm10B
- MOVQ -2(DX)(CX*1), DI
- CMPQ AX, (SP)
- JL match_nolit_dst_ok_encodeSnappyBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_nolit_dst_ok_encodeSnappyBlockAsm10B:
- MOVQ $0x9e3779b1, R9
- MOVQ DI, R8
- SHRQ $0x10, DI
- MOVQ DI, SI
- SHLQ $0x20, R8
- IMULQ R9, R8
- SHRQ $0x36, R8
- SHLQ $0x20, SI
- IMULQ R9, SI
- SHRQ $0x36, SI
- LEAL -2(CX), R9
- LEAQ 24(SP)(SI*4), R10
- MOVL (R10), SI
- MOVL R9, 24(SP)(R8*4)
- MOVL CX, (R10)
- CMPL (DX)(SI*1), DI
- JEQ match_nolit_loop_encodeSnappyBlockAsm10B
- INCL CX
- JMP search_loop_encodeSnappyBlockAsm10B
-
-emit_remainder_encodeSnappyBlockAsm10B:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
- JL emit_remainder_ok_encodeSnappyBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-emit_remainder_ok_encodeSnappyBlockAsm10B:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
- JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
- LEAL -1(SI), DX
- CMPL DX, $0x3c
- JLT one_byte_emit_remainder_encodeSnappyBlockAsm10B
- CMPL DX, $0x00000100
- JLT two_bytes_emit_remainder_encodeSnappyBlockAsm10B
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B
-
-two_bytes_emit_remainder_encodeSnappyBlockAsm10B:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
- CMPL DX, $0x40
- JL memmove_emit_remainder_encodeSnappyBlockAsm10B
- JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B
-
-one_byte_emit_remainder_encodeSnappyBlockAsm10B:
- SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
-
-memmove_emit_remainder_encodeSnappyBlockAsm10B:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveShort
- CMPQ BX, $0x08
- JLE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8
- CMPQ BX, $0x10
- JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16
- CMPQ BX, $0x20
- JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32
- JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64
-
-emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8:
- MOVQ (CX), SI
- MOVQ SI, (AX)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
-
-emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
-
-emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
-
-emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
-
-memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B:
- MOVQ DX, AX
- JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B
-
-memmove_long_emit_remainder_encodeSnappyBlockAsm10B:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVQ BX, DI
- SHRQ $0x05, DI
- MOVQ AX, SI
- ANDL $0x0000001f, SI
- MOVQ $0x00000040, R8
- SUBQ SI, R8
- DECQ DI
- JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
-
-emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back:
- MOVOU (SI), X4
- MOVOU 16(SI), X5
- MOVOA X4, (R9)
- MOVOA X5, 16(R9)
- ADDQ $0x20, R9
- ADDQ $0x20, SI
- ADDQ $0x20, R8
- DECQ DI
- JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back
-
-emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
- ADDQ $0x20, R8
- CMPQ BX, R8
- JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
-
-emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
- RET
-
-// func encodeSnappyBlockAsm8B(dst []byte, src []byte) int
-// Requires: BMI, SSE2
-TEXT ·encodeSnappyBlockAsm8B(SB), $1048-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000008, CX
- LEAQ 24(SP), DX
- PXOR X0, X0
-
-zero_loop_encodeSnappyBlockAsm8B:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
- JNZ zero_loop_encodeSnappyBlockAsm8B
- MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), SI
- MOVL SI, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL CX, 16(SP)
- MOVQ src_base+24(FP), DX
-
-search_loop_encodeSnappyBlockAsm8B:
- MOVL CX, SI
- SUBL 12(SP), SI
- SHRL $0x04, SI
- LEAL 4(CX)(SI*1), SI
- CMPL SI, 8(SP)
- JGE emit_remainder_encodeSnappyBlockAsm8B
- MOVQ (DX)(CX*1), DI
- MOVL SI, 20(SP)
- MOVQ $0x9e3779b1, R9
- MOVQ DI, R10
- MOVQ DI, R11
- SHRQ $0x08, R11
- SHLQ $0x20, R10
- IMULQ R9, R10
- SHRQ $0x38, R10
- SHLQ $0x20, R11
- IMULQ R9, R11
- SHRQ $0x38, R11
- MOVL 24(SP)(R10*4), SI
- MOVL 24(SP)(R11*4), R8
- MOVL CX, 24(SP)(R10*4)
- LEAL 1(CX), R10
- MOVL R10, 24(SP)(R11*4)
- MOVQ DI, R10
- SHRQ $0x10, R10
- SHLQ $0x20, R10
- IMULQ R9, R10
- SHRQ $0x38, R10
- MOVL CX, R9
- SUBL 16(SP), R9
- MOVL 1(DX)(R9*1), R11
- MOVQ DI, R9
- SHRQ $0x08, R9
- CMPL R9, R11
- JNE no_repeat_found_encodeSnappyBlockAsm8B
- LEAL 1(CX), DI
- MOVL 12(SP), SI
- MOVL DI, R8
- SUBL 16(SP), R8
- JZ repeat_extend_back_end_encodeSnappyBlockAsm8B
-
-repeat_extend_back_loop_encodeSnappyBlockAsm8B:
- CMPL DI, SI
- JLE repeat_extend_back_end_encodeSnappyBlockAsm8B
- MOVB -1(DX)(R8*1), BL
- MOVB -1(DX)(DI*1), R9
- CMPB BL, R9
- JNE repeat_extend_back_end_encodeSnappyBlockAsm8B
- LEAL -1(DI), DI
- DECL R8
- JNZ repeat_extend_back_loop_encodeSnappyBlockAsm8B
-
-repeat_extend_back_end_encodeSnappyBlockAsm8B:
- MOVL 12(SP), SI
- CMPL SI, DI
- JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B
- MOVL DI, R8
- MOVL DI, 12(SP)
- LEAQ (DX)(SI*1), R9
- SUBL SI, R8
- LEAL -1(R8), SI
- CMPL SI, $0x3c
- JLT one_byte_repeat_emit_encodeSnappyBlockAsm8B
- CMPL SI, $0x00000100
- JLT two_bytes_repeat_emit_encodeSnappyBlockAsm8B
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B
-
-two_bytes_repeat_emit_encodeSnappyBlockAsm8B:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_repeat_emit_encodeSnappyBlockAsm8B
- JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B
-
-one_byte_repeat_emit_encodeSnappyBlockAsm8B:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, AX
-
-memmove_repeat_emit_encodeSnappyBlockAsm8B:
- LEAQ (AX)(R8*1), SI
-
- // genMemMoveShort
- CMPQ R8, $0x08
- JLE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8
- CMPQ R8, $0x10
- JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16
- CMPQ R8, $0x20
- JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
- JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64
-
-emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8:
- MOVQ (R9), R10
- MOVQ R10, (AX)
- JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
-
-emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16:
- MOVQ (R9), R10
- MOVQ -8(R9)(R8*1), R9
- MOVQ R10, (AX)
- MOVQ R9, -8(AX)(R8*1)
- JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
-
-emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
- MOVOU (R9), X0
- MOVOU -16(R9)(R8*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R8*1)
- JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
-
-emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
-
-memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B:
- MOVQ SI, AX
- JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B
-
-memmove_long_repeat_emit_encodeSnappyBlockAsm8B:
- LEAQ (AX)(R8*1), SI
-
- // genMemMoveLong
- MOVOU (R9), X0
- MOVOU 16(R9), X1
- MOVOU -32(R9)(R8*1), X2
- MOVOU -16(R9)(R8*1), X3
- MOVQ R8, R11
- SHRQ $0x05, R11
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R12
- SUBQ R10, R12
- DECQ R11
- JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
- LEAQ -32(R9)(R12*1), R10
- LEAQ -32(AX)(R12*1), R13
-
-emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R13)
- MOVOA X5, 16(R13)
- ADDQ $0x20, R13
- ADDQ $0x20, R10
- ADDQ $0x20, R12
- DECQ R11
- JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back
-
-emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
- MOVOU -32(R9)(R12*1), X4
- MOVOU -16(R9)(R12*1), X5
- MOVOA X4, -32(AX)(R12*1)
- MOVOA X5, -16(AX)(R12*1)
- ADDQ $0x20, R12
- CMPQ R8, R12
- JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R8*1)
- MOVOU X3, -16(AX)(R8*1)
- MOVQ SI, AX
-
-emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B:
- ADDL $0x05, CX
- MOVL CX, SI
- SUBL 16(SP), SI
- MOVQ src_len+32(FP), R8
- SUBL CX, R8
- LEAQ (DX)(CX*1), R9
- LEAQ (DX)(SI*1), SI
-
- // matchLen
- XORL R11, R11
- CMPL R8, $0x08
- JL matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B
-
-matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B:
- MOVQ (R9)(R11*1), R10
- XORQ (SI)(R11*1), R10
- TESTQ R10, R10
- JZ matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B
-
-#ifdef GOAMD64_v3
- TZCNTQ R10, R10
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R10, R10
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R10, R10
-
-#endif
- SARQ $0x03, R10
- LEAL (R11)(R10*1), R11
- JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B
-
-matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B:
- LEAL -8(R8), R8
- LEAL 8(R11), R11
- CMPL R8, $0x08
- JGE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B
- JZ repeat_extend_forward_end_encodeSnappyBlockAsm8B
-
-matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B:
- CMPL R8, $0x04
- JL matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B
- MOVL (R9)(R11*1), R10
- CMPL (SI)(R11*1), R10
- JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B
- SUBL $0x04, R8
- LEAL 4(R11), R11
-
-matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B:
- CMPL R8, $0x02
- JL matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B
- MOVW (R9)(R11*1), R10
- CMPW (SI)(R11*1), R10
- JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B
- SUBL $0x02, R8
- LEAL 2(R11), R11
-
-matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B:
- CMPL R8, $0x01
- JL repeat_extend_forward_end_encodeSnappyBlockAsm8B
- MOVB (R9)(R11*1), R10
- CMPB (SI)(R11*1), R10
- JNE repeat_extend_forward_end_encodeSnappyBlockAsm8B
- LEAL 1(R11), R11
-
-repeat_extend_forward_end_encodeSnappyBlockAsm8B:
- ADDL R11, CX
- MOVL CX, SI
- SUBL DI, SI
- MOVL 16(SP), DI
-
- // emitCopy
-two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B:
- CMPL SI, $0x40
- JLE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B
- MOVB $0xee, (AX)
- MOVW DI, 1(AX)
- LEAL -60(SI), SI
- ADDQ $0x03, AX
- JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B
-
-two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B:
- CMPL SI, $0x0c
- JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B
- MOVB $0x01, BL
- LEAL -16(BX)(SI*4), SI
- MOVB DI, 1(AX)
- SHRL $0x08, DI
- SHLL $0x05, DI
- ORL DI, SI
- MOVB SI, (AX)
- ADDQ $0x02, AX
- JMP repeat_end_emit_encodeSnappyBlockAsm8B
-
-emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B:
- MOVB $0x02, BL
- LEAL -4(BX)(SI*4), SI
- MOVB SI, (AX)
- MOVW DI, 1(AX)
- ADDQ $0x03, AX
-
-repeat_end_emit_encodeSnappyBlockAsm8B:
- MOVL CX, 12(SP)
- JMP search_loop_encodeSnappyBlockAsm8B
-
-no_repeat_found_encodeSnappyBlockAsm8B:
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeSnappyBlockAsm8B
- SHRQ $0x08, DI
- MOVL 24(SP)(R10*4), SI
- LEAL 2(CX), R9
- CMPL (DX)(R8*1), DI
- JEQ candidate2_match_encodeSnappyBlockAsm8B
- MOVL R9, 24(SP)(R10*4)
- SHRQ $0x08, DI
- CMPL (DX)(SI*1), DI
- JEQ candidate3_match_encodeSnappyBlockAsm8B
- MOVL 20(SP), CX
- JMP search_loop_encodeSnappyBlockAsm8B
-
-candidate3_match_encodeSnappyBlockAsm8B:
- ADDL $0x02, CX
- JMP candidate_match_encodeSnappyBlockAsm8B
-
-candidate2_match_encodeSnappyBlockAsm8B:
- MOVL R9, 24(SP)(R10*4)
- INCL CX
- MOVL R8, SI
-
-candidate_match_encodeSnappyBlockAsm8B:
- MOVL 12(SP), DI
- TESTL SI, SI
- JZ match_extend_back_end_encodeSnappyBlockAsm8B
-
-match_extend_back_loop_encodeSnappyBlockAsm8B:
- CMPL CX, DI
- JLE match_extend_back_end_encodeSnappyBlockAsm8B
- MOVB -1(DX)(SI*1), BL
- MOVB -1(DX)(CX*1), R8
- CMPB BL, R8
- JNE match_extend_back_end_encodeSnappyBlockAsm8B
- LEAL -1(CX), CX
- DECL SI
- JZ match_extend_back_end_encodeSnappyBlockAsm8B
- JMP match_extend_back_loop_encodeSnappyBlockAsm8B
-
-match_extend_back_end_encodeSnappyBlockAsm8B:
- MOVL CX, DI
- SUBL 12(SP), DI
- LEAQ 3(AX)(DI*1), DI
- CMPQ DI, (SP)
- JL match_dst_size_check_encodeSnappyBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_dst_size_check_encodeSnappyBlockAsm8B:
- MOVL CX, DI
- MOVL 12(SP), R8
- CMPL R8, DI
- JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm8B
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(R8*1), DI
- SUBL R8, R9
- LEAL -1(R9), R8
- CMPL R8, $0x3c
- JLT one_byte_match_emit_encodeSnappyBlockAsm8B
- CMPL R8, $0x00000100
- JLT two_bytes_match_emit_encodeSnappyBlockAsm8B
- MOVB $0xf4, (AX)
- MOVW R8, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_match_emit_encodeSnappyBlockAsm8B
-
-two_bytes_match_emit_encodeSnappyBlockAsm8B:
- MOVB $0xf0, (AX)
- MOVB R8, 1(AX)
- ADDQ $0x02, AX
- CMPL R8, $0x40
- JL memmove_match_emit_encodeSnappyBlockAsm8B
- JMP memmove_long_match_emit_encodeSnappyBlockAsm8B
-
-one_byte_match_emit_encodeSnappyBlockAsm8B:
- SHLB $0x02, R8
- MOVB R8, (AX)
- ADDQ $0x01, AX
-
-memmove_match_emit_encodeSnappyBlockAsm8B:
- LEAQ (AX)(R9*1), R8
-
- // genMemMoveShort
- CMPQ R9, $0x08
- JLE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8
- CMPQ R9, $0x10
- JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
- JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64
-
-emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8:
- MOVQ (DI), R10
- MOVQ R10, (AX)
- JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
-
-emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16:
- MOVQ (DI), R10
- MOVQ -8(DI)(R9*1), DI
- MOVQ R10, (AX)
- MOVQ DI, -8(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
-
-emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
- MOVOU (DI), X0
- MOVOU -16(DI)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
-
-emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
- MOVOU (DI), X0
- MOVOU 16(DI), X1
- MOVOU -32(DI)(R9*1), X2
- MOVOU -16(DI)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_match_emit_encodeSnappyBlockAsm8B:
- MOVQ R8, AX
- JMP emit_literal_done_match_emit_encodeSnappyBlockAsm8B
-
-memmove_long_match_emit_encodeSnappyBlockAsm8B:
- LEAQ (AX)(R9*1), R8
-
- // genMemMoveLong
- MOVOU (DI), X0
- MOVOU 16(DI), X1
- MOVOU -32(DI)(R9*1), X2
- MOVOU -16(DI)(R9*1), X3
- MOVQ R9, R11
- SHRQ $0x05, R11
- MOVQ AX, R10
- ANDL $0x0000001f, R10
- MOVQ $0x00000040, R12
- SUBQ R10, R12
- DECQ R11
- JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
- LEAQ -32(DI)(R12*1), R10
- LEAQ -32(AX)(R12*1), R13
-
-emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back:
- MOVOU (R10), X4
- MOVOU 16(R10), X5
- MOVOA X4, (R13)
- MOVOA X5, 16(R13)
- ADDQ $0x20, R13
- ADDQ $0x20, R10
- ADDQ $0x20, R12
- DECQ R11
- JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
- MOVOU -32(DI)(R12*1), X4
- MOVOU -16(DI)(R12*1), X5
- MOVOA X4, -32(AX)(R12*1)
- MOVOA X5, -16(AX)(R12*1)
- ADDQ $0x20, R12
- CMPQ R9, R12
- JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ R8, AX
-
-emit_literal_done_match_emit_encodeSnappyBlockAsm8B:
-match_nolit_loop_encodeSnappyBlockAsm8B:
- MOVL CX, DI
- SUBL SI, DI
- MOVL DI, 16(SP)
- ADDL $0x04, CX
- ADDL $0x04, SI
- MOVQ src_len+32(FP), DI
- SUBL CX, DI
- LEAQ (DX)(CX*1), R8
- LEAQ (DX)(SI*1), SI
-
- // matchLen
- XORL R10, R10
- CMPL DI, $0x08
- JL matchlen_match4_match_nolit_encodeSnappyBlockAsm8B
-
-matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B:
- MOVQ (R8)(R10*1), R9
- XORQ (SI)(R10*1), R9
- TESTQ R9, R9
- JZ matchlen_loop_match_nolit_encodeSnappyBlockAsm8B
-
-#ifdef GOAMD64_v3
- TZCNTQ R9, R9
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R9, R9
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R9, R9
-
-#endif
- SARQ $0x03, R9
- LEAL (R10)(R9*1), R10
- JMP match_nolit_end_encodeSnappyBlockAsm8B
-
-matchlen_loop_match_nolit_encodeSnappyBlockAsm8B:
- LEAL -8(DI), DI
- LEAL 8(R10), R10
- CMPL DI, $0x08
- JGE matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B
- JZ match_nolit_end_encodeSnappyBlockAsm8B
-
-matchlen_match4_match_nolit_encodeSnappyBlockAsm8B:
- CMPL DI, $0x04
- JL matchlen_match2_match_nolit_encodeSnappyBlockAsm8B
- MOVL (R8)(R10*1), R9
- CMPL (SI)(R10*1), R9
- JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm8B
- SUBL $0x04, DI
- LEAL 4(R10), R10
-
-matchlen_match2_match_nolit_encodeSnappyBlockAsm8B:
- CMPL DI, $0x02
- JL matchlen_match1_match_nolit_encodeSnappyBlockAsm8B
- MOVW (R8)(R10*1), R9
- CMPW (SI)(R10*1), R9
- JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm8B
- SUBL $0x02, DI
- LEAL 2(R10), R10
-
-matchlen_match1_match_nolit_encodeSnappyBlockAsm8B:
- CMPL DI, $0x01
- JL match_nolit_end_encodeSnappyBlockAsm8B
- MOVB (R8)(R10*1), R9
- CMPB (SI)(R10*1), R9
- JNE match_nolit_end_encodeSnappyBlockAsm8B
- LEAL 1(R10), R10
-
-match_nolit_end_encodeSnappyBlockAsm8B:
- ADDL R10, CX
- MOVL 16(SP), SI
- ADDL $0x04, R10
- MOVL CX, 12(SP)
-
- // emitCopy
-two_byte_offset_match_nolit_encodeSnappyBlockAsm8B:
- CMPL R10, $0x40
- JLE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B
- MOVB $0xee, (AX)
- MOVW SI, 1(AX)
- LEAL -60(R10), R10
- ADDQ $0x03, AX
- JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm8B
-
-two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B:
- CMPL R10, $0x0c
- JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm8B
- MOVB $0x01, BL
- LEAL -16(BX)(R10*4), R10
- MOVB SI, 1(AX)
- SHRL $0x08, SI
- SHLL $0x05, SI
- ORL SI, R10
- MOVB R10, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm8B
-
-emit_copy_three_match_nolit_encodeSnappyBlockAsm8B:
- MOVB $0x02, BL
- LEAL -4(BX)(R10*4), R10
- MOVB R10, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
-
-match_nolit_emitcopy_end_encodeSnappyBlockAsm8B:
- CMPL CX, 8(SP)
- JGE emit_remainder_encodeSnappyBlockAsm8B
- MOVQ -2(DX)(CX*1), DI
- CMPQ AX, (SP)
- JL match_nolit_dst_ok_encodeSnappyBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_nolit_dst_ok_encodeSnappyBlockAsm8B:
- MOVQ $0x9e3779b1, R9
- MOVQ DI, R8
- SHRQ $0x10, DI
- MOVQ DI, SI
- SHLQ $0x20, R8
- IMULQ R9, R8
- SHRQ $0x38, R8
- SHLQ $0x20, SI
- IMULQ R9, SI
- SHRQ $0x38, SI
- LEAL -2(CX), R9
- LEAQ 24(SP)(SI*4), R10
- MOVL (R10), SI
- MOVL R9, 24(SP)(R8*4)
- MOVL CX, (R10)
- CMPL (DX)(SI*1), DI
- JEQ match_nolit_loop_encodeSnappyBlockAsm8B
- INCL CX
- JMP search_loop_encodeSnappyBlockAsm8B
-
-emit_remainder_encodeSnappyBlockAsm8B:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
- JL emit_remainder_ok_encodeSnappyBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-emit_remainder_ok_encodeSnappyBlockAsm8B:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
- JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
- LEAL -1(SI), DX
- CMPL DX, $0x3c
- JLT one_byte_emit_remainder_encodeSnappyBlockAsm8B
- CMPL DX, $0x00000100
- JLT two_bytes_emit_remainder_encodeSnappyBlockAsm8B
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B
-
-two_bytes_emit_remainder_encodeSnappyBlockAsm8B:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
- CMPL DX, $0x40
- JL memmove_emit_remainder_encodeSnappyBlockAsm8B
- JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B
-
-one_byte_emit_remainder_encodeSnappyBlockAsm8B:
- SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
-
-memmove_emit_remainder_encodeSnappyBlockAsm8B:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveShort
- CMPQ BX, $0x08
- JLE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8
- CMPQ BX, $0x10
- JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16
- CMPQ BX, $0x20
- JBE emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32
- JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64
-
-emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8:
- MOVQ (CX), SI
- MOVQ SI, (AX)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
-
-emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
-
-emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
-
-emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
-
-memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B:
- MOVQ DX, AX
- JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B
-
-memmove_long_emit_remainder_encodeSnappyBlockAsm8B:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVQ BX, DI
- SHRQ $0x05, DI
- MOVQ AX, SI
- ANDL $0x0000001f, SI
- MOVQ $0x00000040, R8
- SUBQ SI, R8
- DECQ DI
- JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
-
-emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back:
- MOVOU (SI), X4
- MOVOU 16(SI), X5
- MOVOA X4, (R9)
- MOVOA X5, 16(R9)
- ADDQ $0x20, R9
- ADDQ $0x20, SI
- ADDQ $0x20, R8
- DECQ DI
- JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back
-
-emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
- ADDQ $0x20, R8
- CMPQ BX, R8
- JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
-
-emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
- RET
-
-// func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int
-// Requires: BMI, SSE2
-TEXT ·encodeSnappyBetterBlockAsm(SB), $327704-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000a00, CX
- LEAQ 24(SP), DX
- PXOR X0, X0
-
-zero_loop_encodeSnappyBetterBlockAsm:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
- JNZ zero_loop_encodeSnappyBetterBlockAsm
- MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), SI
- MOVL SI, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL $0x00000000, 16(SP)
- MOVQ src_base+24(FP), DX
-
-search_loop_encodeSnappyBetterBlockAsm:
- MOVL CX, SI
- SUBL 12(SP), SI
- SHRL $0x07, SI
- CMPL SI, $0x63
- JLE check_maxskip_ok_encodeSnappyBetterBlockAsm
- LEAL 100(CX), SI
- JMP check_maxskip_cont_encodeSnappyBetterBlockAsm
-
-check_maxskip_ok_encodeSnappyBetterBlockAsm:
- LEAL 1(CX)(SI*1), SI
-
-check_maxskip_cont_encodeSnappyBetterBlockAsm:
- CMPL SI, 8(SP)
- JGE emit_remainder_encodeSnappyBetterBlockAsm
- MOVQ (DX)(CX*1), DI
- MOVL SI, 20(SP)
- MOVQ $0x00cf1bbcdcbfa563, R9
- MOVQ $0x9e3779b1, SI
- MOVQ DI, R10
- MOVQ DI, R11
- SHLQ $0x08, R10
- IMULQ R9, R10
- SHRQ $0x30, R10
- SHLQ $0x20, R11
- IMULQ SI, R11
- SHRQ $0x32, R11
- MOVL 24(SP)(R10*4), SI
- MOVL 262168(SP)(R11*4), R8
- MOVL CX, 24(SP)(R10*4)
- MOVL CX, 262168(SP)(R11*4)
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeSnappyBetterBlockAsm
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeSnappyBetterBlockAsm
- MOVL 20(SP), CX
- JMP search_loop_encodeSnappyBetterBlockAsm
-
-candidateS_match_encodeSnappyBetterBlockAsm:
- SHRQ $0x08, DI
- MOVQ DI, R10
- SHLQ $0x08, R10
- IMULQ R9, R10
- SHRQ $0x30, R10
- MOVL 24(SP)(R10*4), SI
- INCL CX
- MOVL CX, 24(SP)(R10*4)
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeSnappyBetterBlockAsm
- DECL CX
- MOVL R8, SI
-
-candidate_match_encodeSnappyBetterBlockAsm:
- MOVL 12(SP), DI
- TESTL SI, SI
- JZ match_extend_back_end_encodeSnappyBetterBlockAsm
-
-match_extend_back_loop_encodeSnappyBetterBlockAsm:
- CMPL CX, DI
- JLE match_extend_back_end_encodeSnappyBetterBlockAsm
- MOVB -1(DX)(SI*1), BL
- MOVB -1(DX)(CX*1), R8
- CMPB BL, R8
- JNE match_extend_back_end_encodeSnappyBetterBlockAsm
- LEAL -1(CX), CX
- DECL SI
- JZ match_extend_back_end_encodeSnappyBetterBlockAsm
- JMP match_extend_back_loop_encodeSnappyBetterBlockAsm
-
-match_extend_back_end_encodeSnappyBetterBlockAsm:
- MOVL CX, DI
- SUBL 12(SP), DI
- LEAQ 5(AX)(DI*1), DI
- CMPQ DI, (SP)
- JL match_dst_size_check_encodeSnappyBetterBlockAsm
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_dst_size_check_encodeSnappyBetterBlockAsm:
- MOVL CX, DI
- ADDL $0x04, CX
- ADDL $0x04, SI
- MOVQ src_len+32(FP), R8
- SUBL CX, R8
- LEAQ (DX)(CX*1), R9
- LEAQ (DX)(SI*1), R10
-
- // matchLen
- XORL R12, R12
- CMPL R8, $0x08
- JL matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm
-
-matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm:
- MOVQ (R9)(R12*1), R11
- XORQ (R10)(R12*1), R11
- TESTQ R11, R11
- JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm
-
-#ifdef GOAMD64_v3
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R11, R11
-
-#endif
- SARQ $0x03, R11
- LEAL (R12)(R11*1), R12
- JMP match_nolit_end_encodeSnappyBetterBlockAsm
-
-matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm:
- LEAL -8(R8), R8
- LEAL 8(R12), R12
- CMPL R8, $0x08
- JGE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm
- JZ match_nolit_end_encodeSnappyBetterBlockAsm
-
-matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm:
- CMPL R8, $0x04
- JL matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm
- MOVL (R9)(R12*1), R11
- CMPL (R10)(R12*1), R11
- JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm
- SUBL $0x04, R8
- LEAL 4(R12), R12
-
-matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm:
- CMPL R8, $0x02
- JL matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm
- MOVW (R9)(R12*1), R11
- CMPW (R10)(R12*1), R11
- JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm
- SUBL $0x02, R8
- LEAL 2(R12), R12
-
-matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm:
- CMPL R8, $0x01
- JL match_nolit_end_encodeSnappyBetterBlockAsm
- MOVB (R9)(R12*1), R11
- CMPB (R10)(R12*1), R11
- JNE match_nolit_end_encodeSnappyBetterBlockAsm
- LEAL 1(R12), R12
-
-match_nolit_end_encodeSnappyBetterBlockAsm:
- MOVL CX, R8
- SUBL SI, R8
-
- // Check if repeat
- CMPL R12, $0x01
- JG match_length_ok_encodeSnappyBetterBlockAsm
- CMPL R8, $0x0000ffff
- JLE match_length_ok_encodeSnappyBetterBlockAsm
- MOVL 20(SP), CX
- INCL CX
- JMP search_loop_encodeSnappyBetterBlockAsm
-
-match_length_ok_encodeSnappyBetterBlockAsm:
- MOVL R8, 16(SP)
- MOVL 12(SP), SI
- CMPL SI, DI
- JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(SI*1), R10
- SUBL SI, R9
- LEAL -1(R9), SI
- CMPL SI, $0x3c
- JLT one_byte_match_emit_encodeSnappyBetterBlockAsm
- CMPL SI, $0x00000100
- JLT two_bytes_match_emit_encodeSnappyBetterBlockAsm
- CMPL SI, $0x00010000
- JLT three_bytes_match_emit_encodeSnappyBetterBlockAsm
- CMPL SI, $0x01000000
- JLT four_bytes_match_emit_encodeSnappyBetterBlockAsm
- MOVB $0xfc, (AX)
- MOVL SI, 1(AX)
- ADDQ $0x05, AX
- JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm
-
-four_bytes_match_emit_encodeSnappyBetterBlockAsm:
- MOVL SI, R11
- SHRL $0x10, R11
- MOVB $0xf8, (AX)
- MOVW SI, 1(AX)
- MOVB R11, 3(AX)
- ADDQ $0x04, AX
- JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm
-
-three_bytes_match_emit_encodeSnappyBetterBlockAsm:
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm
-
-two_bytes_match_emit_encodeSnappyBetterBlockAsm:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_match_emit_encodeSnappyBetterBlockAsm
- JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm
-
-one_byte_match_emit_encodeSnappyBetterBlockAsm:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, AX
-
-memmove_match_emit_encodeSnappyBetterBlockAsm:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveShort
- CMPQ R9, $0x08
- JLE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8
- CMPQ R9, $0x10
- JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32
- JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64
-
-emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8:
- MOVQ (R10), R11
- MOVQ R11, (AX)
- JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
-
-emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16:
- MOVQ (R10), R11
- MOVQ -8(R10)(R9*1), R10
- MOVQ R11, (AX)
- MOVQ R10, -8(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
-
-emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32:
- MOVOU (R10), X0
- MOVOU -16(R10)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
-
-emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64:
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm:
- MOVQ SI, AX
- JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm
-
-memmove_long_match_emit_encodeSnappyBetterBlockAsm:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveLong
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVQ R9, R13
- SHRQ $0x05, R13
- MOVQ AX, R11
- ANDL $0x0000001f, R11
- MOVQ $0x00000040, R14
- SUBQ R11, R14
- DECQ R13
- JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
- LEAQ -32(R10)(R14*1), R11
- LEAQ -32(AX)(R14*1), R15
-
-emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back:
- MOVOU (R11), X4
- MOVOU 16(R11), X5
- MOVOA X4, (R15)
- MOVOA X5, 16(R15)
- ADDQ $0x20, R15
- ADDQ $0x20, R11
- ADDQ $0x20, R14
- DECQ R13
- JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32:
- MOVOU -32(R10)(R14*1), X4
- MOVOU -16(R10)(R14*1), X5
- MOVOA X4, -32(AX)(R14*1)
- MOVOA X5, -16(AX)(R14*1)
- ADDQ $0x20, R14
- CMPQ R9, R14
- JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ SI, AX
-
-emit_literal_done_match_emit_encodeSnappyBetterBlockAsm:
- ADDL R12, CX
- ADDL $0x04, R12
- MOVL CX, 12(SP)
-
- // emitCopy
- CMPL R8, $0x00010000
- JL two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm
-
-four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm:
- CMPL R12, $0x40
- JLE four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm
- MOVB $0xff, (AX)
- MOVL R8, 1(AX)
- LEAL -64(R12), R12
- ADDQ $0x05, AX
- CMPL R12, $0x04
- JL four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm
- JMP four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm
-
-four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm:
- TESTL R12, R12
- JZ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
- MOVB $0x03, BL
- LEAL -4(BX)(R12*4), R12
- MOVB R12, (AX)
- MOVL R8, 1(AX)
- ADDQ $0x05, AX
- JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
-
-two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm:
- CMPL R12, $0x40
- JLE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm
- MOVB $0xee, (AX)
- MOVW R8, 1(AX)
- LEAL -60(R12), R12
- ADDQ $0x03, AX
- JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm
-
-two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm:
- CMPL R12, $0x0c
- JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm
- CMPL R8, $0x00000800
- JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm
- MOVB $0x01, BL
- LEAL -16(BX)(R12*4), R12
- MOVB R8, 1(AX)
- SHRL $0x08, R8
- SHLL $0x05, R8
- ORL R8, R12
- MOVB R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
-
-emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm:
- MOVB $0x02, BL
- LEAL -4(BX)(R12*4), R12
- MOVB R12, (AX)
- MOVW R8, 1(AX)
- ADDQ $0x03, AX
-
-match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm:
- CMPL CX, 8(SP)
- JGE emit_remainder_encodeSnappyBetterBlockAsm
- CMPQ AX, (SP)
- JL match_nolit_dst_ok_encodeSnappyBetterBlockAsm
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_nolit_dst_ok_encodeSnappyBetterBlockAsm:
- MOVQ $0x00cf1bbcdcbfa563, SI
- MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
- SHLQ $0x08, R10
- IMULQ SI, R10
- SHRQ $0x30, R10
- SHLQ $0x08, R13
- IMULQ SI, R13
- SHRQ $0x30, R13
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x32, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x32, R12
- MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 262168(SP)(R11*4)
- MOVL R15, 262168(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
- SHLQ $0x08, R10
- IMULQ SI, R10
- SHRQ $0x30, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x32, R11
- SHLQ $0x08, R13
- IMULQ SI, R13
- SHRQ $0x30, R13
- MOVL R9, 24(SP)(R10*4)
- MOVL DI, 262168(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeSnappyBetterBlockAsm
-
-emit_remainder_encodeSnappyBetterBlockAsm:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 5(AX)(CX*1), CX
- CMPQ CX, (SP)
- JL emit_remainder_ok_encodeSnappyBetterBlockAsm
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-emit_remainder_ok_encodeSnappyBetterBlockAsm:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
- JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
- LEAL -1(SI), DX
- CMPL DX, $0x3c
- JLT one_byte_emit_remainder_encodeSnappyBetterBlockAsm
- CMPL DX, $0x00000100
- JLT two_bytes_emit_remainder_encodeSnappyBetterBlockAsm
- CMPL DX, $0x00010000
- JLT three_bytes_emit_remainder_encodeSnappyBetterBlockAsm
- CMPL DX, $0x01000000
- JLT four_bytes_emit_remainder_encodeSnappyBetterBlockAsm
- MOVB $0xfc, (AX)
- MOVL DX, 1(AX)
- ADDQ $0x05, AX
- JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
-
-four_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
- MOVL DX, BX
- SHRL $0x10, BX
- MOVB $0xf8, (AX)
- MOVW DX, 1(AX)
- MOVB BL, 3(AX)
- ADDQ $0x04, AX
- JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
-
-three_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
-
-two_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
- CMPL DX, $0x40
- JL memmove_emit_remainder_encodeSnappyBetterBlockAsm
- JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
-
-one_byte_emit_remainder_encodeSnappyBetterBlockAsm:
- SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
-
-memmove_emit_remainder_encodeSnappyBetterBlockAsm:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveShort
- CMPQ BX, $0x08
- JLE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8
- CMPQ BX, $0x10
- JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16
- CMPQ BX, $0x20
- JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32
- JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64
-
-emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8:
- MOVQ (CX), SI
- MOVQ SI, (AX)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
-
-emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
-
-emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
-
-emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
-
-memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm:
- MOVQ DX, AX
- JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm
-
-memmove_long_emit_remainder_encodeSnappyBetterBlockAsm:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVQ BX, DI
- SHRQ $0x05, DI
- MOVQ AX, SI
- ANDL $0x0000001f, SI
- MOVQ $0x00000040, R8
- SUBQ SI, R8
- DECQ DI
- JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
-
-emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back:
- MOVOU (SI), X4
- MOVOU 16(SI), X5
- MOVOA X4, (R9)
- MOVOA X5, 16(R9)
- ADDQ $0x20, R9
- ADDQ $0x20, SI
- ADDQ $0x20, R8
- DECQ DI
- JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back
-
-emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
- ADDQ $0x20, R8
- CMPQ BX, R8
- JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
-
-emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
- RET
-
-// func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int
-// Requires: BMI, SSE2
-TEXT ·encodeSnappyBetterBlockAsm64K(SB), $327704-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000a00, CX
- LEAQ 24(SP), DX
- PXOR X0, X0
-
-zero_loop_encodeSnappyBetterBlockAsm64K:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
- JNZ zero_loop_encodeSnappyBetterBlockAsm64K
- MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), SI
- MOVL SI, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL $0x00000000, 16(SP)
- MOVQ src_base+24(FP), DX
-
-search_loop_encodeSnappyBetterBlockAsm64K:
- MOVL CX, SI
- SUBL 12(SP), SI
- SHRL $0x07, SI
- LEAL 1(CX)(SI*1), SI
- CMPL SI, 8(SP)
- JGE emit_remainder_encodeSnappyBetterBlockAsm64K
- MOVQ (DX)(CX*1), DI
- MOVL SI, 20(SP)
- MOVQ $0x00cf1bbcdcbfa563, R9
- MOVQ $0x9e3779b1, SI
- MOVQ DI, R10
- MOVQ DI, R11
- SHLQ $0x08, R10
- IMULQ R9, R10
- SHRQ $0x30, R10
- SHLQ $0x20, R11
- IMULQ SI, R11
- SHRQ $0x32, R11
- MOVL 24(SP)(R10*4), SI
- MOVL 262168(SP)(R11*4), R8
- MOVL CX, 24(SP)(R10*4)
- MOVL CX, 262168(SP)(R11*4)
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeSnappyBetterBlockAsm64K
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeSnappyBetterBlockAsm64K
- MOVL 20(SP), CX
- JMP search_loop_encodeSnappyBetterBlockAsm64K
-
-candidateS_match_encodeSnappyBetterBlockAsm64K:
- SHRQ $0x08, DI
- MOVQ DI, R10
- SHLQ $0x08, R10
- IMULQ R9, R10
- SHRQ $0x30, R10
- MOVL 24(SP)(R10*4), SI
- INCL CX
- MOVL CX, 24(SP)(R10*4)
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeSnappyBetterBlockAsm64K
- DECL CX
- MOVL R8, SI
-
-candidate_match_encodeSnappyBetterBlockAsm64K:
- MOVL 12(SP), DI
- TESTL SI, SI
- JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K
-
-match_extend_back_loop_encodeSnappyBetterBlockAsm64K:
- CMPL CX, DI
- JLE match_extend_back_end_encodeSnappyBetterBlockAsm64K
- MOVB -1(DX)(SI*1), BL
- MOVB -1(DX)(CX*1), R8
- CMPB BL, R8
- JNE match_extend_back_end_encodeSnappyBetterBlockAsm64K
- LEAL -1(CX), CX
- DECL SI
- JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K
- JMP match_extend_back_loop_encodeSnappyBetterBlockAsm64K
-
-match_extend_back_end_encodeSnappyBetterBlockAsm64K:
- MOVL CX, DI
- SUBL 12(SP), DI
- LEAQ 3(AX)(DI*1), DI
- CMPQ DI, (SP)
- JL match_dst_size_check_encodeSnappyBetterBlockAsm64K
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_dst_size_check_encodeSnappyBetterBlockAsm64K:
- MOVL CX, DI
- ADDL $0x04, CX
- ADDL $0x04, SI
- MOVQ src_len+32(FP), R8
- SUBL CX, R8
- LEAQ (DX)(CX*1), R9
- LEAQ (DX)(SI*1), R10
-
- // matchLen
- XORL R12, R12
- CMPL R8, $0x08
- JL matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K
-
-matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K:
- MOVQ (R9)(R12*1), R11
- XORQ (R10)(R12*1), R11
- TESTQ R11, R11
- JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm64K
-
-#ifdef GOAMD64_v3
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R11, R11
-
-#endif
- SARQ $0x03, R11
- LEAL (R12)(R11*1), R12
- JMP match_nolit_end_encodeSnappyBetterBlockAsm64K
-
-matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm64K:
- LEAL -8(R8), R8
- LEAL 8(R12), R12
- CMPL R8, $0x08
- JGE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K
- JZ match_nolit_end_encodeSnappyBetterBlockAsm64K
-
-matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K:
- CMPL R8, $0x04
- JL matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K
- MOVL (R9)(R12*1), R11
- CMPL (R10)(R12*1), R11
- JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K
- SUBL $0x04, R8
- LEAL 4(R12), R12
-
-matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K:
- CMPL R8, $0x02
- JL matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K
- MOVW (R9)(R12*1), R11
- CMPW (R10)(R12*1), R11
- JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K
- SUBL $0x02, R8
- LEAL 2(R12), R12
-
-matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K:
- CMPL R8, $0x01
- JL match_nolit_end_encodeSnappyBetterBlockAsm64K
- MOVB (R9)(R12*1), R11
- CMPB (R10)(R12*1), R11
- JNE match_nolit_end_encodeSnappyBetterBlockAsm64K
- LEAL 1(R12), R12
-
-match_nolit_end_encodeSnappyBetterBlockAsm64K:
- MOVL CX, R8
- SUBL SI, R8
-
- // Check if repeat
- MOVL R8, 16(SP)
- MOVL 12(SP), SI
- CMPL SI, DI
- JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(SI*1), R10
- SUBL SI, R9
- LEAL -1(R9), SI
- CMPL SI, $0x3c
- JLT one_byte_match_emit_encodeSnappyBetterBlockAsm64K
- CMPL SI, $0x00000100
- JLT two_bytes_match_emit_encodeSnappyBetterBlockAsm64K
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K
-
-two_bytes_match_emit_encodeSnappyBetterBlockAsm64K:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_match_emit_encodeSnappyBetterBlockAsm64K
- JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K
-
-one_byte_match_emit_encodeSnappyBetterBlockAsm64K:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, AX
-
-memmove_match_emit_encodeSnappyBetterBlockAsm64K:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveShort
- CMPQ R9, $0x08
- JLE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8
- CMPQ R9, $0x10
- JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32
- JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64
-
-emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8:
- MOVQ (R10), R11
- MOVQ R11, (AX)
- JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
-
-emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16:
- MOVQ (R10), R11
- MOVQ -8(R10)(R9*1), R10
- MOVQ R11, (AX)
- MOVQ R10, -8(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
-
-emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32:
- MOVOU (R10), X0
- MOVOU -16(R10)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
-
-emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64:
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K:
- MOVQ SI, AX
- JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K
-
-memmove_long_match_emit_encodeSnappyBetterBlockAsm64K:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveLong
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVQ R9, R13
- SHRQ $0x05, R13
- MOVQ AX, R11
- ANDL $0x0000001f, R11
- MOVQ $0x00000040, R14
- SUBQ R11, R14
- DECQ R13
- JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
- LEAQ -32(R10)(R14*1), R11
- LEAQ -32(AX)(R14*1), R15
-
-emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back:
- MOVOU (R11), X4
- MOVOU 16(R11), X5
- MOVOA X4, (R15)
- MOVOA X5, 16(R15)
- ADDQ $0x20, R15
- ADDQ $0x20, R11
- ADDQ $0x20, R14
- DECQ R13
- JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32:
- MOVOU -32(R10)(R14*1), X4
- MOVOU -16(R10)(R14*1), X5
- MOVOA X4, -32(AX)(R14*1)
- MOVOA X5, -16(AX)(R14*1)
- ADDQ $0x20, R14
- CMPQ R9, R14
- JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ SI, AX
-
-emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K:
- ADDL R12, CX
- ADDL $0x04, R12
- MOVL CX, 12(SP)
-
- // emitCopy
-two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K:
- CMPL R12, $0x40
- JLE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K
- MOVB $0xee, (AX)
- MOVW R8, 1(AX)
- LEAL -60(R12), R12
- ADDQ $0x03, AX
- JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K
-
-two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K:
- CMPL R12, $0x0c
- JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K
- CMPL R8, $0x00000800
- JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K
- MOVB $0x01, BL
- LEAL -16(BX)(R12*4), R12
- MOVB R8, 1(AX)
- SHRL $0x08, R8
- SHLL $0x05, R8
- ORL R8, R12
- MOVB R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K
-
-emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K:
- MOVB $0x02, BL
- LEAL -4(BX)(R12*4), R12
- MOVB R12, (AX)
- MOVW R8, 1(AX)
- ADDQ $0x03, AX
-
-match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K:
- CMPL CX, 8(SP)
- JGE emit_remainder_encodeSnappyBetterBlockAsm64K
- CMPQ AX, (SP)
- JL match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K:
- MOVQ $0x00cf1bbcdcbfa563, SI
- MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
- SHLQ $0x08, R10
- IMULQ SI, R10
- SHRQ $0x30, R10
- SHLQ $0x08, R13
- IMULQ SI, R13
- SHRQ $0x30, R13
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x32, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x32, R12
- MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 262168(SP)(R11*4)
- MOVL R15, 262168(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
- SHLQ $0x08, R10
- IMULQ SI, R10
- SHRQ $0x30, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x32, R11
- SHLQ $0x08, R13
- IMULQ SI, R13
- SHRQ $0x30, R13
- MOVL R9, 24(SP)(R10*4)
- MOVL DI, 262168(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeSnappyBetterBlockAsm64K
-
-emit_remainder_encodeSnappyBetterBlockAsm64K:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
- JL emit_remainder_ok_encodeSnappyBetterBlockAsm64K
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-emit_remainder_ok_encodeSnappyBetterBlockAsm64K:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
- JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
- LEAL -1(SI), DX
- CMPL DX, $0x3c
- JLT one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K
- CMPL DX, $0x00000100
- JLT two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K
-
-two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
- CMPL DX, $0x40
- JL memmove_emit_remainder_encodeSnappyBetterBlockAsm64K
- JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K
-
-one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K:
- SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
-
-memmove_emit_remainder_encodeSnappyBetterBlockAsm64K:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveShort
- CMPQ BX, $0x08
- JLE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8
- CMPQ BX, $0x10
- JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16
- CMPQ BX, $0x20
- JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32
- JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64
-
-emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8:
- MOVQ (CX), SI
- MOVQ SI, (AX)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
-
-emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
-
-emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
-
-emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
-
-memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K:
- MOVQ DX, AX
- JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K
-
-memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVQ BX, DI
- SHRQ $0x05, DI
- MOVQ AX, SI
- ANDL $0x0000001f, SI
- MOVQ $0x00000040, R8
- SUBQ SI, R8
- DECQ DI
- JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
-
-emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back:
- MOVOU (SI), X4
- MOVOU 16(SI), X5
- MOVOA X4, (R9)
- MOVOA X5, 16(R9)
- ADDQ $0x20, R9
- ADDQ $0x20, SI
- ADDQ $0x20, R8
- DECQ DI
- JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back
-
-emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
- ADDQ $0x20, R8
- CMPQ BX, R8
- JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
-
-emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
- RET
-
-// func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int
-// Requires: BMI, SSE2
-TEXT ·encodeSnappyBetterBlockAsm12B(SB), $81944-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000280, CX
- LEAQ 24(SP), DX
- PXOR X0, X0
-
-zero_loop_encodeSnappyBetterBlockAsm12B:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
- JNZ zero_loop_encodeSnappyBetterBlockAsm12B
- MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), SI
- MOVL SI, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL $0x00000000, 16(SP)
- MOVQ src_base+24(FP), DX
-
-search_loop_encodeSnappyBetterBlockAsm12B:
- MOVL CX, SI
- SUBL 12(SP), SI
- SHRL $0x06, SI
- LEAL 1(CX)(SI*1), SI
- CMPL SI, 8(SP)
- JGE emit_remainder_encodeSnappyBetterBlockAsm12B
- MOVQ (DX)(CX*1), DI
- MOVL SI, 20(SP)
- MOVQ $0x0000cf1bbcdcbf9b, R9
- MOVQ $0x9e3779b1, SI
- MOVQ DI, R10
- MOVQ DI, R11
- SHLQ $0x10, R10
- IMULQ R9, R10
- SHRQ $0x32, R10
- SHLQ $0x20, R11
- IMULQ SI, R11
- SHRQ $0x34, R11
- MOVL 24(SP)(R10*4), SI
- MOVL 65560(SP)(R11*4), R8
- MOVL CX, 24(SP)(R10*4)
- MOVL CX, 65560(SP)(R11*4)
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeSnappyBetterBlockAsm12B
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeSnappyBetterBlockAsm12B
- MOVL 20(SP), CX
- JMP search_loop_encodeSnappyBetterBlockAsm12B
-
-candidateS_match_encodeSnappyBetterBlockAsm12B:
- SHRQ $0x08, DI
- MOVQ DI, R10
- SHLQ $0x10, R10
- IMULQ R9, R10
- SHRQ $0x32, R10
- MOVL 24(SP)(R10*4), SI
- INCL CX
- MOVL CX, 24(SP)(R10*4)
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeSnappyBetterBlockAsm12B
- DECL CX
- MOVL R8, SI
-
-candidate_match_encodeSnappyBetterBlockAsm12B:
- MOVL 12(SP), DI
- TESTL SI, SI
- JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B
-
-match_extend_back_loop_encodeSnappyBetterBlockAsm12B:
- CMPL CX, DI
- JLE match_extend_back_end_encodeSnappyBetterBlockAsm12B
- MOVB -1(DX)(SI*1), BL
- MOVB -1(DX)(CX*1), R8
- CMPB BL, R8
- JNE match_extend_back_end_encodeSnappyBetterBlockAsm12B
- LEAL -1(CX), CX
- DECL SI
- JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B
- JMP match_extend_back_loop_encodeSnappyBetterBlockAsm12B
-
-match_extend_back_end_encodeSnappyBetterBlockAsm12B:
- MOVL CX, DI
- SUBL 12(SP), DI
- LEAQ 3(AX)(DI*1), DI
- CMPQ DI, (SP)
- JL match_dst_size_check_encodeSnappyBetterBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_dst_size_check_encodeSnappyBetterBlockAsm12B:
- MOVL CX, DI
- ADDL $0x04, CX
- ADDL $0x04, SI
- MOVQ src_len+32(FP), R8
- SUBL CX, R8
- LEAQ (DX)(CX*1), R9
- LEAQ (DX)(SI*1), R10
-
- // matchLen
- XORL R12, R12
- CMPL R8, $0x08
- JL matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B
-
-matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B:
- MOVQ (R9)(R12*1), R11
- XORQ (R10)(R12*1), R11
- TESTQ R11, R11
- JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm12B
-
-#ifdef GOAMD64_v3
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R11, R11
-
-#endif
- SARQ $0x03, R11
- LEAL (R12)(R11*1), R12
- JMP match_nolit_end_encodeSnappyBetterBlockAsm12B
-
-matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm12B:
- LEAL -8(R8), R8
- LEAL 8(R12), R12
- CMPL R8, $0x08
- JGE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B
- JZ match_nolit_end_encodeSnappyBetterBlockAsm12B
-
-matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B:
- CMPL R8, $0x04
- JL matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B
- MOVL (R9)(R12*1), R11
- CMPL (R10)(R12*1), R11
- JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B
- SUBL $0x04, R8
- LEAL 4(R12), R12
-
-matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B:
- CMPL R8, $0x02
- JL matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B
- MOVW (R9)(R12*1), R11
- CMPW (R10)(R12*1), R11
- JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B
- SUBL $0x02, R8
- LEAL 2(R12), R12
-
-matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B:
- CMPL R8, $0x01
- JL match_nolit_end_encodeSnappyBetterBlockAsm12B
- MOVB (R9)(R12*1), R11
- CMPB (R10)(R12*1), R11
- JNE match_nolit_end_encodeSnappyBetterBlockAsm12B
- LEAL 1(R12), R12
-
-match_nolit_end_encodeSnappyBetterBlockAsm12B:
- MOVL CX, R8
- SUBL SI, R8
-
- // Check if repeat
- MOVL R8, 16(SP)
- MOVL 12(SP), SI
- CMPL SI, DI
- JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(SI*1), R10
- SUBL SI, R9
- LEAL -1(R9), SI
- CMPL SI, $0x3c
- JLT one_byte_match_emit_encodeSnappyBetterBlockAsm12B
- CMPL SI, $0x00000100
- JLT two_bytes_match_emit_encodeSnappyBetterBlockAsm12B
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B
-
-two_bytes_match_emit_encodeSnappyBetterBlockAsm12B:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_match_emit_encodeSnappyBetterBlockAsm12B
- JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B
-
-one_byte_match_emit_encodeSnappyBetterBlockAsm12B:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, AX
-
-memmove_match_emit_encodeSnappyBetterBlockAsm12B:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveShort
- CMPQ R9, $0x08
- JLE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8
- CMPQ R9, $0x10
- JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32
- JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64
-
-emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8:
- MOVQ (R10), R11
- MOVQ R11, (AX)
- JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
-
-emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16:
- MOVQ (R10), R11
- MOVQ -8(R10)(R9*1), R10
- MOVQ R11, (AX)
- MOVQ R10, -8(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
-
-emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32:
- MOVOU (R10), X0
- MOVOU -16(R10)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
-
-emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64:
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B:
- MOVQ SI, AX
- JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B
-
-memmove_long_match_emit_encodeSnappyBetterBlockAsm12B:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveLong
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVQ R9, R13
- SHRQ $0x05, R13
- MOVQ AX, R11
- ANDL $0x0000001f, R11
- MOVQ $0x00000040, R14
- SUBQ R11, R14
- DECQ R13
- JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
- LEAQ -32(R10)(R14*1), R11
- LEAQ -32(AX)(R14*1), R15
-
-emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back:
- MOVOU (R11), X4
- MOVOU 16(R11), X5
- MOVOA X4, (R15)
- MOVOA X5, 16(R15)
- ADDQ $0x20, R15
- ADDQ $0x20, R11
- ADDQ $0x20, R14
- DECQ R13
- JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32:
- MOVOU -32(R10)(R14*1), X4
- MOVOU -16(R10)(R14*1), X5
- MOVOA X4, -32(AX)(R14*1)
- MOVOA X5, -16(AX)(R14*1)
- ADDQ $0x20, R14
- CMPQ R9, R14
- JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ SI, AX
-
-emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B:
- ADDL R12, CX
- ADDL $0x04, R12
- MOVL CX, 12(SP)
-
- // emitCopy
-two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B:
- CMPL R12, $0x40
- JLE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B
- MOVB $0xee, (AX)
- MOVW R8, 1(AX)
- LEAL -60(R12), R12
- ADDQ $0x03, AX
- JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B
-
-two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B:
- CMPL R12, $0x0c
- JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B
- CMPL R8, $0x00000800
- JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B
- MOVB $0x01, BL
- LEAL -16(BX)(R12*4), R12
- MOVB R8, 1(AX)
- SHRL $0x08, R8
- SHLL $0x05, R8
- ORL R8, R12
- MOVB R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B
-
-emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B:
- MOVB $0x02, BL
- LEAL -4(BX)(R12*4), R12
- MOVB R12, (AX)
- MOVW R8, 1(AX)
- ADDQ $0x03, AX
-
-match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B:
- CMPL CX, 8(SP)
- JGE emit_remainder_encodeSnappyBetterBlockAsm12B
- CMPQ AX, (SP)
- JL match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B:
- MOVQ $0x0000cf1bbcdcbf9b, SI
- MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
- SHLQ $0x10, R10
- IMULQ SI, R10
- SHRQ $0x32, R10
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x32, R13
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x34, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x34, R12
- MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 65560(SP)(R11*4)
- MOVL R15, 65560(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
- SHLQ $0x10, R10
- IMULQ SI, R10
- SHRQ $0x32, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x34, R11
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x32, R13
- MOVL R9, 24(SP)(R10*4)
- MOVL DI, 65560(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeSnappyBetterBlockAsm12B
-
-emit_remainder_encodeSnappyBetterBlockAsm12B:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
- JL emit_remainder_ok_encodeSnappyBetterBlockAsm12B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-emit_remainder_ok_encodeSnappyBetterBlockAsm12B:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
- JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
- LEAL -1(SI), DX
- CMPL DX, $0x3c
- JLT one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B
- CMPL DX, $0x00000100
- JLT two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B
-
-two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
- CMPL DX, $0x40
- JL memmove_emit_remainder_encodeSnappyBetterBlockAsm12B
- JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B
-
-one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B:
- SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
-
-memmove_emit_remainder_encodeSnappyBetterBlockAsm12B:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveShort
- CMPQ BX, $0x08
- JLE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8
- CMPQ BX, $0x10
- JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16
- CMPQ BX, $0x20
- JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32
- JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64
-
-emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8:
- MOVQ (CX), SI
- MOVQ SI, (AX)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
-
-emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
-
-emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
-
-emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
-
-memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B:
- MOVQ DX, AX
- JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B
-
-memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVQ BX, DI
- SHRQ $0x05, DI
- MOVQ AX, SI
- ANDL $0x0000001f, SI
- MOVQ $0x00000040, R8
- SUBQ SI, R8
- DECQ DI
- JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
-
-emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back:
- MOVOU (SI), X4
- MOVOU 16(SI), X5
- MOVOA X4, (R9)
- MOVOA X5, 16(R9)
- ADDQ $0x20, R9
- ADDQ $0x20, SI
- ADDQ $0x20, R8
- DECQ DI
- JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back
-
-emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
- ADDQ $0x20, R8
- CMPQ BX, R8
- JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
-
-emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
- RET
-
-// func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int
-// Requires: BMI, SSE2
-TEXT ·encodeSnappyBetterBlockAsm10B(SB), $20504-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x000000a0, CX
- LEAQ 24(SP), DX
- PXOR X0, X0
-
-zero_loop_encodeSnappyBetterBlockAsm10B:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
- JNZ zero_loop_encodeSnappyBetterBlockAsm10B
- MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), SI
- MOVL SI, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL $0x00000000, 16(SP)
- MOVQ src_base+24(FP), DX
-
-search_loop_encodeSnappyBetterBlockAsm10B:
- MOVL CX, SI
- SUBL 12(SP), SI
- SHRL $0x05, SI
- LEAL 1(CX)(SI*1), SI
- CMPL SI, 8(SP)
- JGE emit_remainder_encodeSnappyBetterBlockAsm10B
- MOVQ (DX)(CX*1), DI
- MOVL SI, 20(SP)
- MOVQ $0x0000cf1bbcdcbf9b, R9
- MOVQ $0x9e3779b1, SI
- MOVQ DI, R10
- MOVQ DI, R11
- SHLQ $0x10, R10
- IMULQ R9, R10
- SHRQ $0x34, R10
- SHLQ $0x20, R11
- IMULQ SI, R11
- SHRQ $0x36, R11
- MOVL 24(SP)(R10*4), SI
- MOVL 16408(SP)(R11*4), R8
- MOVL CX, 24(SP)(R10*4)
- MOVL CX, 16408(SP)(R11*4)
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeSnappyBetterBlockAsm10B
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeSnappyBetterBlockAsm10B
- MOVL 20(SP), CX
- JMP search_loop_encodeSnappyBetterBlockAsm10B
-
-candidateS_match_encodeSnappyBetterBlockAsm10B:
- SHRQ $0x08, DI
- MOVQ DI, R10
- SHLQ $0x10, R10
- IMULQ R9, R10
- SHRQ $0x34, R10
- MOVL 24(SP)(R10*4), SI
- INCL CX
- MOVL CX, 24(SP)(R10*4)
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeSnappyBetterBlockAsm10B
- DECL CX
- MOVL R8, SI
-
-candidate_match_encodeSnappyBetterBlockAsm10B:
- MOVL 12(SP), DI
- TESTL SI, SI
- JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B
-
-match_extend_back_loop_encodeSnappyBetterBlockAsm10B:
- CMPL CX, DI
- JLE match_extend_back_end_encodeSnappyBetterBlockAsm10B
- MOVB -1(DX)(SI*1), BL
- MOVB -1(DX)(CX*1), R8
- CMPB BL, R8
- JNE match_extend_back_end_encodeSnappyBetterBlockAsm10B
- LEAL -1(CX), CX
- DECL SI
- JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B
- JMP match_extend_back_loop_encodeSnappyBetterBlockAsm10B
-
-match_extend_back_end_encodeSnappyBetterBlockAsm10B:
- MOVL CX, DI
- SUBL 12(SP), DI
- LEAQ 3(AX)(DI*1), DI
- CMPQ DI, (SP)
- JL match_dst_size_check_encodeSnappyBetterBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_dst_size_check_encodeSnappyBetterBlockAsm10B:
- MOVL CX, DI
- ADDL $0x04, CX
- ADDL $0x04, SI
- MOVQ src_len+32(FP), R8
- SUBL CX, R8
- LEAQ (DX)(CX*1), R9
- LEAQ (DX)(SI*1), R10
-
- // matchLen
- XORL R12, R12
- CMPL R8, $0x08
- JL matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B
-
-matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B:
- MOVQ (R9)(R12*1), R11
- XORQ (R10)(R12*1), R11
- TESTQ R11, R11
- JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm10B
-
-#ifdef GOAMD64_v3
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R11, R11
-
-#endif
- SARQ $0x03, R11
- LEAL (R12)(R11*1), R12
- JMP match_nolit_end_encodeSnappyBetterBlockAsm10B
-
-matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm10B:
- LEAL -8(R8), R8
- LEAL 8(R12), R12
- CMPL R8, $0x08
- JGE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B
- JZ match_nolit_end_encodeSnappyBetterBlockAsm10B
-
-matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B:
- CMPL R8, $0x04
- JL matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B
- MOVL (R9)(R12*1), R11
- CMPL (R10)(R12*1), R11
- JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B
- SUBL $0x04, R8
- LEAL 4(R12), R12
-
-matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B:
- CMPL R8, $0x02
- JL matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B
- MOVW (R9)(R12*1), R11
- CMPW (R10)(R12*1), R11
- JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B
- SUBL $0x02, R8
- LEAL 2(R12), R12
-
-matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B:
- CMPL R8, $0x01
- JL match_nolit_end_encodeSnappyBetterBlockAsm10B
- MOVB (R9)(R12*1), R11
- CMPB (R10)(R12*1), R11
- JNE match_nolit_end_encodeSnappyBetterBlockAsm10B
- LEAL 1(R12), R12
-
-match_nolit_end_encodeSnappyBetterBlockAsm10B:
- MOVL CX, R8
- SUBL SI, R8
-
- // Check if repeat
- MOVL R8, 16(SP)
- MOVL 12(SP), SI
- CMPL SI, DI
- JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(SI*1), R10
- SUBL SI, R9
- LEAL -1(R9), SI
- CMPL SI, $0x3c
- JLT one_byte_match_emit_encodeSnappyBetterBlockAsm10B
- CMPL SI, $0x00000100
- JLT two_bytes_match_emit_encodeSnappyBetterBlockAsm10B
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B
-
-two_bytes_match_emit_encodeSnappyBetterBlockAsm10B:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_match_emit_encodeSnappyBetterBlockAsm10B
- JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B
-
-one_byte_match_emit_encodeSnappyBetterBlockAsm10B:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, AX
-
-memmove_match_emit_encodeSnappyBetterBlockAsm10B:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveShort
- CMPQ R9, $0x08
- JLE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8
- CMPQ R9, $0x10
- JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32
- JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64
-
-emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8:
- MOVQ (R10), R11
- MOVQ R11, (AX)
- JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
-
-emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16:
- MOVQ (R10), R11
- MOVQ -8(R10)(R9*1), R10
- MOVQ R11, (AX)
- MOVQ R10, -8(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
-
-emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32:
- MOVOU (R10), X0
- MOVOU -16(R10)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
-
-emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64:
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B:
- MOVQ SI, AX
- JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B
-
-memmove_long_match_emit_encodeSnappyBetterBlockAsm10B:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveLong
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVQ R9, R13
- SHRQ $0x05, R13
- MOVQ AX, R11
- ANDL $0x0000001f, R11
- MOVQ $0x00000040, R14
- SUBQ R11, R14
- DECQ R13
- JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
- LEAQ -32(R10)(R14*1), R11
- LEAQ -32(AX)(R14*1), R15
-
-emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back:
- MOVOU (R11), X4
- MOVOU 16(R11), X5
- MOVOA X4, (R15)
- MOVOA X5, 16(R15)
- ADDQ $0x20, R15
- ADDQ $0x20, R11
- ADDQ $0x20, R14
- DECQ R13
- JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32:
- MOVOU -32(R10)(R14*1), X4
- MOVOU -16(R10)(R14*1), X5
- MOVOA X4, -32(AX)(R14*1)
- MOVOA X5, -16(AX)(R14*1)
- ADDQ $0x20, R14
- CMPQ R9, R14
- JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ SI, AX
-
-emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B:
- ADDL R12, CX
- ADDL $0x04, R12
- MOVL CX, 12(SP)
-
- // emitCopy
-two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B:
- CMPL R12, $0x40
- JLE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B
- MOVB $0xee, (AX)
- MOVW R8, 1(AX)
- LEAL -60(R12), R12
- ADDQ $0x03, AX
- JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B
-
-two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B:
- CMPL R12, $0x0c
- JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B
- CMPL R8, $0x00000800
- JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B
- MOVB $0x01, BL
- LEAL -16(BX)(R12*4), R12
- MOVB R8, 1(AX)
- SHRL $0x08, R8
- SHLL $0x05, R8
- ORL R8, R12
- MOVB R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B
-
-emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B:
- MOVB $0x02, BL
- LEAL -4(BX)(R12*4), R12
- MOVB R12, (AX)
- MOVW R8, 1(AX)
- ADDQ $0x03, AX
-
-match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B:
- CMPL CX, 8(SP)
- JGE emit_remainder_encodeSnappyBetterBlockAsm10B
- CMPQ AX, (SP)
- JL match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B:
- MOVQ $0x0000cf1bbcdcbf9b, SI
- MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
- SHLQ $0x10, R10
- IMULQ SI, R10
- SHRQ $0x34, R10
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x34, R13
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x36, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x36, R12
- MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 16408(SP)(R11*4)
- MOVL R15, 16408(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
- SHLQ $0x10, R10
- IMULQ SI, R10
- SHRQ $0x34, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x36, R11
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x34, R13
- MOVL R9, 24(SP)(R10*4)
- MOVL DI, 16408(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeSnappyBetterBlockAsm10B
-
-emit_remainder_encodeSnappyBetterBlockAsm10B:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
- JL emit_remainder_ok_encodeSnappyBetterBlockAsm10B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-emit_remainder_ok_encodeSnappyBetterBlockAsm10B:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
- JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
- LEAL -1(SI), DX
- CMPL DX, $0x3c
- JLT one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B
- CMPL DX, $0x00000100
- JLT two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B
-
-two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
- CMPL DX, $0x40
- JL memmove_emit_remainder_encodeSnappyBetterBlockAsm10B
- JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B
-
-one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B:
- SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
-
-memmove_emit_remainder_encodeSnappyBetterBlockAsm10B:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveShort
- CMPQ BX, $0x08
- JLE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8
- CMPQ BX, $0x10
- JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16
- CMPQ BX, $0x20
- JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32
- JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64
-
-emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8:
- MOVQ (CX), SI
- MOVQ SI, (AX)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
-
-emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
-
-emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
-
-emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
-
-memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B:
- MOVQ DX, AX
- JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B
-
-memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVQ BX, DI
- SHRQ $0x05, DI
- MOVQ AX, SI
- ANDL $0x0000001f, SI
- MOVQ $0x00000040, R8
- SUBQ SI, R8
- DECQ DI
- JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
-
-emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back:
- MOVOU (SI), X4
- MOVOU 16(SI), X5
- MOVOA X4, (R9)
- MOVOA X5, 16(R9)
- ADDQ $0x20, R9
- ADDQ $0x20, SI
- ADDQ $0x20, R8
- DECQ DI
- JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back
-
-emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
- ADDQ $0x20, R8
- CMPQ BX, R8
- JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
-
-emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
- RET
-
-// func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int
-// Requires: BMI, SSE2
-TEXT ·encodeSnappyBetterBlockAsm8B(SB), $5144-56
- MOVQ dst_base+0(FP), AX
- MOVQ $0x00000028, CX
- LEAQ 24(SP), DX
- PXOR X0, X0
-
-zero_loop_encodeSnappyBetterBlockAsm8B:
- MOVOU X0, (DX)
- MOVOU X0, 16(DX)
- MOVOU X0, 32(DX)
- MOVOU X0, 48(DX)
- MOVOU X0, 64(DX)
- MOVOU X0, 80(DX)
- MOVOU X0, 96(DX)
- MOVOU X0, 112(DX)
- ADDQ $0x80, DX
- DECQ CX
- JNZ zero_loop_encodeSnappyBetterBlockAsm8B
- MOVL $0x00000000, 12(SP)
- MOVQ src_len+32(FP), CX
- LEAQ -9(CX), DX
- LEAQ -8(CX), SI
- MOVL SI, 8(SP)
- SHRQ $0x05, CX
- SUBL CX, DX
- LEAQ (AX)(DX*1), DX
- MOVQ DX, (SP)
- MOVL $0x00000001, CX
- MOVL $0x00000000, 16(SP)
- MOVQ src_base+24(FP), DX
-
-search_loop_encodeSnappyBetterBlockAsm8B:
- MOVL CX, SI
- SUBL 12(SP), SI
- SHRL $0x04, SI
- LEAL 1(CX)(SI*1), SI
- CMPL SI, 8(SP)
- JGE emit_remainder_encodeSnappyBetterBlockAsm8B
- MOVQ (DX)(CX*1), DI
- MOVL SI, 20(SP)
- MOVQ $0x0000cf1bbcdcbf9b, R9
- MOVQ $0x9e3779b1, SI
- MOVQ DI, R10
- MOVQ DI, R11
- SHLQ $0x10, R10
- IMULQ R9, R10
- SHRQ $0x36, R10
- SHLQ $0x20, R11
- IMULQ SI, R11
- SHRQ $0x38, R11
- MOVL 24(SP)(R10*4), SI
- MOVL 4120(SP)(R11*4), R8
- MOVL CX, 24(SP)(R10*4)
- MOVL CX, 4120(SP)(R11*4)
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeSnappyBetterBlockAsm8B
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeSnappyBetterBlockAsm8B
- MOVL 20(SP), CX
- JMP search_loop_encodeSnappyBetterBlockAsm8B
-
-candidateS_match_encodeSnappyBetterBlockAsm8B:
- SHRQ $0x08, DI
- MOVQ DI, R10
- SHLQ $0x10, R10
- IMULQ R9, R10
- SHRQ $0x36, R10
- MOVL 24(SP)(R10*4), SI
- INCL CX
- MOVL CX, 24(SP)(R10*4)
- CMPL (DX)(SI*1), DI
- JEQ candidate_match_encodeSnappyBetterBlockAsm8B
- DECL CX
- MOVL R8, SI
-
-candidate_match_encodeSnappyBetterBlockAsm8B:
- MOVL 12(SP), DI
- TESTL SI, SI
- JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B
-
-match_extend_back_loop_encodeSnappyBetterBlockAsm8B:
- CMPL CX, DI
- JLE match_extend_back_end_encodeSnappyBetterBlockAsm8B
- MOVB -1(DX)(SI*1), BL
- MOVB -1(DX)(CX*1), R8
- CMPB BL, R8
- JNE match_extend_back_end_encodeSnappyBetterBlockAsm8B
- LEAL -1(CX), CX
- DECL SI
- JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B
- JMP match_extend_back_loop_encodeSnappyBetterBlockAsm8B
-
-match_extend_back_end_encodeSnappyBetterBlockAsm8B:
- MOVL CX, DI
- SUBL 12(SP), DI
- LEAQ 3(AX)(DI*1), DI
- CMPQ DI, (SP)
- JL match_dst_size_check_encodeSnappyBetterBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_dst_size_check_encodeSnappyBetterBlockAsm8B:
- MOVL CX, DI
- ADDL $0x04, CX
- ADDL $0x04, SI
- MOVQ src_len+32(FP), R8
- SUBL CX, R8
- LEAQ (DX)(CX*1), R9
- LEAQ (DX)(SI*1), R10
-
- // matchLen
- XORL R12, R12
- CMPL R8, $0x08
- JL matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B
-
-matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B:
- MOVQ (R9)(R12*1), R11
- XORQ (R10)(R12*1), R11
- TESTQ R11, R11
- JZ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm8B
-
-#ifdef GOAMD64_v3
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ R11, R11
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ R11, R11
-
-#endif
- SARQ $0x03, R11
- LEAL (R12)(R11*1), R12
- JMP match_nolit_end_encodeSnappyBetterBlockAsm8B
-
-matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm8B:
- LEAL -8(R8), R8
- LEAL 8(R12), R12
- CMPL R8, $0x08
- JGE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B
- JZ match_nolit_end_encodeSnappyBetterBlockAsm8B
-
-matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B:
- CMPL R8, $0x04
- JL matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B
- MOVL (R9)(R12*1), R11
- CMPL (R10)(R12*1), R11
- JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B
- SUBL $0x04, R8
- LEAL 4(R12), R12
-
-matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B:
- CMPL R8, $0x02
- JL matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B
- MOVW (R9)(R12*1), R11
- CMPW (R10)(R12*1), R11
- JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B
- SUBL $0x02, R8
- LEAL 2(R12), R12
-
-matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B:
- CMPL R8, $0x01
- JL match_nolit_end_encodeSnappyBetterBlockAsm8B
- MOVB (R9)(R12*1), R11
- CMPB (R10)(R12*1), R11
- JNE match_nolit_end_encodeSnappyBetterBlockAsm8B
- LEAL 1(R12), R12
-
-match_nolit_end_encodeSnappyBetterBlockAsm8B:
- MOVL CX, R8
- SUBL SI, R8
-
- // Check if repeat
- MOVL R8, 16(SP)
- MOVL 12(SP), SI
- CMPL SI, DI
- JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B
- MOVL DI, R9
- MOVL DI, 12(SP)
- LEAQ (DX)(SI*1), R10
- SUBL SI, R9
- LEAL -1(R9), SI
- CMPL SI, $0x3c
- JLT one_byte_match_emit_encodeSnappyBetterBlockAsm8B
- CMPL SI, $0x00000100
- JLT two_bytes_match_emit_encodeSnappyBetterBlockAsm8B
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm8B
-
-two_bytes_match_emit_encodeSnappyBetterBlockAsm8B:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_match_emit_encodeSnappyBetterBlockAsm8B
- JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm8B
-
-one_byte_match_emit_encodeSnappyBetterBlockAsm8B:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, AX
-
-memmove_match_emit_encodeSnappyBetterBlockAsm8B:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveShort
- CMPQ R9, $0x08
- JLE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8
- CMPQ R9, $0x10
- JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16
- CMPQ R9, $0x20
- JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32
- JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64
-
-emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8:
- MOVQ (R10), R11
- MOVQ R11, (AX)
- JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B
-
-emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16:
- MOVQ (R10), R11
- MOVQ -8(R10)(R9*1), R10
- MOVQ R11, (AX)
- MOVQ R10, -8(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B
-
-emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32:
- MOVOU (R10), X0
- MOVOU -16(R10)(R9*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(R9*1)
- JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B
-
-emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64:
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
-
-memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B:
- MOVQ SI, AX
- JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B
-
-memmove_long_match_emit_encodeSnappyBetterBlockAsm8B:
- LEAQ (AX)(R9*1), SI
-
- // genMemMoveLong
- MOVOU (R10), X0
- MOVOU 16(R10), X1
- MOVOU -32(R10)(R9*1), X2
- MOVOU -16(R10)(R9*1), X3
- MOVQ R9, R13
- SHRQ $0x05, R13
- MOVQ AX, R11
- ANDL $0x0000001f, R11
- MOVQ $0x00000040, R14
- SUBQ R11, R14
- DECQ R13
- JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
- LEAQ -32(R10)(R14*1), R11
- LEAQ -32(AX)(R14*1), R15
-
-emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back:
- MOVOU (R11), X4
- MOVOU 16(R11), X5
- MOVOA X4, (R15)
- MOVOA X5, 16(R15)
- ADDQ $0x20, R15
- ADDQ $0x20, R11
- ADDQ $0x20, R14
- DECQ R13
- JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32:
- MOVOU -32(R10)(R14*1), X4
- MOVOU -16(R10)(R14*1), X5
- MOVOA X4, -32(AX)(R14*1)
- MOVOA X5, -16(AX)(R14*1)
- ADDQ $0x20, R14
- CMPQ R9, R14
- JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(R9*1)
- MOVOU X3, -16(AX)(R9*1)
- MOVQ SI, AX
-
-emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B:
- ADDL R12, CX
- ADDL $0x04, R12
- MOVL CX, 12(SP)
-
- // emitCopy
-two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B:
- CMPL R12, $0x40
- JLE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B
- MOVB $0xee, (AX)
- MOVW R8, 1(AX)
- LEAL -60(R12), R12
- ADDQ $0x03, AX
- JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B
-
-two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B:
- CMPL R12, $0x0c
- JGE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B
- MOVB $0x01, BL
- LEAL -16(BX)(R12*4), R12
- MOVB R8, 1(AX)
- SHRL $0x08, R8
- SHLL $0x05, R8
- ORL R8, R12
- MOVB R12, (AX)
- ADDQ $0x02, AX
- JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B
-
-emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B:
- MOVB $0x02, BL
- LEAL -4(BX)(R12*4), R12
- MOVB R12, (AX)
- MOVW R8, 1(AX)
- ADDQ $0x03, AX
-
-match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B:
- CMPL CX, 8(SP)
- JGE emit_remainder_encodeSnappyBetterBlockAsm8B
- CMPQ AX, (SP)
- JL match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B:
- MOVQ $0x0000cf1bbcdcbf9b, SI
- MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
- SHLQ $0x10, R10
- IMULQ SI, R10
- SHRQ $0x36, R10
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x36, R13
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x38, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x38, R12
- MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 4120(SP)(R11*4)
- MOVL R15, 4120(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
- SHLQ $0x10, R10
- IMULQ SI, R10
- SHRQ $0x36, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x38, R11
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x36, R13
- MOVL R9, 24(SP)(R10*4)
- MOVL DI, 4120(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeSnappyBetterBlockAsm8B
-
-emit_remainder_encodeSnappyBetterBlockAsm8B:
- MOVQ src_len+32(FP), CX
- SUBL 12(SP), CX
- LEAQ 3(AX)(CX*1), CX
- CMPQ CX, (SP)
- JL emit_remainder_ok_encodeSnappyBetterBlockAsm8B
- MOVQ $0x00000000, ret+48(FP)
- RET
-
-emit_remainder_ok_encodeSnappyBetterBlockAsm8B:
- MOVQ src_len+32(FP), CX
- MOVL 12(SP), BX
- CMPL BX, CX
- JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B
- MOVL CX, SI
- MOVL CX, 12(SP)
- LEAQ (DX)(BX*1), CX
- SUBL BX, SI
- LEAL -1(SI), DX
- CMPL DX, $0x3c
- JLT one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B
- CMPL DX, $0x00000100
- JLT two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B
- MOVB $0xf4, (AX)
- MOVW DX, 1(AX)
- ADDQ $0x03, AX
- JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B
-
-two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B:
- MOVB $0xf0, (AX)
- MOVB DL, 1(AX)
- ADDQ $0x02, AX
- CMPL DX, $0x40
- JL memmove_emit_remainder_encodeSnappyBetterBlockAsm8B
- JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B
-
-one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B:
- SHLB $0x02, DL
- MOVB DL, (AX)
- ADDQ $0x01, AX
-
-memmove_emit_remainder_encodeSnappyBetterBlockAsm8B:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveShort
- CMPQ BX, $0x08
- JLE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8
- CMPQ BX, $0x10
- JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16
- CMPQ BX, $0x20
- JBE emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32
- JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64
-
-emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8:
- MOVQ (CX), SI
- MOVQ SI, (AX)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
-
-emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(BX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
-
-emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(BX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(BX*1)
- JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
-
-emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
-
-memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B:
- MOVQ DX, AX
- JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B
-
-memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B:
- LEAQ (AX)(SI*1), DX
- MOVL SI, BX
-
- // genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(BX*1), X2
- MOVOU -16(CX)(BX*1), X3
- MOVQ BX, DI
- SHRQ $0x05, DI
- MOVQ AX, SI
- ANDL $0x0000001f, SI
- MOVQ $0x00000040, R8
- SUBQ SI, R8
- DECQ DI
- JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
-
-emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back:
- MOVOU (SI), X4
- MOVOU 16(SI), X5
- MOVOA X4, (R9)
- MOVOA X5, 16(R9)
- ADDQ $0x20, R9
- ADDQ $0x20, SI
- ADDQ $0x20, R8
- DECQ DI
- JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back
-
-emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
- ADDQ $0x20, R8
- CMPQ BX, R8
- JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(BX*1)
- MOVOU X3, -16(AX)(BX*1)
- MOVQ DX, AX
-
-emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B:
- MOVQ dst_base+0(FP), CX
- SUBQ CX, AX
- MOVQ AX, ret+48(FP)
- RET
-
-// func emitLiteral(dst []byte, lit []byte) int
-// Requires: SSE2
-TEXT ·emitLiteral(SB), NOSPLIT, $0-56
- MOVQ lit_len+32(FP), DX
- MOVQ dst_base+0(FP), AX
- MOVQ lit_base+24(FP), CX
- TESTQ DX, DX
- JZ emit_literal_end_standalone_skip
- MOVL DX, BX
- LEAL -1(DX), SI
- CMPL SI, $0x3c
- JLT one_byte_standalone
- CMPL SI, $0x00000100
- JLT two_bytes_standalone
- CMPL SI, $0x00010000
- JLT three_bytes_standalone
- CMPL SI, $0x01000000
- JLT four_bytes_standalone
- MOVB $0xfc, (AX)
- MOVL SI, 1(AX)
- ADDQ $0x05, BX
- ADDQ $0x05, AX
- JMP memmove_long_standalone
-
-four_bytes_standalone:
- MOVL SI, DI
- SHRL $0x10, DI
- MOVB $0xf8, (AX)
- MOVW SI, 1(AX)
- MOVB DI, 3(AX)
- ADDQ $0x04, BX
- ADDQ $0x04, AX
- JMP memmove_long_standalone
-
-three_bytes_standalone:
- MOVB $0xf4, (AX)
- MOVW SI, 1(AX)
- ADDQ $0x03, BX
- ADDQ $0x03, AX
- JMP memmove_long_standalone
-
-two_bytes_standalone:
- MOVB $0xf0, (AX)
- MOVB SI, 1(AX)
- ADDQ $0x02, BX
- ADDQ $0x02, AX
- CMPL SI, $0x40
- JL memmove_standalone
- JMP memmove_long_standalone
-
-one_byte_standalone:
- SHLB $0x02, SI
- MOVB SI, (AX)
- ADDQ $0x01, BX
- ADDQ $0x01, AX
-
-memmove_standalone:
- // genMemMoveShort
- CMPQ DX, $0x03
- JB emit_lit_memmove_standalone_memmove_move_1or2
- JE emit_lit_memmove_standalone_memmove_move_3
- CMPQ DX, $0x08
- JB emit_lit_memmove_standalone_memmove_move_4through7
- CMPQ DX, $0x10
- JBE emit_lit_memmove_standalone_memmove_move_8through16
- CMPQ DX, $0x20
- JBE emit_lit_memmove_standalone_memmove_move_17through32
- JMP emit_lit_memmove_standalone_memmove_move_33through64
-
-emit_lit_memmove_standalone_memmove_move_1or2:
- MOVB (CX), SI
- MOVB -1(CX)(DX*1), CL
- MOVB SI, (AX)
- MOVB CL, -1(AX)(DX*1)
- JMP emit_literal_end_standalone
-
-emit_lit_memmove_standalone_memmove_move_3:
- MOVW (CX), SI
- MOVB 2(CX), CL
- MOVW SI, (AX)
- MOVB CL, 2(AX)
- JMP emit_literal_end_standalone
-
-emit_lit_memmove_standalone_memmove_move_4through7:
- MOVL (CX), SI
- MOVL -4(CX)(DX*1), CX
- MOVL SI, (AX)
- MOVL CX, -4(AX)(DX*1)
- JMP emit_literal_end_standalone
-
-emit_lit_memmove_standalone_memmove_move_8through16:
- MOVQ (CX), SI
- MOVQ -8(CX)(DX*1), CX
- MOVQ SI, (AX)
- MOVQ CX, -8(AX)(DX*1)
- JMP emit_literal_end_standalone
-
-emit_lit_memmove_standalone_memmove_move_17through32:
- MOVOU (CX), X0
- MOVOU -16(CX)(DX*1), X1
- MOVOU X0, (AX)
- MOVOU X1, -16(AX)(DX*1)
- JMP emit_literal_end_standalone
-
-emit_lit_memmove_standalone_memmove_move_33through64:
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(DX*1), X2
- MOVOU -16(CX)(DX*1), X3
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(DX*1)
- MOVOU X3, -16(AX)(DX*1)
- JMP emit_literal_end_standalone
- JMP emit_literal_end_standalone
-
-memmove_long_standalone:
- // genMemMoveLong
- MOVOU (CX), X0
- MOVOU 16(CX), X1
- MOVOU -32(CX)(DX*1), X2
- MOVOU -16(CX)(DX*1), X3
- MOVQ DX, DI
- SHRQ $0x05, DI
- MOVQ AX, SI
- ANDL $0x0000001f, SI
- MOVQ $0x00000040, R8
- SUBQ SI, R8
- DECQ DI
- JA emit_lit_memmove_long_standalonelarge_forward_sse_loop_32
- LEAQ -32(CX)(R8*1), SI
- LEAQ -32(AX)(R8*1), R9
-
-emit_lit_memmove_long_standalonelarge_big_loop_back:
- MOVOU (SI), X4
- MOVOU 16(SI), X5
- MOVOA X4, (R9)
- MOVOA X5, 16(R9)
- ADDQ $0x20, R9
- ADDQ $0x20, SI
- ADDQ $0x20, R8
- DECQ DI
- JNA emit_lit_memmove_long_standalonelarge_big_loop_back
-
-emit_lit_memmove_long_standalonelarge_forward_sse_loop_32:
- MOVOU -32(CX)(R8*1), X4
- MOVOU -16(CX)(R8*1), X5
- MOVOA X4, -32(AX)(R8*1)
- MOVOA X5, -16(AX)(R8*1)
- ADDQ $0x20, R8
- CMPQ DX, R8
- JAE emit_lit_memmove_long_standalonelarge_forward_sse_loop_32
- MOVOU X0, (AX)
- MOVOU X1, 16(AX)
- MOVOU X2, -32(AX)(DX*1)
- MOVOU X3, -16(AX)(DX*1)
- JMP emit_literal_end_standalone
- JMP emit_literal_end_standalone
-
-emit_literal_end_standalone_skip:
- XORQ BX, BX
-
-emit_literal_end_standalone:
- MOVQ BX, ret+48(FP)
- RET
-
-// func emitRepeat(dst []byte, offset int, length int) int
-TEXT ·emitRepeat(SB), NOSPLIT, $0-48
- XORQ BX, BX
- MOVQ dst_base+0(FP), AX
- MOVQ offset+24(FP), CX
- MOVQ length+32(FP), DX
-
- // emitRepeat
-emit_repeat_again_standalone:
- MOVL DX, SI
- LEAL -4(DX), DX
- CMPL SI, $0x08
- JLE repeat_two_standalone
- CMPL SI, $0x0c
- JGE cant_repeat_two_offset_standalone
- CMPL CX, $0x00000800
- JLT repeat_two_offset_standalone
-
-cant_repeat_two_offset_standalone:
- CMPL DX, $0x00000104
- JLT repeat_three_standalone
- CMPL DX, $0x00010100
- JLT repeat_four_standalone
- CMPL DX, $0x0100ffff
- JLT repeat_five_standalone
- LEAL -16842747(DX), DX
- MOVW $0x001d, (AX)
- MOVW $0xfffb, 2(AX)
- MOVB $0xff, 4(AX)
- ADDQ $0x05, AX
- ADDQ $0x05, BX
- JMP emit_repeat_again_standalone
-
-repeat_five_standalone:
- LEAL -65536(DX), DX
- MOVL DX, CX
- MOVW $0x001d, (AX)
- MOVW DX, 2(AX)
- SARL $0x10, CX
- MOVB CL, 4(AX)
- ADDQ $0x05, BX
- ADDQ $0x05, AX
- JMP gen_emit_repeat_end
-
-repeat_four_standalone:
- LEAL -256(DX), DX
- MOVW $0x0019, (AX)
- MOVW DX, 2(AX)
- ADDQ $0x04, BX
- ADDQ $0x04, AX
- JMP gen_emit_repeat_end
-
-repeat_three_standalone:
- LEAL -4(DX), DX
- MOVW $0x0015, (AX)
- MOVB DL, 2(AX)
- ADDQ $0x03, BX
- ADDQ $0x03, AX
- JMP gen_emit_repeat_end
-
-repeat_two_standalone:
- SHLL $0x02, DX
- ORL $0x01, DX
- MOVW DX, (AX)
- ADDQ $0x02, BX
- ADDQ $0x02, AX
- JMP gen_emit_repeat_end
-
-repeat_two_offset_standalone:
- XORQ SI, SI
- LEAL 1(SI)(DX*4), DX
- MOVB CL, 1(AX)
- SARL $0x08, CX
- SHLL $0x05, CX
- ORL CX, DX
- MOVB DL, (AX)
- ADDQ $0x02, BX
- ADDQ $0x02, AX
-
-gen_emit_repeat_end:
- MOVQ BX, ret+40(FP)
- RET
-
-// func emitCopy(dst []byte, offset int, length int) int
-TEXT ·emitCopy(SB), NOSPLIT, $0-48
- XORQ BX, BX
- MOVQ dst_base+0(FP), AX
- MOVQ offset+24(FP), CX
- MOVQ length+32(FP), DX
-
- // emitCopy
- CMPL CX, $0x00010000
- JL two_byte_offset_standalone
-
-four_bytes_loop_back_standalone:
- CMPL DX, $0x40
- JLE four_bytes_remain_standalone
- MOVB $0xff, (AX)
- MOVL CX, 1(AX)
- LEAL -64(DX), DX
- ADDQ $0x05, BX
- ADDQ $0x05, AX
- CMPL DX, $0x04
- JL four_bytes_remain_standalone
-
- // emitRepeat
-emit_repeat_again_standalone_emit_copy:
- MOVL DX, SI
- LEAL -4(DX), DX
- CMPL SI, $0x08
- JLE repeat_two_standalone_emit_copy
- CMPL SI, $0x0c
- JGE cant_repeat_two_offset_standalone_emit_copy
- CMPL CX, $0x00000800
- JLT repeat_two_offset_standalone_emit_copy
-
-cant_repeat_two_offset_standalone_emit_copy:
- CMPL DX, $0x00000104
- JLT repeat_three_standalone_emit_copy
- CMPL DX, $0x00010100
- JLT repeat_four_standalone_emit_copy
- CMPL DX, $0x0100ffff
- JLT repeat_five_standalone_emit_copy
- LEAL -16842747(DX), DX
- MOVW $0x001d, (AX)
- MOVW $0xfffb, 2(AX)
- MOVB $0xff, 4(AX)
- ADDQ $0x05, AX
- ADDQ $0x05, BX
- JMP emit_repeat_again_standalone_emit_copy
-
-repeat_five_standalone_emit_copy:
- LEAL -65536(DX), DX
- MOVL DX, CX
- MOVW $0x001d, (AX)
- MOVW DX, 2(AX)
- SARL $0x10, CX
- MOVB CL, 4(AX)
- ADDQ $0x05, BX
- ADDQ $0x05, AX
- JMP gen_emit_copy_end
-
-repeat_four_standalone_emit_copy:
- LEAL -256(DX), DX
- MOVW $0x0019, (AX)
- MOVW DX, 2(AX)
- ADDQ $0x04, BX
- ADDQ $0x04, AX
- JMP gen_emit_copy_end
-
-repeat_three_standalone_emit_copy:
- LEAL -4(DX), DX
- MOVW $0x0015, (AX)
- MOVB DL, 2(AX)
- ADDQ $0x03, BX
- ADDQ $0x03, AX
- JMP gen_emit_copy_end
-
-repeat_two_standalone_emit_copy:
- SHLL $0x02, DX
- ORL $0x01, DX
- MOVW DX, (AX)
- ADDQ $0x02, BX
- ADDQ $0x02, AX
- JMP gen_emit_copy_end
-
-repeat_two_offset_standalone_emit_copy:
- XORQ SI, SI
- LEAL 1(SI)(DX*4), DX
- MOVB CL, 1(AX)
- SARL $0x08, CX
- SHLL $0x05, CX
- ORL CX, DX
- MOVB DL, (AX)
- ADDQ $0x02, BX
- ADDQ $0x02, AX
- JMP gen_emit_copy_end
- JMP four_bytes_loop_back_standalone
-
-four_bytes_remain_standalone:
- TESTL DX, DX
- JZ gen_emit_copy_end
- MOVB $0x03, SI
- LEAL -4(SI)(DX*4), DX
- MOVB DL, (AX)
- MOVL CX, 1(AX)
- ADDQ $0x05, BX
- ADDQ $0x05, AX
- JMP gen_emit_copy_end
-
-two_byte_offset_standalone:
- CMPL DX, $0x40
- JLE two_byte_offset_short_standalone
- MOVB $0xee, (AX)
- MOVW CX, 1(AX)
- LEAL -60(DX), DX
- ADDQ $0x03, AX
- ADDQ $0x03, BX
-
- // emitRepeat
-emit_repeat_again_standalone_emit_copy_short:
- MOVL DX, SI
- LEAL -4(DX), DX
- CMPL SI, $0x08
- JLE repeat_two_standalone_emit_copy_short
- CMPL SI, $0x0c
- JGE cant_repeat_two_offset_standalone_emit_copy_short
- CMPL CX, $0x00000800
- JLT repeat_two_offset_standalone_emit_copy_short
-
-cant_repeat_two_offset_standalone_emit_copy_short:
- CMPL DX, $0x00000104
- JLT repeat_three_standalone_emit_copy_short
- CMPL DX, $0x00010100
- JLT repeat_four_standalone_emit_copy_short
- CMPL DX, $0x0100ffff
- JLT repeat_five_standalone_emit_copy_short
- LEAL -16842747(DX), DX
- MOVW $0x001d, (AX)
- MOVW $0xfffb, 2(AX)
- MOVB $0xff, 4(AX)
- ADDQ $0x05, AX
- ADDQ $0x05, BX
- JMP emit_repeat_again_standalone_emit_copy_short
-
-repeat_five_standalone_emit_copy_short:
- LEAL -65536(DX), DX
- MOVL DX, CX
- MOVW $0x001d, (AX)
- MOVW DX, 2(AX)
- SARL $0x10, CX
- MOVB CL, 4(AX)
- ADDQ $0x05, BX
- ADDQ $0x05, AX
- JMP gen_emit_copy_end
-
-repeat_four_standalone_emit_copy_short:
- LEAL -256(DX), DX
- MOVW $0x0019, (AX)
- MOVW DX, 2(AX)
- ADDQ $0x04, BX
- ADDQ $0x04, AX
- JMP gen_emit_copy_end
-
-repeat_three_standalone_emit_copy_short:
- LEAL -4(DX), DX
- MOVW $0x0015, (AX)
- MOVB DL, 2(AX)
- ADDQ $0x03, BX
- ADDQ $0x03, AX
- JMP gen_emit_copy_end
-
-repeat_two_standalone_emit_copy_short:
- SHLL $0x02, DX
- ORL $0x01, DX
- MOVW DX, (AX)
- ADDQ $0x02, BX
- ADDQ $0x02, AX
- JMP gen_emit_copy_end
-
-repeat_two_offset_standalone_emit_copy_short:
- XORQ SI, SI
- LEAL 1(SI)(DX*4), DX
- MOVB CL, 1(AX)
- SARL $0x08, CX
- SHLL $0x05, CX
- ORL CX, DX
- MOVB DL, (AX)
- ADDQ $0x02, BX
- ADDQ $0x02, AX
- JMP gen_emit_copy_end
- JMP two_byte_offset_standalone
-
-two_byte_offset_short_standalone:
- CMPL DX, $0x0c
- JGE emit_copy_three_standalone
- CMPL CX, $0x00000800
- JGE emit_copy_three_standalone
- MOVB $0x01, SI
- LEAL -16(SI)(DX*4), DX
- MOVB CL, 1(AX)
- SHRL $0x08, CX
- SHLL $0x05, CX
- ORL CX, DX
- MOVB DL, (AX)
- ADDQ $0x02, BX
- ADDQ $0x02, AX
- JMP gen_emit_copy_end
-
-emit_copy_three_standalone:
- MOVB $0x02, SI
- LEAL -4(SI)(DX*4), DX
- MOVB DL, (AX)
- MOVW CX, 1(AX)
- ADDQ $0x03, BX
- ADDQ $0x03, AX
-
-gen_emit_copy_end:
- MOVQ BX, ret+40(FP)
- RET
-
-// func emitCopyNoRepeat(dst []byte, offset int, length int) int
-TEXT ·emitCopyNoRepeat(SB), NOSPLIT, $0-48
- XORQ BX, BX
- MOVQ dst_base+0(FP), AX
- MOVQ offset+24(FP), CX
- MOVQ length+32(FP), DX
-
- // emitCopy
- CMPL CX, $0x00010000
- JL two_byte_offset_standalone_snappy
-
-four_bytes_loop_back_standalone_snappy:
- CMPL DX, $0x40
- JLE four_bytes_remain_standalone_snappy
- MOVB $0xff, (AX)
- MOVL CX, 1(AX)
- LEAL -64(DX), DX
- ADDQ $0x05, BX
- ADDQ $0x05, AX
- CMPL DX, $0x04
- JL four_bytes_remain_standalone_snappy
- JMP four_bytes_loop_back_standalone_snappy
-
-four_bytes_remain_standalone_snappy:
- TESTL DX, DX
- JZ gen_emit_copy_end_snappy
- MOVB $0x03, SI
- LEAL -4(SI)(DX*4), DX
- MOVB DL, (AX)
- MOVL CX, 1(AX)
- ADDQ $0x05, BX
- ADDQ $0x05, AX
- JMP gen_emit_copy_end_snappy
-
-two_byte_offset_standalone_snappy:
- CMPL DX, $0x40
- JLE two_byte_offset_short_standalone_snappy
- MOVB $0xee, (AX)
- MOVW CX, 1(AX)
- LEAL -60(DX), DX
- ADDQ $0x03, AX
- ADDQ $0x03, BX
- JMP two_byte_offset_standalone_snappy
-
-two_byte_offset_short_standalone_snappy:
- CMPL DX, $0x0c
- JGE emit_copy_three_standalone_snappy
- CMPL CX, $0x00000800
- JGE emit_copy_three_standalone_snappy
- MOVB $0x01, SI
- LEAL -16(SI)(DX*4), DX
- MOVB CL, 1(AX)
- SHRL $0x08, CX
- SHLL $0x05, CX
- ORL CX, DX
- MOVB DL, (AX)
- ADDQ $0x02, BX
- ADDQ $0x02, AX
- JMP gen_emit_copy_end_snappy
-
-emit_copy_three_standalone_snappy:
- MOVB $0x02, SI
- LEAL -4(SI)(DX*4), DX
- MOVB DL, (AX)
- MOVW CX, 1(AX)
- ADDQ $0x03, BX
- ADDQ $0x03, AX
-
-gen_emit_copy_end_snappy:
- MOVQ BX, ret+40(FP)
- RET
-
-// func matchLen(a []byte, b []byte) int
-// Requires: BMI
-TEXT ·matchLen(SB), NOSPLIT, $0-56
- MOVQ a_base+0(FP), AX
- MOVQ b_base+24(FP), CX
- MOVQ a_len+8(FP), DX
-
- // matchLen
- XORL SI, SI
- CMPL DX, $0x08
- JL matchlen_match4_standalone
-
-matchlen_loopback_standalone:
- MOVQ (AX)(SI*1), BX
- XORQ (CX)(SI*1), BX
- TESTQ BX, BX
- JZ matchlen_loop_standalone
-
-#ifdef GOAMD64_v3
- TZCNTQ BX, BX
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef GOAMD64_v4
- TZCNTQ BX, BX
-
-#define TZCNTQ_EMITTED 1
-#endif
-
-#ifdef TZCNTQ_EMITTED
-#undef TZCNTQ_EMITTED
-#else
- BSFQ BX, BX
-
-#endif
- SARQ $0x03, BX
- LEAL (SI)(BX*1), SI
- JMP gen_match_len_end
-
-matchlen_loop_standalone:
- LEAL -8(DX), DX
- LEAL 8(SI), SI
- CMPL DX, $0x08
- JGE matchlen_loopback_standalone
- JZ gen_match_len_end
-
-matchlen_match4_standalone:
- CMPL DX, $0x04
- JL matchlen_match2_standalone
- MOVL (AX)(SI*1), BX
- CMPL (CX)(SI*1), BX
- JNE matchlen_match2_standalone
- SUBL $0x04, DX
- LEAL 4(SI), SI
-
-matchlen_match2_standalone:
- CMPL DX, $0x02
- JL matchlen_match1_standalone
- MOVW (AX)(SI*1), BX
- CMPW (CX)(SI*1), BX
- JNE matchlen_match1_standalone
- SUBL $0x02, DX
- LEAL 2(SI), SI
-
-matchlen_match1_standalone:
- CMPL DX, $0x01
- JL gen_match_len_end
- MOVB (AX)(SI*1), BL
- CMPB (CX)(SI*1), BL
- JNE gen_match_len_end
- LEAL 1(SI), SI
-
-gen_match_len_end:
- MOVQ SI, ret+48(FP)
- RET
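For reference, a rough Go equivalent of the matchLen assembly above — a sketch, not the generated code — compares eight bytes at a time and uses the trailing zero count of the XOR to locate the first differing byte. Like the assembly it assumes len(b) >= len(a), and it assumes the encoding/binary and math/bits imports:

func matchLenSketch(a, b []byte) (n int) {
	// Compare 8 bytes at a time; a nonzero XOR marks the first mismatch.
	for len(a) >= 8 {
		if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 {
			// TZCNT/BSF in the assembly: trailing zero bits / 8 = extra matching bytes.
			return n + bits.TrailingZeros64(diff)>>3
		}
		n += 8
		a, b = a[8:], b[8:]
	}
	// Tail of 0-7 bytes, one byte at a time.
	for i := range a {
		if a[i] != b[i] {
			return n
		}
		n++
	}
	return n
}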
diff --git a/vendor/github.com/klauspost/compress/s2/index.go b/vendor/github.com/klauspost/compress/s2/index.go
deleted file mode 100644
index fd857682e..000000000
--- a/vendor/github.com/klauspost/compress/s2/index.go
+++ /dev/null
@@ -1,525 +0,0 @@
-// Copyright (c) 2022+ Klaus Post. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package s2
-
-import (
- "bytes"
- "encoding/binary"
- "encoding/json"
- "fmt"
- "io"
-)
-
-const (
- S2IndexHeader = "s2idx\x00"
- S2IndexTrailer = "\x00xdi2s"
- maxIndexEntries = 1 << 16
-)
-
-// Index represents an S2/Snappy index.
-type Index struct {
- TotalUncompressed int64 // Total Uncompressed size if known. Will be -1 if unknown.
- TotalCompressed int64 // Total Compressed size if known. Will be -1 if unknown.
- info []struct {
- compressedOffset int64
- uncompressedOffset int64
- }
- estBlockUncomp int64
-}
-
-func (i *Index) reset(maxBlock int) {
- i.estBlockUncomp = int64(maxBlock)
- i.TotalCompressed = -1
- i.TotalUncompressed = -1
- if len(i.info) > 0 {
- i.info = i.info[:0]
- }
-}
-
-// allocInfos will allocate an empty slice of infos.
-func (i *Index) allocInfos(n int) {
- if n > maxIndexEntries {
- panic("n > maxIndexEntries")
- }
- i.info = make([]struct {
- compressedOffset int64
- uncompressedOffset int64
- }, 0, n)
-}
-
-// add an uncompressed and compressed pair.
-// Entries must be sent in order.
-func (i *Index) add(compressedOffset, uncompressedOffset int64) error {
- if i == nil {
- return nil
- }
- lastIdx := len(i.info) - 1
- if lastIdx >= 0 {
- latest := i.info[lastIdx]
- if latest.uncompressedOffset == uncompressedOffset {
- // Uncompressed didn't change, don't add entry,
- // but update start index.
- latest.compressedOffset = compressedOffset
- i.info[lastIdx] = latest
- return nil
- }
- if latest.uncompressedOffset > uncompressedOffset {
- return fmt.Errorf("internal error: Earlier uncompressed received (%d > %d)", latest.uncompressedOffset, uncompressedOffset)
- }
- if latest.compressedOffset > compressedOffset {
- return fmt.Errorf("internal error: Earlier compressed received (%d > %d)", latest.uncompressedOffset, uncompressedOffset)
- }
- }
- i.info = append(i.info, struct {
- compressedOffset int64
- uncompressedOffset int64
- }{compressedOffset: compressedOffset, uncompressedOffset: uncompressedOffset})
- return nil
-}
-
-// Find the offset at or before the wanted (uncompressed) offset.
-// If offset is 0 or positive it is the offset from the beginning of the file.
-// If the uncompressed size is known, the offset must be within the file.
-// If an offset outside the file is requested, io.ErrUnexpectedEOF is returned.
-// If the offset is negative, it is interpreted as the distance from the end of the file,
-// where -1 represents the last byte.
-// If offset from the end of the file is requested, but size is unknown,
-// ErrUnsupported will be returned.
-func (i *Index) Find(offset int64) (compressedOff, uncompressedOff int64, err error) {
- if i.TotalUncompressed < 0 {
- return 0, 0, ErrCorrupt
- }
- if offset < 0 {
- offset = i.TotalUncompressed + offset
- if offset < 0 {
- return 0, 0, io.ErrUnexpectedEOF
- }
- }
- if offset > i.TotalUncompressed {
- return 0, 0, io.ErrUnexpectedEOF
- }
- for _, info := range i.info {
- if info.uncompressedOffset > offset {
- break
- }
- compressedOff = info.compressedOffset
- uncompressedOff = info.uncompressedOffset
- }
- return compressedOff, uncompressedOff, nil
-}
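A hedged usage sketch of Find (hypothetical helper; it assumes the package's Reader.Skip method): seek the compressed stream to the block containing the wanted uncompressed offset, then let the decoder skip the remainder.

func seekToUncompressed(rs io.ReadSeeker, idx *Index, want int64) (*Reader, error) {
	compOff, uncompOff, err := idx.Find(want)
	if err != nil {
		return nil, err
	}
	// Position the compressed stream at the start of the block.
	if _, err := rs.Seek(compOff, io.SeekStart); err != nil {
		return nil, err
	}
	dec := NewReader(rs)
	// Skip the uncompressed bytes between the block start and the wanted offset.
	if err := dec.Skip(want - uncompOff); err != nil {
		return nil, err
	}
	return dec, nil
}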
-
-// reduce to stay below maxIndexEntries
-func (i *Index) reduce() {
- if len(i.info) < maxIndexEntries && i.estBlockUncomp >= 1<<20 {
- return
- }
-
-	// Algorithm: keep one entry, then skip the next removeN entries.
- removeN := (len(i.info) + 1) / maxIndexEntries
- src := i.info
- j := 0
-
- // Each block should be at least 1MB, but don't reduce below 1000 entries.
- for i.estBlockUncomp*(int64(removeN)+1) < 1<<20 && len(i.info)/(removeN+1) > 1000 {
- removeN++
- }
- for idx := 0; idx < len(src); idx++ {
- i.info[j] = src[idx]
- j++
- idx += removeN
- }
- i.info = i.info[:j]
- // Update maxblock estimate.
- i.estBlockUncomp += i.estBlockUncomp * int64(removeN)
-}
-
-func (i *Index) appendTo(b []byte, uncompTotal, compTotal int64) []byte {
- i.reduce()
- var tmp [binary.MaxVarintLen64]byte
-
- initSize := len(b)
- // We make the start a skippable header+size.
- b = append(b, ChunkTypeIndex, 0, 0, 0)
- b = append(b, []byte(S2IndexHeader)...)
- // Total Uncompressed size
- n := binary.PutVarint(tmp[:], uncompTotal)
- b = append(b, tmp[:n]...)
- // Total Compressed size
- n = binary.PutVarint(tmp[:], compTotal)
- b = append(b, tmp[:n]...)
- // Put EstBlockUncomp size
- n = binary.PutVarint(tmp[:], i.estBlockUncomp)
- b = append(b, tmp[:n]...)
- // Put length
- n = binary.PutVarint(tmp[:], int64(len(i.info)))
- b = append(b, tmp[:n]...)
-
- // Check if we should add uncompressed offsets
- var hasUncompressed byte
- for idx, info := range i.info {
- if idx == 0 {
- if info.uncompressedOffset != 0 {
- hasUncompressed = 1
- break
- }
- continue
- }
- if info.uncompressedOffset != i.info[idx-1].uncompressedOffset+i.estBlockUncomp {
- hasUncompressed = 1
- break
- }
- }
- b = append(b, hasUncompressed)
-
- // Add each entry
- if hasUncompressed == 1 {
- for idx, info := range i.info {
- uOff := info.uncompressedOffset
- if idx > 0 {
- prev := i.info[idx-1]
- uOff -= prev.uncompressedOffset + (i.estBlockUncomp)
- }
- n = binary.PutVarint(tmp[:], uOff)
- b = append(b, tmp[:n]...)
- }
- }
-
- // Initial compressed size estimate.
- cPredict := i.estBlockUncomp / 2
-
- for idx, info := range i.info {
- cOff := info.compressedOffset
- if idx > 0 {
- prev := i.info[idx-1]
- cOff -= prev.compressedOffset + cPredict
- // Update compressed size prediction, with half the error.
- cPredict += cOff / 2
- }
- n = binary.PutVarint(tmp[:], cOff)
- b = append(b, tmp[:n]...)
- }
-
- // Add Total Size.
- // Stored as fixed size for easier reading.
- binary.LittleEndian.PutUint32(tmp[:], uint32(len(b)-initSize+4+len(S2IndexTrailer)))
- b = append(b, tmp[:4]...)
- // Trailer
- b = append(b, []byte(S2IndexTrailer)...)
-
- // Update size
- chunkLen := len(b) - initSize - skippableFrameHeader
- b[initSize+1] = uint8(chunkLen >> 0)
- b[initSize+2] = uint8(chunkLen >> 8)
- b[initSize+3] = uint8(chunkLen >> 16)
- //fmt.Printf("chunklen: 0x%x Uncomp:%d, Comp:%d\n", chunkLen, uncompTotal, compTotal)
- return b
-}
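For orientation, the byte layout appendTo produces, as read from the code above (integer fields are signed varints unless a fixed width is noted):

	1 byte    ChunkTypeIndex (0x99)
	3 bytes   chunk length, 24-bit little endian (patched in at the end)
	6 bytes   S2IndexHeader "s2idx\x00"
	varint    total uncompressed size
	varint    total compressed size
	varint    estBlockUncomp
	varint    number of entries
	1 byte    hasUncompressed flag
	varints   uncompressed offset deltas (only when the flag is 1)
	varints   compressed offset deltas, relative to a running prediction
	4 bytes   total index size, 32-bit little endian (trailer included)
	6 bytes   S2IndexTrailer "\x00xdi2s"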
-
-// Load a binary index.
-// A zero value Index can be used or a previous one can be reused.
-func (i *Index) Load(b []byte) ([]byte, error) {
- if len(b) <= 4+len(S2IndexHeader)+len(S2IndexTrailer) {
- return b, io.ErrUnexpectedEOF
- }
- if b[0] != ChunkTypeIndex {
- return b, ErrCorrupt
- }
- chunkLen := int(b[1]) | int(b[2])<<8 | int(b[3])<<16
- b = b[4:]
-
- // Validate we have enough...
- if len(b) < chunkLen {
- return b, io.ErrUnexpectedEOF
- }
- if !bytes.Equal(b[:len(S2IndexHeader)], []byte(S2IndexHeader)) {
- return b, ErrUnsupported
- }
- b = b[len(S2IndexHeader):]
-
- // Total Uncompressed
- if v, n := binary.Varint(b); n <= 0 || v < 0 {
- return b, ErrCorrupt
- } else {
- i.TotalUncompressed = v
- b = b[n:]
- }
-
- // Total Compressed
- if v, n := binary.Varint(b); n <= 0 {
- return b, ErrCorrupt
- } else {
- i.TotalCompressed = v
- b = b[n:]
- }
-
- // Read EstBlockUncomp
- if v, n := binary.Varint(b); n <= 0 {
- return b, ErrCorrupt
- } else {
- if v < 0 {
- return b, ErrCorrupt
- }
- i.estBlockUncomp = v
- b = b[n:]
- }
-
- var entries int
- if v, n := binary.Varint(b); n <= 0 {
- return b, ErrCorrupt
- } else {
- if v < 0 || v > maxIndexEntries {
- return b, ErrCorrupt
- }
- entries = int(v)
- b = b[n:]
- }
- if cap(i.info) < entries {
- i.allocInfos(entries)
- }
- i.info = i.info[:entries]
-
- if len(b) < 1 {
- return b, io.ErrUnexpectedEOF
- }
- hasUncompressed := b[0]
- b = b[1:]
- if hasUncompressed&1 != hasUncompressed {
- return b, ErrCorrupt
- }
-
- // Add each uncompressed entry
- for idx := range i.info {
- var uOff int64
- if hasUncompressed != 0 {
- // Load delta
- if v, n := binary.Varint(b); n <= 0 {
- return b, ErrCorrupt
- } else {
- uOff = v
- b = b[n:]
- }
- }
-
- if idx > 0 {
- prev := i.info[idx-1].uncompressedOffset
- uOff += prev + (i.estBlockUncomp)
- if uOff <= prev {
- return b, ErrCorrupt
- }
- }
- if uOff < 0 {
- return b, ErrCorrupt
- }
- i.info[idx].uncompressedOffset = uOff
- }
-
- // Initial compressed size estimate.
- cPredict := i.estBlockUncomp / 2
-
- // Add each compressed entry
- for idx := range i.info {
- var cOff int64
- if v, n := binary.Varint(b); n <= 0 {
- return b, ErrCorrupt
- } else {
- cOff = v
- b = b[n:]
- }
-
- if idx > 0 {
- // Update compressed size prediction, with half the error.
- cPredictNew := cPredict + cOff/2
-
- prev := i.info[idx-1].compressedOffset
- cOff += prev + cPredict
- if cOff <= prev {
- return b, ErrCorrupt
- }
- cPredict = cPredictNew
- }
- if cOff < 0 {
- return b, ErrCorrupt
- }
- i.info[idx].compressedOffset = cOff
- }
- if len(b) < 4+len(S2IndexTrailer) {
- return b, io.ErrUnexpectedEOF
- }
- // Skip size...
- b = b[4:]
-
- // Check trailer...
- if !bytes.Equal(b[:len(S2IndexTrailer)], []byte(S2IndexTrailer)) {
- return b, ErrCorrupt
- }
- return b[len(S2IndexTrailer):], nil
-}
-
-// LoadStream will load an index from the end of the supplied stream.
-// ErrUnsupported will be returned if the signature cannot be found.
-// ErrCorrupt will be returned if unexpected values are found.
-// io.ErrUnexpectedEOF is returned if there are too few bytes.
-// IO errors are returned as-is.
-func (i *Index) LoadStream(rs io.ReadSeeker) error {
- // Go to end.
- _, err := rs.Seek(-10, io.SeekEnd)
- if err != nil {
- return err
- }
- var tmp [10]byte
- _, err = io.ReadFull(rs, tmp[:])
- if err != nil {
- return err
- }
- // Check trailer...
- if !bytes.Equal(tmp[4:4+len(S2IndexTrailer)], []byte(S2IndexTrailer)) {
- return ErrUnsupported
- }
- sz := binary.LittleEndian.Uint32(tmp[:4])
- if sz > maxChunkSize+skippableFrameHeader {
- return ErrCorrupt
- }
- _, err = rs.Seek(-int64(sz), io.SeekEnd)
- if err != nil {
- return err
- }
-
- // Read index.
- buf := make([]byte, sz)
- _, err = io.ReadFull(rs, buf)
- if err != nil {
- return err
- }
- _, err = i.Load(buf)
- return err
-}
-
-// IndexStream will return an index for a stream.
-// The stream structure will be checked, but
-// data within blocks is not verified.
-// The returned index can either be appended to the end of the stream
-// or stored separately.
-func IndexStream(r io.Reader) ([]byte, error) {
- var i Index
- var buf [maxChunkSize]byte
- var readHeader bool
- for {
- _, err := io.ReadFull(r, buf[:4])
- if err != nil {
- if err == io.EOF {
- return i.appendTo(nil, i.TotalUncompressed, i.TotalCompressed), nil
- }
- return nil, err
- }
- // Start of this chunk.
- startChunk := i.TotalCompressed
- i.TotalCompressed += 4
-
- chunkType := buf[0]
- if !readHeader {
- if chunkType != chunkTypeStreamIdentifier {
- return nil, ErrCorrupt
- }
- readHeader = true
- }
- chunkLen := int(buf[1]) | int(buf[2])<<8 | int(buf[3])<<16
- if chunkLen < checksumSize {
- return nil, ErrCorrupt
- }
-
- i.TotalCompressed += int64(chunkLen)
- _, err = io.ReadFull(r, buf[:chunkLen])
- if err != nil {
- return nil, io.ErrUnexpectedEOF
- }
- // The chunk types are specified at
- // https://github.com/google/snappy/blob/master/framing_format.txt
- switch chunkType {
- case chunkTypeCompressedData:
- // Section 4.2. Compressed data (chunk type 0x00).
- // Skip checksum.
- dLen, err := DecodedLen(buf[checksumSize:])
- if err != nil {
- return nil, err
- }
- if dLen > maxBlockSize {
- return nil, ErrCorrupt
- }
- if i.estBlockUncomp == 0 {
- // Use first block for estimate...
- i.estBlockUncomp = int64(dLen)
- }
- err = i.add(startChunk, i.TotalUncompressed)
- if err != nil {
- return nil, err
- }
- i.TotalUncompressed += int64(dLen)
- continue
- case chunkTypeUncompressedData:
- n2 := chunkLen - checksumSize
- if n2 > maxBlockSize {
- return nil, ErrCorrupt
- }
- if i.estBlockUncomp == 0 {
- // Use first block for estimate...
- i.estBlockUncomp = int64(n2)
- }
- err = i.add(startChunk, i.TotalUncompressed)
- if err != nil {
- return nil, err
- }
- i.TotalUncompressed += int64(n2)
- continue
- case chunkTypeStreamIdentifier:
- // Section 4.1. Stream identifier (chunk type 0xff).
- if chunkLen != len(magicBody) {
- return nil, ErrCorrupt
- }
-
- if string(buf[:len(magicBody)]) != magicBody {
- if string(buf[:len(magicBody)]) != magicBodySnappy {
- return nil, ErrCorrupt
- }
- }
-
- continue
- }
-
- if chunkType <= 0x7f {
- // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
- return nil, ErrUnsupported
- }
- if chunkLen > maxChunkSize {
- return nil, ErrUnsupported
- }
- // Section 4.4 Padding (chunk type 0xfe).
- // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
- }
-}
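A hedged usage sketch (hypothetical helper; assumes an os import): index an existing stream file with IndexStream and append the result, which keeps the stream valid because the index is written as a skippable chunk.

func appendIndexToFile(f *os.File) error {
	// Read the whole stream from the start to build the index.
	if _, err := f.Seek(0, io.SeekStart); err != nil {
		return err
	}
	idx, err := IndexStream(f)
	if err != nil {
		return err
	}
	// IndexStream left the file position at EOF, so this write appends.
	_, err = f.Write(idx)
	return err
}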
-
-// JSON returns the index as JSON text.
-func (i *Index) JSON() []byte {
- x := struct {
- TotalUncompressed int64 `json:"total_uncompressed"` // Total Uncompressed size if known. Will be -1 if unknown.
- TotalCompressed int64 `json:"total_compressed"` // Total Compressed size if known. Will be -1 if unknown.
- Offsets []struct {
- CompressedOffset int64 `json:"compressed"`
- UncompressedOffset int64 `json:"uncompressed"`
- } `json:"offsets"`
- EstBlockUncomp int64 `json:"est_block_uncompressed"`
- }{
- TotalUncompressed: i.TotalUncompressed,
- TotalCompressed: i.TotalCompressed,
- EstBlockUncomp: i.estBlockUncomp,
- }
- for _, v := range i.info {
- x.Offsets = append(x.Offsets, struct {
- CompressedOffset int64 `json:"compressed"`
- UncompressedOffset int64 `json:"uncompressed"`
- }{CompressedOffset: v.compressedOffset, UncompressedOffset: v.uncompressedOffset})
- }
- b, _ := json.MarshalIndent(x, "", " ")
- return b
-}
diff --git a/vendor/github.com/klauspost/compress/s2/s2.go b/vendor/github.com/klauspost/compress/s2/s2.go
deleted file mode 100644
index dae3f731f..000000000
--- a/vendor/github.com/klauspost/compress/s2/s2.go
+++ /dev/null
@@ -1,143 +0,0 @@
-// Copyright 2011 The Snappy-Go Authors. All rights reserved.
-// Copyright (c) 2019 Klaus Post. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package s2 implements the S2 compression format.
-//
-// S2 is an extension of Snappy. Similar to Snappy, S2 is aimed at high throughput,
-// which is why it features concurrent compression for bigger payloads.
-//
-// Decoding is compatible with Snappy compressed content,
-// but content compressed with S2 cannot be decompressed by Snappy.
-//
-// For more information on Snappy/S2 differences, see the README at: https://github.com/klauspost/compress/tree/master/s2
-//
-// There are actually two S2 formats: block and stream. They are related,
-// but different: trying to decompress block-compressed data as an S2 stream
-// will fail, and vice versa. The block format is the Decode and Encode
-// functions and the stream format is the Reader and Writer types.
-//
-// A "better" compression option is available. This will trade some compression
-// speed
-//
-// The block format, the more common case, is used when the complete size (the
-// number of bytes) of the original data is known upfront, at the time
-// compression starts. The stream format, also known as the framing format, is
-// for when that isn't always true.
-//
-// Blocks do not offer much data protection, so it is up to you to
-// add data validation of decompressed blocks.
-//
-// Streams perform CRC validation of the decompressed data.
-// Stream compression will also be performed on multiple CPU cores concurrently,
-// significantly improving throughput.
-package s2
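A hedged usage sketch from a consumer's point of view (hypothetical helper; assumes an io import), contrasting the block and stream formats described above:

func exampleBlockAndStream(in []byte, w io.Writer, r io.Reader) error {
	// Block format: the whole input is known up front.
	block := Encode(nil, in)
	if _, err := Decode(nil, block); err != nil {
		return err
	}

	// Stream (framing) format: concurrent compression, CRC-checked on decode.
	enc := NewWriter(w)
	if _, err := enc.Write(in); err != nil {
		return err
	}
	if err := enc.Close(); err != nil {
		return err
	}
	dec := NewReader(r)
	_, err := io.Copy(io.Discard, dec)
	return err
}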
-
-import (
- "bytes"
- "hash/crc32"
-)
-
-/*
-Each encoded block begins with the varint-encoded length of the decoded data,
-followed by a sequence of chunks. Chunks begin and end on byte boundaries. The
-first byte of each chunk is broken into its 2 least and 6 most significant bits
-called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag.
-Zero means a literal tag. All other values mean a copy tag.
-
-For literal tags:
- - If m < 60, the next 1 + m bytes are literal bytes.
- - Otherwise, let n be the little-endian unsigned integer denoted by the next
- m - 59 bytes. The next 1 + n bytes after that are literal bytes.
-
-For copy tags, length bytes are copied from offset bytes ago, in the style of
-Lempel-Ziv compression algorithms. In particular:
- - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12).
- The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10
- of the offset. The next byte is bits 0-7 of the offset.
- - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65).
- The length is 1 + m. The offset is the little-endian unsigned integer
- denoted by the next 2 bytes.
- - For l == 3, the offset ranges in [0, 1<<32) and the length in
- [1, 65). The length is 1 + m. The offset is the little-endian unsigned
- integer denoted by the next 4 bytes.
-*/
-const (
- tagLiteral = 0x00
- tagCopy1 = 0x01
- tagCopy2 = 0x02
- tagCopy4 = 0x03
-)
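A sketch of copy encoding using these tags, following the layout in the comment above (not the generated encoder this package actually uses; it only handles offsets below 65536 and assumes length >= 4):

func emitCopySketch(dst []byte, offset, length int) int {
	i := 0
	// While plenty of length remains, emit maximal 64-byte tagCopy2 copies.
	for length >= 68 {
		dst[i+0] = 63<<2 | tagCopy2 // length = 1 + 63
		dst[i+1] = uint8(offset)
		dst[i+2] = uint8(offset >> 8)
		i += 3
		length -= 64
	}
	if length > 64 {
		// Emit a length-60 copy so at least 4 bytes remain for the final tag.
		dst[i+0] = 59<<2 | tagCopy2
		dst[i+1] = uint8(offset)
		dst[i+2] = uint8(offset >> 8)
		i += 3
		length -= 60
	}
	if length >= 12 || offset >= 2048 {
		// tagCopy2: length 1..64, 2-byte little-endian offset.
		dst[i+0] = uint8(length-1)<<2 | tagCopy2
		dst[i+1] = uint8(offset)
		dst[i+2] = uint8(offset >> 8)
		return i + 3
	}
	// tagCopy1: length 4..11, 11-bit offset split across the tag byte and one extra byte.
	dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
	dst[i+1] = uint8(offset)
	return i + 2
}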
-
-const (
- checksumSize = 4
- chunkHeaderSize = 4
- magicChunk = "\xff\x06\x00\x00" + magicBody
- magicChunkSnappy = "\xff\x06\x00\x00" + magicBodySnappy
- magicBodySnappy = "sNaPpY"
- magicBody = "S2sTwO"
-
- // maxBlockSize is the maximum size of the input to encodeBlock.
- //
- // For the framing format (Writer type instead of Encode function),
- // this is the maximum uncompressed size of a block.
- maxBlockSize = 4 << 20
-
- // minBlockSize is the minimum size of block setting when creating a writer.
- minBlockSize = 4 << 10
-
- skippableFrameHeader = 4
- maxChunkSize = 1<<24 - 1 // 16777215
-
- // Default block size
- defaultBlockSize = 1 << 20
-
- // maxSnappyBlockSize is the maximum snappy block size.
- maxSnappyBlockSize = 1 << 16
-
- obufHeaderLen = checksumSize + chunkHeaderSize
-)
-
-const (
- chunkTypeCompressedData = 0x00
- chunkTypeUncompressedData = 0x01
- ChunkTypeIndex = 0x99
- chunkTypePadding = 0xfe
- chunkTypeStreamIdentifier = 0xff
-)
-
-var crcTable = crc32.MakeTable(crc32.Castagnoli)
-
-// crc implements the checksum specified in section 3 of
-// https://github.com/google/snappy/blob/master/framing_format.txt
-func crc(b []byte) uint32 {
- c := crc32.Update(0, crcTable, b)
- return c>>15 | c<<17 + 0xa282ead8
-}
-
-// literalExtraSize returns the extra size of encoding n literals.
-// n should be >= 0 and <= math.MaxUint32.
-func literalExtraSize(n int64) int64 {
- if n == 0 {
- return 0
- }
- switch {
- case n < 60:
- return 1
- case n < 1<<8:
- return 2
- case n < 1<<16:
- return 3
- case n < 1<<24:
- return 4
- default:
- return 5
- }
-}
-
-type byter interface {
- Bytes() []byte
-}
-
-var _ byter = &bytes.Buffer{}