diff options
author | 2024-04-02 11:03:40 +0100 | |
---|---|---|
committer | 2024-04-02 12:03:40 +0200 | |
commit | adf345f1ec0cb76a0df94a4505143d891659cba9 (patch) | |
tree | e0cca289c0a50f30191d4b65a2c336704570e470 /vendor/github.com | |
parent | [feature] Option to hide followers/following (#2788) (diff) | |
download | gotosocial-adf345f1ec0cb76a0df94a4505143d891659cba9.tar.xz |
[chore] bump go structr cache version -> v0.6.0 (#2773)
* update go-structr library -> v0.6.0, add necessary wrapping types + code changes to support these changes
* update readme with go-structr package changes
* improved wrapping of the SliceCache type
* add code comments for the cache wrapper types
* remove test.out :innocent:
---------
Co-authored-by: tobi <31960611+tsmethurst@users.noreply.github.com>
Diffstat (limited to 'vendor/github.com')
18 files changed, 0 insertions, 4185 deletions
diff --git a/vendor/github.com/zeebo/xxh3/.gitignore b/vendor/github.com/zeebo/xxh3/.gitignore deleted file mode 100644 index 928e12f53..000000000 --- a/vendor/github.com/zeebo/xxh3/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -upstream -*.pprof -xxh3.test -.vscode -*.txt -_compat diff --git a/vendor/github.com/zeebo/xxh3/LICENSE b/vendor/github.com/zeebo/xxh3/LICENSE deleted file mode 100644 index 477f8e5e1..000000000 --- a/vendor/github.com/zeebo/xxh3/LICENSE +++ /dev/null @@ -1,25 +0,0 @@ -xxHash Library -Copyright (c) 2012-2014, Yann Collet -Copyright (c) 2019, Jeff Wendling -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, this - list of conditions and the following disclaimer in the documentation and/or - other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/zeebo/xxh3/Makefile b/vendor/github.com/zeebo/xxh3/Makefile deleted file mode 100644 index 8bd78c482..000000000 --- a/vendor/github.com/zeebo/xxh3/Makefile +++ /dev/null @@ -1,27 +0,0 @@ -.PHONY: all vet -all: genasm _compat - -genasm: avo/avx.go avo/sse.go - cd ./avo; go generate gen.go - -clean: - rm accum_vector_avx_amd64.s - rm accum_vector_sse_amd64.s - rm _compat - -upstream/xxhash.o: upstream/xxhash.h - ( cd upstream && make ) - -_compat: _compat.c upstream/xxhash.o - gcc -o _compat _compat.c ./upstream/xxhash.o - -vet: - GOOS=linux GOARCH=386 GO386=softfloat go vet ./... - GOOS=windows GOARCH=386 GO386=softfloat go vet ./... - GOOS=linux GOARCH=amd64 go vet ./... - GOOS=windows GOARCH=amd64 go vet ./... - GOOS=darwin GOARCH=amd64 go vet ./... - GOOS=linux GOARCH=arm go vet ./... - GOOS=linux GOARCH=arm64 go vet ./... - GOOS=windows GOARCH=arm64 go vet ./... - GOOS=darwin GOARCH=arm64 go vet ./...
\ No newline at end of file diff --git a/vendor/github.com/zeebo/xxh3/README.md b/vendor/github.com/zeebo/xxh3/README.md deleted file mode 100644 index 4633fc03a..000000000 --- a/vendor/github.com/zeebo/xxh3/README.md +++ /dev/null @@ -1,38 +0,0 @@ -# XXH3 -[](https://godoc.org/github.com/zeebo/xxh3) -[](https://sourcegraph.com/github.com/zeebo/xxh3?badge) -[](https://goreportcard.com/report/github.com/zeebo/xxh3) - -This package is a port of the [xxh3](https://github.com/Cyan4973/xxHash) library to Go. - -Upstream has fixed the output as of v0.8.0, and this package matches that. - ---- - -# Benchmarks - -Run on my `i7-8850H CPU @ 2.60GHz` - -## Small Sizes - -| Bytes | Rate | -|-----------|--------------------------------------| -|` 0 ` |` 0.74 ns/op ` | -|` 1-3 ` |` 4.19 ns/op (0.24 GB/s - 0.71 GB/s) `| -|` 4-8 ` |` 4.16 ns/op (0.97 GB/s - 1.98 GB/s) `| -|` 9-16 ` |` 4.46 ns/op (2.02 GB/s - 3.58 GB/s) `| -|` 17-32 ` |` 6.22 ns/op (2.76 GB/s - 5.15 GB/s) `| -|` 33-64 ` |` 8.00 ns/op (4.13 GB/s - 8.13 GB/s) `| -|` 65-96 ` |` 11.0 ns/op (5.91 GB/s - 8.84 GB/s) `| -|` 97-128 ` |` 12.8 ns/op (7.68 GB/s - 10.0 GB/s) `| - -## Large Sizes - -| Bytes | Rate | SSE2 Rate | AVX2 Rate | -|---------|--------------------------|--------------------------|--------------------------| -|` 129 ` |` 13.6 ns/op (9.45 GB/s) `| | | -|` 240 ` |` 23.8 ns/op (10.1 GB/s) `| | | -|` 241 ` |` 40.5 ns/op (5.97 GB/s) `|` 23.3 ns/op (10.4 GB/s) `|` 20.1 ns/op (12.0 GB/s) `| -|` 512 ` |` 69.8 ns/op (7.34 GB/s) `|` 30.4 ns/op (16.9 GB/s) `|` 24.7 ns/op (20.7 GB/s) `| -|` 1024 ` |` 132 ns/op (7.77 GB/s) `|` 48.9 ns/op (20.9 GB/s) `|` 37.7 ns/op (27.2 GB/s) `| -|` 100KB `|` 13.0 us/op (7.88 GB/s) `|` 4.05 us/op (25.3 GB/s) `|` 2.31 us/op (44.3 GB/s) `| diff --git a/vendor/github.com/zeebo/xxh3/_compat.c b/vendor/github.com/zeebo/xxh3/_compat.c deleted file mode 100644 index fda9f36ff..000000000 --- a/vendor/github.com/zeebo/xxh3/_compat.c +++ /dev/null @@ -1,39 +0,0 @@ -#include "upstream/xxhash.h" -#include <stdio.h> - -int main() { - unsigned char buf[4096]; - for (int i = 0; i < 4096; i++) { - buf[i] = (unsigned char)((i+1)%251); - } - - printf("var testVecs64 = []uint64{\n"); - for (int i = 0; i < 4096; i++) { - if (i % 4 == 0) { - printf("\t"); - } - - uint64_t h = XXH3_64bits(buf, (size_t)i); - printf("0x%lx, ", h); - - if (i % 4 == 3) { - printf("\n\t"); - } - } - printf("}\n\n"); - - printf("var testVecs128 = [][2]uint64{\n"); - for (int i = 0; i < 4096; i++) { - if (i % 4 == 0) { - printf("\t"); - } - - XXH128_hash_t h = XXH3_128bits(buf, (size_t)i); - printf("{0x%lx, 0x%lx}, ", h.high64, h.low64); - - if (i % 4 == 3) { - printf("\n"); - } - } - printf("}\n\n"); -} diff --git a/vendor/github.com/zeebo/xxh3/accum_generic.go b/vendor/github.com/zeebo/xxh3/accum_generic.go deleted file mode 100644 index b1be78507..000000000 --- a/vendor/github.com/zeebo/xxh3/accum_generic.go +++ /dev/null @@ -1,542 +0,0 @@ -package xxh3 - -// avx512Switch is the size at which the avx512 code is used. -// Bigger blocks benefit more. -const avx512Switch = 1 << 10 - -func accumScalar(accs *[8]u64, p, secret ptr, l u64) { - if secret != key { - accumScalarSeed(accs, p, secret, l) - return - } - for l > _block { - k := secret - - // accs - for i := 0; i < 16; i++ { - dv0 := readU64(p, 8*0) - dk0 := dv0 ^ readU64(k, 8*0) - accs[1] += dv0 - accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32) - - dv1 := readU64(p, 8*1) - dk1 := dv1 ^ readU64(k, 8*1) - accs[0] += dv1 - accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32) - - dv2 := readU64(p, 8*2) - dk2 := dv2 ^ readU64(k, 8*2) - accs[3] += dv2 - accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32) - - dv3 := readU64(p, 8*3) - dk3 := dv3 ^ readU64(k, 8*3) - accs[2] += dv3 - accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32) - - dv4 := readU64(p, 8*4) - dk4 := dv4 ^ readU64(k, 8*4) - accs[5] += dv4 - accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32) - - dv5 := readU64(p, 8*5) - dk5 := dv5 ^ readU64(k, 8*5) - accs[4] += dv5 - accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32) - - dv6 := readU64(p, 8*6) - dk6 := dv6 ^ readU64(k, 8*6) - accs[7] += dv6 - accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32) - - dv7 := readU64(p, 8*7) - dk7 := dv7 ^ readU64(k, 8*7) - accs[6] += dv7 - accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32) - - l -= _stripe - if l > 0 { - p, k = ptr(ui(p)+_stripe), ptr(ui(k)+8) - } - } - - // scramble accs - accs[0] ^= accs[0] >> 47 - accs[0] ^= key64_128 - accs[0] *= prime32_1 - - accs[1] ^= accs[1] >> 47 - accs[1] ^= key64_136 - accs[1] *= prime32_1 - - accs[2] ^= accs[2] >> 47 - accs[2] ^= key64_144 - accs[2] *= prime32_1 - - accs[3] ^= accs[3] >> 47 - accs[3] ^= key64_152 - accs[3] *= prime32_1 - - accs[4] ^= accs[4] >> 47 - accs[4] ^= key64_160 - accs[4] *= prime32_1 - - accs[5] ^= accs[5] >> 47 - accs[5] ^= key64_168 - accs[5] *= prime32_1 - - accs[6] ^= accs[6] >> 47 - accs[6] ^= key64_176 - accs[6] *= prime32_1 - - accs[7] ^= accs[7] >> 47 - accs[7] ^= key64_184 - accs[7] *= prime32_1 - } - - if l > 0 { - t, k := (l-1)/_stripe, secret - - for i := u64(0); i < t; i++ { - dv0 := readU64(p, 8*0) - dk0 := dv0 ^ readU64(k, 8*0) - accs[1] += dv0 - accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32) - - dv1 := readU64(p, 8*1) - dk1 := dv1 ^ readU64(k, 8*1) - accs[0] += dv1 - accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32) - - dv2 := readU64(p, 8*2) - dk2 := dv2 ^ readU64(k, 8*2) - accs[3] += dv2 - accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32) - - dv3 := readU64(p, 8*3) - dk3 := dv3 ^ readU64(k, 8*3) - accs[2] += dv3 - accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32) - - dv4 := readU64(p, 8*4) - dk4 := dv4 ^ readU64(k, 8*4) - accs[5] += dv4 - accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32) - - dv5 := readU64(p, 8*5) - dk5 := dv5 ^ readU64(k, 8*5) - accs[4] += dv5 - accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32) - - dv6 := readU64(p, 8*6) - dk6 := dv6 ^ readU64(k, 8*6) - accs[7] += dv6 - accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32) - - dv7 := readU64(p, 8*7) - dk7 := dv7 ^ readU64(k, 8*7) - accs[6] += dv7 - accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32) - - l -= _stripe - if l > 0 { - p, k = ptr(ui(p)+_stripe), ptr(ui(k)+8) - } - } - - if l > 0 { - p = ptr(ui(p) - uintptr(_stripe-l)) - - dv0 := readU64(p, 8*0) - dk0 := dv0 ^ key64_121 - accs[1] += dv0 - accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32) - - dv1 := readU64(p, 8*1) - dk1 := dv1 ^ key64_129 - accs[0] += dv1 - accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32) - - dv2 := readU64(p, 8*2) - dk2 := dv2 ^ key64_137 - accs[3] += dv2 - accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32) - - dv3 := readU64(p, 8*3) - dk3 := dv3 ^ key64_145 - accs[2] += dv3 - accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32) - - dv4 := readU64(p, 8*4) - dk4 := dv4 ^ key64_153 - accs[5] += dv4 - accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32) - - dv5 := readU64(p, 8*5) - dk5 := dv5 ^ key64_161 - accs[4] += dv5 - accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32) - - dv6 := readU64(p, 8*6) - dk6 := dv6 ^ key64_169 - accs[7] += dv6 - accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32) - - dv7 := readU64(p, 8*7) - dk7 := dv7 ^ key64_177 - accs[6] += dv7 - accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32) - } - } -} - -func accumBlockScalar(accs *[8]u64, p, secret ptr) { - if secret != key { - accumBlockScalarSeed(accs, p, secret) - return - } - // accs - for i := 0; i < 16; i++ { - dv0 := readU64(p, 8*0) - dk0 := dv0 ^ readU64(secret, 8*0) - accs[1] += dv0 - accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32) - - dv1 := readU64(p, 8*1) - dk1 := dv1 ^ readU64(secret, 8*1) - accs[0] += dv1 - accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32) - - dv2 := readU64(p, 8*2) - dk2 := dv2 ^ readU64(secret, 8*2) - accs[3] += dv2 - accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32) - - dv3 := readU64(p, 8*3) - dk3 := dv3 ^ readU64(secret, 8*3) - accs[2] += dv3 - accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32) - - dv4 := readU64(p, 8*4) - dk4 := dv4 ^ readU64(secret, 8*4) - accs[5] += dv4 - accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32) - - dv5 := readU64(p, 8*5) - dk5 := dv5 ^ readU64(secret, 8*5) - accs[4] += dv5 - accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32) - - dv6 := readU64(p, 8*6) - dk6 := dv6 ^ readU64(secret, 8*6) - accs[7] += dv6 - accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32) - - dv7 := readU64(p, 8*7) - dk7 := dv7 ^ readU64(secret, 8*7) - accs[6] += dv7 - accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32) - - p, secret = ptr(ui(p)+_stripe), ptr(ui(secret)+8) - } - - // scramble accs - accs[0] ^= accs[0] >> 47 - accs[0] ^= key64_128 - accs[0] *= prime32_1 - - accs[1] ^= accs[1] >> 47 - accs[1] ^= key64_136 - accs[1] *= prime32_1 - - accs[2] ^= accs[2] >> 47 - accs[2] ^= key64_144 - accs[2] *= prime32_1 - - accs[3] ^= accs[3] >> 47 - accs[3] ^= key64_152 - accs[3] *= prime32_1 - - accs[4] ^= accs[4] >> 47 - accs[4] ^= key64_160 - accs[4] *= prime32_1 - - accs[5] ^= accs[5] >> 47 - accs[5] ^= key64_168 - accs[5] *= prime32_1 - - accs[6] ^= accs[6] >> 47 - accs[6] ^= key64_176 - accs[6] *= prime32_1 - - accs[7] ^= accs[7] >> 47 - accs[7] ^= key64_184 - accs[7] *= prime32_1 -} - -// accumScalarSeed should be used with custom key. -func accumScalarSeed(accs *[8]u64, p, secret ptr, l u64) { - for l > _block { - k := secret - - // accs - for i := 0; i < 16; i++ { - dv0 := readU64(p, 8*0) - dk0 := dv0 ^ readU64(k, 8*0) - accs[1] += dv0 - accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32) - - dv1 := readU64(p, 8*1) - dk1 := dv1 ^ readU64(k, 8*1) - accs[0] += dv1 - accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32) - - dv2 := readU64(p, 8*2) - dk2 := dv2 ^ readU64(k, 8*2) - accs[3] += dv2 - accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32) - - dv3 := readU64(p, 8*3) - dk3 := dv3 ^ readU64(k, 8*3) - accs[2] += dv3 - accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32) - - dv4 := readU64(p, 8*4) - dk4 := dv4 ^ readU64(k, 8*4) - accs[5] += dv4 - accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32) - - dv5 := readU64(p, 8*5) - dk5 := dv5 ^ readU64(k, 8*5) - accs[4] += dv5 - accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32) - - dv6 := readU64(p, 8*6) - dk6 := dv6 ^ readU64(k, 8*6) - accs[7] += dv6 - accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32) - - dv7 := readU64(p, 8*7) - dk7 := dv7 ^ readU64(k, 8*7) - accs[6] += dv7 - accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32) - - l -= _stripe - if l > 0 { - p, k = ptr(ui(p)+_stripe), ptr(ui(k)+8) - } - } - - // scramble accs - accs[0] ^= accs[0] >> 47 - accs[0] ^= readU64(secret, 128) - accs[0] *= prime32_1 - - accs[1] ^= accs[1] >> 47 - accs[1] ^= readU64(secret, 136) - accs[1] *= prime32_1 - - accs[2] ^= accs[2] >> 47 - accs[2] ^= readU64(secret, 144) - accs[2] *= prime32_1 - - accs[3] ^= accs[3] >> 47 - accs[3] ^= readU64(secret, 152) - accs[3] *= prime32_1 - - accs[4] ^= accs[4] >> 47 - accs[4] ^= readU64(secret, 160) - accs[4] *= prime32_1 - - accs[5] ^= accs[5] >> 47 - accs[5] ^= readU64(secret, 168) - accs[5] *= prime32_1 - - accs[6] ^= accs[6] >> 47 - accs[6] ^= readU64(secret, 176) - accs[6] *= prime32_1 - - accs[7] ^= accs[7] >> 47 - accs[7] ^= readU64(secret, 184) - accs[7] *= prime32_1 - } - - if l > 0 { - t, k := (l-1)/_stripe, secret - - for i := u64(0); i < t; i++ { - dv0 := readU64(p, 8*0) - dk0 := dv0 ^ readU64(k, 8*0) - accs[1] += dv0 - accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32) - - dv1 := readU64(p, 8*1) - dk1 := dv1 ^ readU64(k, 8*1) - accs[0] += dv1 - accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32) - - dv2 := readU64(p, 8*2) - dk2 := dv2 ^ readU64(k, 8*2) - accs[3] += dv2 - accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32) - - dv3 := readU64(p, 8*3) - dk3 := dv3 ^ readU64(k, 8*3) - accs[2] += dv3 - accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32) - - dv4 := readU64(p, 8*4) - dk4 := dv4 ^ readU64(k, 8*4) - accs[5] += dv4 - accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32) - - dv5 := readU64(p, 8*5) - dk5 := dv5 ^ readU64(k, 8*5) - accs[4] += dv5 - accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32) - - dv6 := readU64(p, 8*6) - dk6 := dv6 ^ readU64(k, 8*6) - accs[7] += dv6 - accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32) - - dv7 := readU64(p, 8*7) - dk7 := dv7 ^ readU64(k, 8*7) - accs[6] += dv7 - accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32) - - l -= _stripe - if l > 0 { - p, k = ptr(ui(p)+_stripe), ptr(ui(k)+8) - } - } - - if l > 0 { - p = ptr(ui(p) - uintptr(_stripe-l)) - - dv0 := readU64(p, 8*0) - dk0 := dv0 ^ readU64(secret, 121) - accs[1] += dv0 - accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32) - - dv1 := readU64(p, 8*1) - dk1 := dv1 ^ readU64(secret, 129) - accs[0] += dv1 - accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32) - - dv2 := readU64(p, 8*2) - dk2 := dv2 ^ readU64(secret, 137) - accs[3] += dv2 - accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32) - - dv3 := readU64(p, 8*3) - dk3 := dv3 ^ readU64(secret, 145) - accs[2] += dv3 - accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32) - - dv4 := readU64(p, 8*4) - dk4 := dv4 ^ readU64(secret, 153) - accs[5] += dv4 - accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32) - - dv5 := readU64(p, 8*5) - dk5 := dv5 ^ readU64(secret, 161) - accs[4] += dv5 - accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32) - - dv6 := readU64(p, 8*6) - dk6 := dv6 ^ readU64(secret, 169) - accs[7] += dv6 - accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32) - - dv7 := readU64(p, 8*7) - dk7 := dv7 ^ readU64(secret, 177) - accs[6] += dv7 - accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32) - } - } -} - -// accumBlockScalarSeed should be used with custom key. -func accumBlockScalarSeed(accs *[8]u64, p, secret ptr) { - // accs - { - secret := secret - for i := 0; i < 16; i++ { - dv0 := readU64(p, 8*0) - dk0 := dv0 ^ readU64(secret, 8*0) - accs[1] += dv0 - accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32) - - dv1 := readU64(p, 8*1) - dk1 := dv1 ^ readU64(secret, 8*1) - accs[0] += dv1 - accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32) - - dv2 := readU64(p, 8*2) - dk2 := dv2 ^ readU64(secret, 8*2) - accs[3] += dv2 - accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32) - - dv3 := readU64(p, 8*3) - dk3 := dv3 ^ readU64(secret, 8*3) - accs[2] += dv3 - accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32) - - dv4 := readU64(p, 8*4) - dk4 := dv4 ^ readU64(secret, 8*4) - accs[5] += dv4 - accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32) - - dv5 := readU64(p, 8*5) - dk5 := dv5 ^ readU64(secret, 8*5) - accs[4] += dv5 - accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32) - - dv6 := readU64(p, 8*6) - dk6 := dv6 ^ readU64(secret, 8*6) - accs[7] += dv6 - accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32) - - dv7 := readU64(p, 8*7) - dk7 := dv7 ^ readU64(secret, 8*7) - accs[6] += dv7 - accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32) - - p, secret = ptr(ui(p)+_stripe), ptr(ui(secret)+8) - } - } - - // scramble accs - accs[0] ^= accs[0] >> 47 - accs[0] ^= readU64(secret, 128) - accs[0] *= prime32_1 - - accs[1] ^= accs[1] >> 47 - accs[1] ^= readU64(secret, 136) - accs[1] *= prime32_1 - - accs[2] ^= accs[2] >> 47 - accs[2] ^= readU64(secret, 144) - accs[2] *= prime32_1 - - accs[3] ^= accs[3] >> 47 - accs[3] ^= readU64(secret, 152) - accs[3] *= prime32_1 - - accs[4] ^= accs[4] >> 47 - accs[4] ^= readU64(secret, 160) - accs[4] *= prime32_1 - - accs[5] ^= accs[5] >> 47 - accs[5] ^= readU64(secret, 168) - accs[5] *= prime32_1 - - accs[6] ^= accs[6] >> 47 - accs[6] ^= readU64(secret, 176) - accs[6] *= prime32_1 - - accs[7] ^= accs[7] >> 47 - accs[7] ^= readU64(secret, 184) - accs[7] *= prime32_1 -} diff --git a/vendor/github.com/zeebo/xxh3/accum_stubs_amd64.go b/vendor/github.com/zeebo/xxh3/accum_stubs_amd64.go deleted file mode 100644 index 9baff6c41..000000000 --- a/vendor/github.com/zeebo/xxh3/accum_stubs_amd64.go +++ /dev/null @@ -1,40 +0,0 @@ -package xxh3 - -import ( - "unsafe" - - "github.com/klauspost/cpuid/v2" -) - -var ( - hasAVX2 = cpuid.CPU.Has(cpuid.AVX2) - hasSSE2 = cpuid.CPU.Has(cpuid.SSE2) // Always true on amd64 - hasAVX512 = cpuid.CPU.Has(cpuid.AVX512F) -) - -//go:noescape -func accumAVX2(acc *[8]u64, data, key unsafe.Pointer, len u64) - -//go:noescape -func accumAVX512(acc *[8]u64, data, key unsafe.Pointer, len u64) - -//go:noescape -func accumSSE(acc *[8]u64, data, key unsafe.Pointer, len u64) - -//go:noescape -func accumBlockAVX2(acc *[8]u64, data, key unsafe.Pointer) - -//go:noescape -func accumBlockSSE(acc *[8]u64, data, key unsafe.Pointer) - -func withOverrides(avx512, avx2, sse2 bool, cb func()) { - avx512Orig, avx2Orig, sse2Orig := hasAVX512, hasAVX2, hasSSE2 - hasAVX512, hasAVX2, hasSSE2 = avx512, avx2, sse2 - defer func() { hasAVX512, hasAVX2, hasSSE2 = avx512Orig, avx2Orig, sse2Orig }() - cb() -} - -func withAVX512(cb func()) { withOverrides(hasAVX512, false, false, cb) } -func withAVX2(cb func()) { withOverrides(false, hasAVX2, false, cb) } -func withSSE2(cb func()) { withOverrides(false, false, hasSSE2, cb) } -func withGeneric(cb func()) { withOverrides(false, false, false, cb) } diff --git a/vendor/github.com/zeebo/xxh3/accum_stubs_other.go b/vendor/github.com/zeebo/xxh3/accum_stubs_other.go deleted file mode 100644 index 93bf6258a..000000000 --- a/vendor/github.com/zeebo/xxh3/accum_stubs_other.go +++ /dev/null @@ -1,25 +0,0 @@ -//go:build !amd64 -// +build !amd64 - -package xxh3 - -import ( - "unsafe" -) - -const ( - hasAVX2 = false - hasSSE2 = false - hasAVX512 = false -) - -func accumAVX2(acc *[8]u64, data, key unsafe.Pointer, len u64) { panic("unreachable") } -func accumSSE(acc *[8]u64, data, key unsafe.Pointer, len u64) { panic("unreachable") } -func accumBlockAVX2(acc *[8]u64, data, key unsafe.Pointer) { panic("unreachable") } -func accumBlockSSE(acc *[8]u64, data, key unsafe.Pointer) { panic("unreachable") } -func accumAVX512(acc *[8]u64, data, key unsafe.Pointer, len u64) { panic("unreachable") } - -func withAVX512(cb func()) { cb() } -func withAVX2(cb func()) { cb() } -func withSSE2(cb func()) { cb() } -func withGeneric(cb func()) { cb() } diff --git a/vendor/github.com/zeebo/xxh3/accum_vector_avx512_amd64.s b/vendor/github.com/zeebo/xxh3/accum_vector_avx512_amd64.s deleted file mode 100644 index cfaf9f0a7..000000000 --- a/vendor/github.com/zeebo/xxh3/accum_vector_avx512_amd64.s +++ /dev/null @@ -1,379 +0,0 @@ -// Code generated by command: go run gen.go -avx512 -out ../accum_vector_avx512_amd64.s -pkg xxh3. DO NOT EDIT. - -#include "textflag.h" - -DATA prime_avx512<>+0(SB)/8, $0x000000009e3779b1 -DATA prime_avx512<>+8(SB)/8, $0x000000009e3779b1 -DATA prime_avx512<>+16(SB)/8, $0x000000009e3779b1 -DATA prime_avx512<>+24(SB)/8, $0x000000009e3779b1 -DATA prime_avx512<>+32(SB)/8, $0x000000009e3779b1 -DATA prime_avx512<>+40(SB)/8, $0x000000009e3779b1 -DATA prime_avx512<>+48(SB)/8, $0x000000009e3779b1 -DATA prime_avx512<>+56(SB)/8, $0x000000009e3779b1 -GLOBL prime_avx512<>(SB), RODATA|NOPTR, $64 - -// func accumAVX512(acc *[8]uint64, data *byte, key *byte, len uint64) -// Requires: AVX, AVX512F, MMX+ -TEXT ·accumAVX512(SB), NOSPLIT, $0-32 - MOVQ acc+0(FP), AX - MOVQ data+8(FP), CX - MOVQ key+16(FP), DX - MOVQ len+24(FP), BX - VMOVDQU64 (AX), Z1 - VMOVDQU64 prime_avx512<>+0(SB), Z0 - VMOVDQU64 (DX), Z2 - VMOVDQU64 8(DX), Z3 - VMOVDQU64 16(DX), Z4 - VMOVDQU64 24(DX), Z5 - VMOVDQU64 32(DX), Z6 - VMOVDQU64 40(DX), Z7 - VMOVDQU64 48(DX), Z8 - VMOVDQU64 56(DX), Z9 - VMOVDQU64 64(DX), Z10 - VMOVDQU64 72(DX), Z11 - VMOVDQU64 80(DX), Z12 - VMOVDQU64 88(DX), Z13 - VMOVDQU64 96(DX), Z14 - VMOVDQU64 104(DX), Z15 - VMOVDQU64 112(DX), Z16 - VMOVDQU64 120(DX), Z17 - VMOVDQU64 128(DX), Z18 - VMOVDQU64 121(DX), Z19 - -accum_large: - CMPQ BX, $0x00000400 - JLE accum - VMOVDQU64 (CX), Z20 - PREFETCHT0 1024(CX) - VPXORD Z2, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 - VPADDQ Z1, Z21, Z1 - VMOVDQU64 64(CX), Z20 - PREFETCHT0 1088(CX) - VPXORD Z3, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 - VPADDQ Z1, Z21, Z1 - VMOVDQU64 128(CX), Z20 - PREFETCHT0 1152(CX) - VPXORD Z4, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 - VPADDQ Z1, Z21, Z1 - VMOVDQU64 192(CX), Z20 - PREFETCHT0 1216(CX) - VPXORD Z5, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 - VPADDQ Z1, Z21, Z1 - VMOVDQU64 256(CX), Z20 - PREFETCHT0 1280(CX) - VPXORD Z6, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 - VPADDQ Z1, Z21, Z1 - VMOVDQU64 320(CX), Z20 - PREFETCHT0 1344(CX) - VPXORD Z7, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 - VPADDQ Z1, Z21, Z1 - VMOVDQU64 384(CX), Z20 - PREFETCHT0 1408(CX) - VPXORD Z8, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 - VPADDQ Z1, Z21, Z1 - VMOVDQU64 448(CX), Z20 - PREFETCHT0 1472(CX) - VPXORD Z9, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 - VPADDQ Z1, Z21, Z1 - VMOVDQU64 512(CX), Z20 - PREFETCHT0 1536(CX) - VPXORD Z10, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 - VPADDQ Z1, Z21, Z1 - VMOVDQU64 576(CX), Z20 - PREFETCHT0 1600(CX) - VPXORD Z11, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 - VPADDQ Z1, Z21, Z1 - VMOVDQU64 640(CX), Z20 - PREFETCHT0 1664(CX) - VPXORD Z12, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 - VPADDQ Z1, Z21, Z1 - VMOVDQU64 704(CX), Z20 - PREFETCHT0 1728(CX) - VPXORD Z13, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 - VPADDQ Z1, Z21, Z1 - VMOVDQU64 768(CX), Z20 - PREFETCHT0 1792(CX) - VPXORD Z14, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 - VPADDQ Z1, Z21, Z1 - VMOVDQU64 832(CX), Z20 - PREFETCHT0 1856(CX) - VPXORD Z15, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 - VPADDQ Z1, Z21, Z1 - VMOVDQU64 896(CX), Z20 - PREFETCHT0 1920(CX) - VPXORD Z16, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 - VPADDQ Z1, Z21, Z1 - VMOVDQU64 960(CX), Z20 - PREFETCHT0 1984(CX) - VPXORD Z17, Z20, Z21 - VPSHUFD $0x31, Z21, Z22 - VPMULUDQ Z21, Z22, Z21 - VPSHUFD $0x4e, Z20, Z20 - VPADDQ Z1, Z20, Z1 - VPADDQ Z1, Z21, Z1 - ADDQ $0x00000400, CX - SUBQ $0x00000400, BX - VPSRLQ $0x2f, Z1, Z20 - VPTERNLOGD $0x96, Z1, Z18, Z20 - VPMULUDQ Z0, Z20, Z1 - VPSHUFD $0xf5, Z20, Z20 - VPMULUDQ Z0, Z20, Z20 - VPSLLQ $0x20, Z20, Z20 - VPADDQ Z1, Z20, Z1 - JMP accum_large - -accum: - CMPQ BX, $0x40 - JLE finalize - VMOVDQU64 (CX), Z0 - VPXORD Z2, Z0, Z2 - VPSHUFD $0x31, Z2, Z18 - VPMULUDQ Z2, Z18, Z2 - VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 - VPADDQ Z1, Z2, Z1 - ADDQ $0x00000040, CX - SUBQ $0x00000040, BX - CMPQ BX, $0x40 - JLE finalize - VMOVDQU64 (CX), Z0 - VPXORD Z3, Z0, Z2 - VPSHUFD $0x31, Z2, Z3 - VPMULUDQ Z2, Z3, Z2 - VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 - VPADDQ Z1, Z2, Z1 - ADDQ $0x00000040, CX - SUBQ $0x00000040, BX - CMPQ BX, $0x40 - JLE finalize - VMOVDQU64 (CX), Z0 - VPXORD Z4, Z0, Z2 - VPSHUFD $0x31, Z2, Z3 - VPMULUDQ Z2, Z3, Z2 - VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 - VPADDQ Z1, Z2, Z1 - ADDQ $0x00000040, CX - SUBQ $0x00000040, BX - CMPQ BX, $0x40 - JLE finalize - VMOVDQU64 (CX), Z0 - VPXORD Z5, Z0, Z2 - VPSHUFD $0x31, Z2, Z3 - VPMULUDQ Z2, Z3, Z2 - VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 - VPADDQ Z1, Z2, Z1 - ADDQ $0x00000040, CX - SUBQ $0x00000040, BX - CMPQ BX, $0x40 - JLE finalize - VMOVDQU64 (CX), Z0 - VPXORD Z6, Z0, Z2 - VPSHUFD $0x31, Z2, Z3 - VPMULUDQ Z2, Z3, Z2 - VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 - VPADDQ Z1, Z2, Z1 - ADDQ $0x00000040, CX - SUBQ $0x00000040, BX - CMPQ BX, $0x40 - JLE finalize - VMOVDQU64 (CX), Z0 - VPXORD Z7, Z0, Z2 - VPSHUFD $0x31, Z2, Z3 - VPMULUDQ Z2, Z3, Z2 - VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 - VPADDQ Z1, Z2, Z1 - ADDQ $0x00000040, CX - SUBQ $0x00000040, BX - CMPQ BX, $0x40 - JLE finalize - VMOVDQU64 (CX), Z0 - VPXORD Z8, Z0, Z2 - VPSHUFD $0x31, Z2, Z3 - VPMULUDQ Z2, Z3, Z2 - VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 - VPADDQ Z1, Z2, Z1 - ADDQ $0x00000040, CX - SUBQ $0x00000040, BX - CMPQ BX, $0x40 - JLE finalize - VMOVDQU64 (CX), Z0 - VPXORD Z9, Z0, Z2 - VPSHUFD $0x31, Z2, Z3 - VPMULUDQ Z2, Z3, Z2 - VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 - VPADDQ Z1, Z2, Z1 - ADDQ $0x00000040, CX - SUBQ $0x00000040, BX - CMPQ BX, $0x40 - JLE finalize - VMOVDQU64 (CX), Z0 - VPXORD Z10, Z0, Z2 - VPSHUFD $0x31, Z2, Z3 - VPMULUDQ Z2, Z3, Z2 - VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 - VPADDQ Z1, Z2, Z1 - ADDQ $0x00000040, CX - SUBQ $0x00000040, BX - CMPQ BX, $0x40 - JLE finalize - VMOVDQU64 (CX), Z0 - VPXORD Z11, Z0, Z2 - VPSHUFD $0x31, Z2, Z3 - VPMULUDQ Z2, Z3, Z2 - VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 - VPADDQ Z1, Z2, Z1 - ADDQ $0x00000040, CX - SUBQ $0x00000040, BX - CMPQ BX, $0x40 - JLE finalize - VMOVDQU64 (CX), Z0 - VPXORD Z12, Z0, Z2 - VPSHUFD $0x31, Z2, Z3 - VPMULUDQ Z2, Z3, Z2 - VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 - VPADDQ Z1, Z2, Z1 - ADDQ $0x00000040, CX - SUBQ $0x00000040, BX - CMPQ BX, $0x40 - JLE finalize - VMOVDQU64 (CX), Z0 - VPXORD Z13, Z0, Z2 - VPSHUFD $0x31, Z2, Z3 - VPMULUDQ Z2, Z3, Z2 - VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 - VPADDQ Z1, Z2, Z1 - ADDQ $0x00000040, CX - SUBQ $0x00000040, BX - CMPQ BX, $0x40 - JLE finalize - VMOVDQU64 (CX), Z0 - VPXORD Z14, Z0, Z2 - VPSHUFD $0x31, Z2, Z3 - VPMULUDQ Z2, Z3, Z2 - VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 - VPADDQ Z1, Z2, Z1 - ADDQ $0x00000040, CX - SUBQ $0x00000040, BX - CMPQ BX, $0x40 - JLE finalize - VMOVDQU64 (CX), Z0 - VPXORD Z15, Z0, Z2 - VPSHUFD $0x31, Z2, Z3 - VPMULUDQ Z2, Z3, Z2 - VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 - VPADDQ Z1, Z2, Z1 - ADDQ $0x00000040, CX - SUBQ $0x00000040, BX - CMPQ BX, $0x40 - JLE finalize - VMOVDQU64 (CX), Z0 - VPXORD Z16, Z0, Z2 - VPSHUFD $0x31, Z2, Z3 - VPMULUDQ Z2, Z3, Z2 - VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 - VPADDQ Z1, Z2, Z1 - ADDQ $0x00000040, CX - SUBQ $0x00000040, BX - CMPQ BX, $0x40 - JLE finalize - VMOVDQU64 (CX), Z0 - VPXORD Z17, Z0, Z2 - VPSHUFD $0x31, Z2, Z3 - VPMULUDQ Z2, Z3, Z2 - VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 - VPADDQ Z1, Z2, Z1 - ADDQ $0x00000040, CX - SUBQ $0x00000040, BX - -finalize: - CMPQ BX, $0x00 - JE return - SUBQ $0x40, CX - ADDQ BX, CX - VMOVDQU64 (CX), Z0 - VPXORD Z19, Z0, Z2 - VPSHUFD $0x31, Z2, Z3 - VPMULUDQ Z2, Z3, Z2 - VPSHUFD $0x4e, Z0, Z0 - VPADDQ Z1, Z0, Z1 - VPADDQ Z1, Z2, Z1 - -return: - VMOVDQU64 Z1, (AX) - VZEROUPPER - RET diff --git a/vendor/github.com/zeebo/xxh3/accum_vector_avx_amd64.s b/vendor/github.com/zeebo/xxh3/accum_vector_avx_amd64.s deleted file mode 100644 index b53c1521f..000000000 --- a/vendor/github.com/zeebo/xxh3/accum_vector_avx_amd64.s +++ /dev/null @@ -1,586 +0,0 @@ -// Code generated by command: go run gen.go -avx -out ../accum_vector_avx_amd64.s -pkg xxh3. DO NOT EDIT. - -#include "textflag.h" - -DATA prime_avx<>+0(SB)/8, $0x000000009e3779b1 -DATA prime_avx<>+8(SB)/8, $0x000000009e3779b1 -DATA prime_avx<>+16(SB)/8, $0x000000009e3779b1 -DATA prime_avx<>+24(SB)/8, $0x000000009e3779b1 -GLOBL prime_avx<>(SB), RODATA|NOPTR, $32 - -// func accumAVX2(acc *[8]uint64, data *byte, key *byte, len uint64) -// Requires: AVX, AVX2, MMX+ -TEXT ·accumAVX2(SB), NOSPLIT, $0-32 - MOVQ acc+0(FP), AX - MOVQ data+8(FP), CX - MOVQ key+16(FP), DX - MOVQ key+16(FP), BX - MOVQ len+24(FP), SI - VMOVDQU (AX), Y1 - VMOVDQU 32(AX), Y2 - VMOVDQU prime_avx<>+0(SB), Y0 - -accum_large: - CMPQ SI, $0x00000400 - JLE accum - VMOVDQU (CX), Y3 - VMOVDQU 32(CX), Y6 - PREFETCHT0 512(CX) - VPXOR (DX), Y3, Y4 - VPXOR 32(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 64(CX), Y3 - VMOVDQU 96(CX), Y6 - PREFETCHT0 576(CX) - VPXOR 8(DX), Y3, Y4 - VPXOR 40(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 128(CX), Y3 - VMOVDQU 160(CX), Y6 - PREFETCHT0 640(CX) - VPXOR 16(DX), Y3, Y4 - VPXOR 48(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 192(CX), Y3 - VMOVDQU 224(CX), Y6 - PREFETCHT0 704(CX) - VPXOR 24(DX), Y3, Y4 - VPXOR 56(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 256(CX), Y3 - VMOVDQU 288(CX), Y6 - PREFETCHT0 768(CX) - VPXOR 32(DX), Y3, Y4 - VPXOR 64(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 320(CX), Y3 - VMOVDQU 352(CX), Y6 - PREFETCHT0 832(CX) - VPXOR 40(DX), Y3, Y4 - VPXOR 72(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 384(CX), Y3 - VMOVDQU 416(CX), Y6 - PREFETCHT0 896(CX) - VPXOR 48(DX), Y3, Y4 - VPXOR 80(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 448(CX), Y3 - VMOVDQU 480(CX), Y6 - PREFETCHT0 960(CX) - VPXOR 56(DX), Y3, Y4 - VPXOR 88(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 512(CX), Y3 - VMOVDQU 544(CX), Y6 - PREFETCHT0 1024(CX) - VPXOR 64(DX), Y3, Y4 - VPXOR 96(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 576(CX), Y3 - VMOVDQU 608(CX), Y6 - PREFETCHT0 1088(CX) - VPXOR 72(DX), Y3, Y4 - VPXOR 104(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 640(CX), Y3 - VMOVDQU 672(CX), Y6 - PREFETCHT0 1152(CX) - VPXOR 80(DX), Y3, Y4 - VPXOR 112(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 704(CX), Y3 - VMOVDQU 736(CX), Y6 - PREFETCHT0 1216(CX) - VPXOR 88(DX), Y3, Y4 - VPXOR 120(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 768(CX), Y3 - VMOVDQU 800(CX), Y6 - PREFETCHT0 1280(CX) - VPXOR 96(DX), Y3, Y4 - VPXOR 128(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 832(CX), Y3 - VMOVDQU 864(CX), Y6 - PREFETCHT0 1344(CX) - VPXOR 104(DX), Y3, Y4 - VPXOR 136(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 896(CX), Y3 - VMOVDQU 928(CX), Y6 - PREFETCHT0 1408(CX) - VPXOR 112(DX), Y3, Y4 - VPXOR 144(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 960(CX), Y3 - VMOVDQU 992(CX), Y6 - PREFETCHT0 1472(CX) - VPXOR 120(DX), Y3, Y4 - VPXOR 152(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - ADDQ $0x00000400, CX - SUBQ $0x00000400, SI - VPSRLQ $0x2f, Y1, Y3 - VPXOR Y1, Y3, Y3 - VPXOR 128(DX), Y3, Y3 - VPMULUDQ Y0, Y3, Y1 - VPSHUFD $0xf5, Y3, Y3 - VPMULUDQ Y0, Y3, Y3 - VPSLLQ $0x20, Y3, Y3 - VPADDQ Y1, Y3, Y1 - VPSRLQ $0x2f, Y2, Y3 - VPXOR Y2, Y3, Y3 - VPXOR 160(DX), Y3, Y3 - VPMULUDQ Y0, Y3, Y2 - VPSHUFD $0xf5, Y3, Y3 - VPMULUDQ Y0, Y3, Y3 - VPSLLQ $0x20, Y3, Y3 - VPADDQ Y2, Y3, Y2 - JMP accum_large - -accum: - CMPQ SI, $0x40 - JLE finalize - VMOVDQU (CX), Y0 - VMOVDQU 32(CX), Y5 - VPXOR (BX), Y0, Y3 - VPXOR 32(BX), Y5, Y6 - VPSHUFD $0x31, Y3, Y4 - VPSHUFD $0x31, Y6, Y7 - VPMULUDQ Y3, Y4, Y3 - VPMULUDQ Y6, Y7, Y6 - VPSHUFD $0x4e, Y0, Y0 - VPSHUFD $0x4e, Y5, Y5 - VPADDQ Y1, Y0, Y1 - VPADDQ Y1, Y3, Y1 - VPADDQ Y2, Y5, Y2 - VPADDQ Y2, Y6, Y2 - ADDQ $0x00000040, CX - SUBQ $0x00000040, SI - ADDQ $0x00000008, BX - JMP accum - -finalize: - CMPQ SI, $0x00 - JE return - SUBQ $0x40, CX - ADDQ SI, CX - VMOVDQU (CX), Y0 - VMOVDQU 32(CX), Y5 - VPXOR 121(DX), Y0, Y3 - VPXOR 153(DX), Y5, Y6 - VPSHUFD $0x31, Y3, Y4 - VPSHUFD $0x31, Y6, Y7 - VPMULUDQ Y3, Y4, Y3 - VPMULUDQ Y6, Y7, Y6 - VPSHUFD $0x4e, Y0, Y0 - VPSHUFD $0x4e, Y5, Y5 - VPADDQ Y1, Y0, Y1 - VPADDQ Y1, Y3, Y1 - VPADDQ Y2, Y5, Y2 - VPADDQ Y2, Y6, Y2 - -return: - VMOVDQU Y1, (AX) - VMOVDQU Y2, 32(AX) - VZEROUPPER - RET - -// func accumBlockAVX2(acc *[8]uint64, data *byte, key *byte) -// Requires: AVX, AVX2 -TEXT ·accumBlockAVX2(SB), NOSPLIT, $0-24 - MOVQ acc+0(FP), AX - MOVQ data+8(FP), CX - MOVQ key+16(FP), DX - VMOVDQU (AX), Y1 - VMOVDQU 32(AX), Y2 - VMOVDQU prime_avx<>+0(SB), Y0 - VMOVDQU (CX), Y3 - VMOVDQU 32(CX), Y6 - VPXOR (DX), Y3, Y4 - VPXOR 32(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 64(CX), Y3 - VMOVDQU 96(CX), Y6 - VPXOR 8(DX), Y3, Y4 - VPXOR 40(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 128(CX), Y3 - VMOVDQU 160(CX), Y6 - VPXOR 16(DX), Y3, Y4 - VPXOR 48(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 192(CX), Y3 - VMOVDQU 224(CX), Y6 - VPXOR 24(DX), Y3, Y4 - VPXOR 56(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 256(CX), Y3 - VMOVDQU 288(CX), Y6 - VPXOR 32(DX), Y3, Y4 - VPXOR 64(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 320(CX), Y3 - VMOVDQU 352(CX), Y6 - VPXOR 40(DX), Y3, Y4 - VPXOR 72(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 384(CX), Y3 - VMOVDQU 416(CX), Y6 - VPXOR 48(DX), Y3, Y4 - VPXOR 80(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 448(CX), Y3 - VMOVDQU 480(CX), Y6 - VPXOR 56(DX), Y3, Y4 - VPXOR 88(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 512(CX), Y3 - VMOVDQU 544(CX), Y6 - VPXOR 64(DX), Y3, Y4 - VPXOR 96(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 576(CX), Y3 - VMOVDQU 608(CX), Y6 - VPXOR 72(DX), Y3, Y4 - VPXOR 104(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 640(CX), Y3 - VMOVDQU 672(CX), Y6 - VPXOR 80(DX), Y3, Y4 - VPXOR 112(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 704(CX), Y3 - VMOVDQU 736(CX), Y6 - VPXOR 88(DX), Y3, Y4 - VPXOR 120(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 768(CX), Y3 - VMOVDQU 800(CX), Y6 - VPXOR 96(DX), Y3, Y4 - VPXOR 128(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 832(CX), Y3 - VMOVDQU 864(CX), Y6 - VPXOR 104(DX), Y3, Y4 - VPXOR 136(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 896(CX), Y3 - VMOVDQU 928(CX), Y6 - VPXOR 112(DX), Y3, Y4 - VPXOR 144(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VMOVDQU 960(CX), Y3 - VMOVDQU 992(CX), Y6 - VPXOR 120(DX), Y3, Y4 - VPXOR 152(DX), Y6, Y7 - VPSHUFD $0x31, Y4, Y5 - VPSHUFD $0x31, Y7, Y8 - VPMULUDQ Y4, Y5, Y4 - VPMULUDQ Y7, Y8, Y7 - VPSHUFD $0x4e, Y3, Y3 - VPSHUFD $0x4e, Y6, Y6 - VPADDQ Y1, Y3, Y1 - VPADDQ Y1, Y4, Y1 - VPADDQ Y2, Y6, Y2 - VPADDQ Y2, Y7, Y2 - VPSRLQ $0x2f, Y1, Y3 - VPXOR Y1, Y3, Y3 - VPXOR 128(DX), Y3, Y3 - VPMULUDQ Y0, Y3, Y1 - VPSHUFD $0xf5, Y3, Y3 - VPMULUDQ Y0, Y3, Y3 - VPSLLQ $0x20, Y3, Y3 - VPADDQ Y1, Y3, Y1 - VPSRLQ $0x2f, Y2, Y3 - VPXOR Y2, Y3, Y3 - VPXOR 160(DX), Y3, Y3 - VPMULUDQ Y0, Y3, Y2 - VPSHUFD $0xf5, Y3, Y3 - VPMULUDQ Y0, Y3, Y3 - VPSLLQ $0x20, Y3, Y3 - VPADDQ Y2, Y3, Y2 - VMOVDQU Y1, (AX) - VMOVDQU Y2, 32(AX) - VZEROUPPER - RET diff --git a/vendor/github.com/zeebo/xxh3/accum_vector_sse_amd64.s b/vendor/github.com/zeebo/xxh3/accum_vector_sse_amd64.s deleted file mode 100644 index ba670e560..000000000 --- a/vendor/github.com/zeebo/xxh3/accum_vector_sse_amd64.s +++ /dev/null @@ -1,1236 +0,0 @@ -// Code generated by command: go run gen.go -sse -out ../accum_vector_sse_amd64.s -pkg xxh3. DO NOT EDIT. - -#include "textflag.h" - -DATA prime_sse<>+0(SB)/4, $0x9e3779b1 -DATA prime_sse<>+4(SB)/4, $0x9e3779b1 -DATA prime_sse<>+8(SB)/4, $0x9e3779b1 -DATA prime_sse<>+12(SB)/4, $0x9e3779b1 -GLOBL prime_sse<>(SB), RODATA|NOPTR, $16 - -// func accumSSE(acc *[8]uint64, data *byte, key *byte, len uint64) -// Requires: SSE2 -TEXT ·accumSSE(SB), NOSPLIT, $0-32 - MOVQ acc+0(FP), AX - MOVQ data+8(FP), CX - MOVQ key+16(FP), DX - MOVQ key+16(FP), BX - MOVQ len+24(FP), SI - MOVOU (AX), X1 - MOVOU 16(AX), X2 - MOVOU 32(AX), X3 - MOVOU 48(AX), X4 - MOVOU prime_sse<>+0(SB), X0 - -accum_large: - CMPQ SI, $0x00000400 - JLE accum - MOVOU (CX), X5 - MOVOU (DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 16(CX), X5 - MOVOU 16(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 32(CX), X5 - MOVOU 32(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 48(CX), X5 - MOVOU 48(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 64(CX), X5 - MOVOU 8(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 80(CX), X5 - MOVOU 24(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 96(CX), X5 - MOVOU 40(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 112(CX), X5 - MOVOU 56(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 128(CX), X5 - MOVOU 16(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 144(CX), X5 - MOVOU 32(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 160(CX), X5 - MOVOU 48(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 176(CX), X5 - MOVOU 64(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 192(CX), X5 - MOVOU 24(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 208(CX), X5 - MOVOU 40(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 224(CX), X5 - MOVOU 56(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 240(CX), X5 - MOVOU 72(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 256(CX), X5 - MOVOU 32(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 272(CX), X5 - MOVOU 48(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 288(CX), X5 - MOVOU 64(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 304(CX), X5 - MOVOU 80(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 320(CX), X5 - MOVOU 40(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 336(CX), X5 - MOVOU 56(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 352(CX), X5 - MOVOU 72(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 368(CX), X5 - MOVOU 88(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 384(CX), X5 - MOVOU 48(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 400(CX), X5 - MOVOU 64(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 416(CX), X5 - MOVOU 80(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 432(CX), X5 - MOVOU 96(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 448(CX), X5 - MOVOU 56(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 464(CX), X5 - MOVOU 72(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 480(CX), X5 - MOVOU 88(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 496(CX), X5 - MOVOU 104(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 512(CX), X5 - MOVOU 64(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 528(CX), X5 - MOVOU 80(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 544(CX), X5 - MOVOU 96(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 560(CX), X5 - MOVOU 112(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 576(CX), X5 - MOVOU 72(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 592(CX), X5 - MOVOU 88(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 608(CX), X5 - MOVOU 104(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 624(CX), X5 - MOVOU 120(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 640(CX), X5 - MOVOU 80(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 656(CX), X5 - MOVOU 96(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 672(CX), X5 - MOVOU 112(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 688(CX), X5 - MOVOU 128(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 704(CX), X5 - MOVOU 88(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 720(CX), X5 - MOVOU 104(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 736(CX), X5 - MOVOU 120(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 752(CX), X5 - MOVOU 136(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 768(CX), X5 - MOVOU 96(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 784(CX), X5 - MOVOU 112(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 800(CX), X5 - MOVOU 128(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 816(CX), X5 - MOVOU 144(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 832(CX), X5 - MOVOU 104(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 848(CX), X5 - MOVOU 120(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 864(CX), X5 - MOVOU 136(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 880(CX), X5 - MOVOU 152(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 896(CX), X5 - MOVOU 112(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 912(CX), X5 - MOVOU 128(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 928(CX), X5 - MOVOU 144(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 944(CX), X5 - MOVOU 160(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 960(CX), X5 - MOVOU 120(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 976(CX), X5 - MOVOU 136(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 992(CX), X5 - MOVOU 152(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 1008(CX), X5 - MOVOU 168(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - ADDQ $0x00000400, CX - SUBQ $0x00000400, SI - MOVOU X1, X5 - PSRLQ $0x2f, X5 - PXOR X5, X1 - MOVOU 128(DX), X5 - PXOR X5, X1 - PSHUFD $0xf5, X1, X5 - PMULULQ X0, X1 - PMULULQ X0, X5 - PSLLQ $0x20, X5 - PADDQ X5, X1 - MOVOU X2, X5 - PSRLQ $0x2f, X5 - PXOR X5, X2 - MOVOU 144(DX), X5 - PXOR X5, X2 - PSHUFD $0xf5, X2, X5 - PMULULQ X0, X2 - PMULULQ X0, X5 - PSLLQ $0x20, X5 - PADDQ X5, X2 - MOVOU X3, X5 - PSRLQ $0x2f, X5 - PXOR X5, X3 - MOVOU 160(DX), X5 - PXOR X5, X3 - PSHUFD $0xf5, X3, X5 - PMULULQ X0, X3 - PMULULQ X0, X5 - PSLLQ $0x20, X5 - PADDQ X5, X3 - MOVOU X4, X5 - PSRLQ $0x2f, X5 - PXOR X5, X4 - MOVOU 176(DX), X5 - PXOR X5, X4 - PSHUFD $0xf5, X4, X5 - PMULULQ X0, X4 - PMULULQ X0, X5 - PSLLQ $0x20, X5 - PADDQ X5, X4 - JMP accum_large - -accum: - CMPQ SI, $0x40 - JLE finalize - MOVOU (CX), X0 - MOVOU (BX), X5 - PXOR X0, X5 - PSHUFD $0x31, X5, X6 - PMULULQ X5, X6 - PSHUFD $0x4e, X0, X0 - PADDQ X0, X1 - PADDQ X6, X1 - MOVOU 16(CX), X0 - MOVOU 16(BX), X5 - PXOR X0, X5 - PSHUFD $0x31, X5, X6 - PMULULQ X5, X6 - PSHUFD $0x4e, X0, X0 - PADDQ X0, X2 - PADDQ X6, X2 - MOVOU 32(CX), X0 - MOVOU 32(BX), X5 - PXOR X0, X5 - PSHUFD $0x31, X5, X6 - PMULULQ X5, X6 - PSHUFD $0x4e, X0, X0 - PADDQ X0, X3 - PADDQ X6, X3 - MOVOU 48(CX), X0 - MOVOU 48(BX), X5 - PXOR X0, X5 - PSHUFD $0x31, X5, X6 - PMULULQ X5, X6 - PSHUFD $0x4e, X0, X0 - PADDQ X0, X4 - PADDQ X6, X4 - ADDQ $0x00000040, CX - SUBQ $0x00000040, SI - ADDQ $0x00000008, BX - JMP accum - -finalize: - CMPQ SI, $0x00 - JE return - SUBQ $0x40, CX - ADDQ SI, CX - MOVOU (CX), X0 - MOVOU 121(DX), X5 - PXOR X0, X5 - PSHUFD $0x31, X5, X6 - PMULULQ X5, X6 - PSHUFD $0x4e, X0, X0 - PADDQ X0, X1 - PADDQ X6, X1 - MOVOU 16(CX), X0 - MOVOU 137(DX), X5 - PXOR X0, X5 - PSHUFD $0x31, X5, X6 - PMULULQ X5, X6 - PSHUFD $0x4e, X0, X0 - PADDQ X0, X2 - PADDQ X6, X2 - MOVOU 32(CX), X0 - MOVOU 153(DX), X5 - PXOR X0, X5 - PSHUFD $0x31, X5, X6 - PMULULQ X5, X6 - PSHUFD $0x4e, X0, X0 - PADDQ X0, X3 - PADDQ X6, X3 - MOVOU 48(CX), X0 - MOVOU 169(DX), X5 - PXOR X0, X5 - PSHUFD $0x31, X5, X6 - PMULULQ X5, X6 - PSHUFD $0x4e, X0, X0 - PADDQ X0, X4 - PADDQ X6, X4 - -return: - MOVOU X1, (AX) - MOVOU X2, 16(AX) - MOVOU X3, 32(AX) - MOVOU X4, 48(AX) - RET - -// func accumBlockSSE(acc *[8]uint64, data *byte, key *byte) -// Requires: SSE2 -TEXT ·accumBlockSSE(SB), NOSPLIT, $0-24 - MOVQ acc+0(FP), AX - MOVQ data+8(FP), CX - MOVQ key+16(FP), DX - MOVOU (AX), X1 - MOVOU 16(AX), X2 - MOVOU 32(AX), X3 - MOVOU 48(AX), X4 - MOVOU prime_sse<>+0(SB), X0 - MOVOU (CX), X5 - MOVOU (DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 16(CX), X5 - MOVOU 16(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 32(CX), X5 - MOVOU 32(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 48(CX), X5 - MOVOU 48(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 64(CX), X5 - MOVOU 8(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 80(CX), X5 - MOVOU 24(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 96(CX), X5 - MOVOU 40(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 112(CX), X5 - MOVOU 56(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 128(CX), X5 - MOVOU 16(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 144(CX), X5 - MOVOU 32(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 160(CX), X5 - MOVOU 48(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 176(CX), X5 - MOVOU 64(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 192(CX), X5 - MOVOU 24(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 208(CX), X5 - MOVOU 40(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 224(CX), X5 - MOVOU 56(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 240(CX), X5 - MOVOU 72(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 256(CX), X5 - MOVOU 32(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 272(CX), X5 - MOVOU 48(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 288(CX), X5 - MOVOU 64(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 304(CX), X5 - MOVOU 80(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 320(CX), X5 - MOVOU 40(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 336(CX), X5 - MOVOU 56(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 352(CX), X5 - MOVOU 72(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 368(CX), X5 - MOVOU 88(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 384(CX), X5 - MOVOU 48(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 400(CX), X5 - MOVOU 64(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 416(CX), X5 - MOVOU 80(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 432(CX), X5 - MOVOU 96(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 448(CX), X5 - MOVOU 56(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 464(CX), X5 - MOVOU 72(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 480(CX), X5 - MOVOU 88(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 496(CX), X5 - MOVOU 104(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 512(CX), X5 - MOVOU 64(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 528(CX), X5 - MOVOU 80(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 544(CX), X5 - MOVOU 96(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 560(CX), X5 - MOVOU 112(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 576(CX), X5 - MOVOU 72(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 592(CX), X5 - MOVOU 88(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 608(CX), X5 - MOVOU 104(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 624(CX), X5 - MOVOU 120(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 640(CX), X5 - MOVOU 80(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 656(CX), X5 - MOVOU 96(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 672(CX), X5 - MOVOU 112(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 688(CX), X5 - MOVOU 128(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 704(CX), X5 - MOVOU 88(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 720(CX), X5 - MOVOU 104(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 736(CX), X5 - MOVOU 120(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 752(CX), X5 - MOVOU 136(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 768(CX), X5 - MOVOU 96(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 784(CX), X5 - MOVOU 112(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 800(CX), X5 - MOVOU 128(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 816(CX), X5 - MOVOU 144(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 832(CX), X5 - MOVOU 104(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 848(CX), X5 - MOVOU 120(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 864(CX), X5 - MOVOU 136(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 880(CX), X5 - MOVOU 152(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 896(CX), X5 - MOVOU 112(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 912(CX), X5 - MOVOU 128(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 928(CX), X5 - MOVOU 144(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 944(CX), X5 - MOVOU 160(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU 960(CX), X5 - MOVOU 120(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X1 - PADDQ X7, X1 - MOVOU 976(CX), X5 - MOVOU 136(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X2 - PADDQ X7, X2 - MOVOU 992(CX), X5 - MOVOU 152(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X3 - PADDQ X7, X3 - MOVOU 1008(CX), X5 - MOVOU 168(DX), X6 - PXOR X5, X6 - PSHUFD $0x31, X6, X7 - PMULULQ X6, X7 - PSHUFD $0x4e, X5, X5 - PADDQ X5, X4 - PADDQ X7, X4 - MOVOU X1, X5 - PSRLQ $0x2f, X5 - PXOR X5, X1 - MOVOU 128(DX), X5 - PXOR X5, X1 - PSHUFD $0xf5, X1, X5 - PMULULQ X0, X1 - PMULULQ X0, X5 - PSLLQ $0x20, X5 - PADDQ X5, X1 - MOVOU X2, X5 - PSRLQ $0x2f, X5 - PXOR X5, X2 - MOVOU 144(DX), X5 - PXOR X5, X2 - PSHUFD $0xf5, X2, X5 - PMULULQ X0, X2 - PMULULQ X0, X5 - PSLLQ $0x20, X5 - PADDQ X5, X2 - MOVOU X3, X5 - PSRLQ $0x2f, X5 - PXOR X5, X3 - MOVOU 160(DX), X5 - PXOR X5, X3 - PSHUFD $0xf5, X3, X5 - PMULULQ X0, X3 - PMULULQ X0, X5 - PSLLQ $0x20, X5 - PADDQ X5, X3 - MOVOU X4, X5 - PSRLQ $0x2f, X5 - PXOR X5, X4 - MOVOU 176(DX), X5 - PXOR X5, X4 - PSHUFD $0xf5, X4, X5 - PMULULQ X0, X4 - PMULULQ X0, X5 - PSLLQ $0x20, X5 - PADDQ X5, X4 - MOVOU X1, (AX) - MOVOU X2, 16(AX) - MOVOU X3, 32(AX) - MOVOU X4, 48(AX) - RET diff --git a/vendor/github.com/zeebo/xxh3/consts.go b/vendor/github.com/zeebo/xxh3/consts.go deleted file mode 100644 index 39ef6e179..000000000 --- a/vendor/github.com/zeebo/xxh3/consts.go +++ /dev/null @@ -1,97 +0,0 @@ -package xxh3 - -const ( - _stripe = 64 - _block = 1024 - - prime32_1 = 2654435761 - prime32_2 = 2246822519 - prime32_3 = 3266489917 - - prime64_1 = 11400714785074694791 - prime64_2 = 14029467366897019727 - prime64_3 = 1609587929392839161 - prime64_4 = 9650029242287828579 - prime64_5 = 2870177450012600261 -) - -var key = ptr(&[...]u8{ - 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe /* 8 */, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, /* 16 */ - 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb /* 24 */, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, /* 32 */ - 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78 /* 40 */, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, /* 48 */ - 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e /* 56 */, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, /* 64 */ - 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb /* 72 */, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, /* 80 */ - 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e /* 88 */, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, /* 96 */ - 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f /* 104 */, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, /* 112 */ - 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31 /* 120 */, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, /* 128 */ - 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3 /* 136 */, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, /* 144 */ - 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49 /* 152 */, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, /* 160 */ - 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc /* 168 */, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, /* 176 */ - 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28 /* 184 */, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, /* 192 */ -}) - -const ( - key64_000 u64 = 0xbe4ba423396cfeb8 - key64_008 u64 = 0x1cad21f72c81017c - key64_016 u64 = 0xdb979083e96dd4de - key64_024 u64 = 0x1f67b3b7a4a44072 - key64_032 u64 = 0x78e5c0cc4ee679cb - key64_040 u64 = 0x2172ffcc7dd05a82 - key64_048 u64 = 0x8e2443f7744608b8 - key64_056 u64 = 0x4c263a81e69035e0 - key64_064 u64 = 0xcb00c391bb52283c - key64_072 u64 = 0xa32e531b8b65d088 - key64_080 u64 = 0x4ef90da297486471 - key64_088 u64 = 0xd8acdea946ef1938 - key64_096 u64 = 0x3f349ce33f76faa8 - key64_104 u64 = 0x1d4f0bc7c7bbdcf9 - key64_112 u64 = 0x3159b4cd4be0518a - key64_120 u64 = 0x647378d9c97e9fc8 - key64_128 u64 = 0xc3ebd33483acc5ea - key64_136 u64 = 0xeb6313faffa081c5 - key64_144 u64 = 0x49daf0b751dd0d17 - key64_152 u64 = 0x9e68d429265516d3 - key64_160 u64 = 0xfca1477d58be162b - key64_168 u64 = 0xce31d07ad1b8f88f - key64_176 u64 = 0x280416958f3acb45 - key64_184 u64 = 0x7e404bbbcafbd7af - - key64_103 u64 = 0x4f0bc7c7bbdcf93f - key64_111 u64 = 0x59b4cd4be0518a1d - key64_119 u64 = 0x7378d9c97e9fc831 - key64_127 u64 = 0xebd33483acc5ea64 - - key64_121 u64 = 0xea647378d9c97e9f - key64_129 u64 = 0xc5c3ebd33483acc5 - key64_137 u64 = 0x17eb6313faffa081 - key64_145 u64 = 0xd349daf0b751dd0d - key64_153 u64 = 0x2b9e68d429265516 - key64_161 u64 = 0x8ffca1477d58be16 - key64_169 u64 = 0x45ce31d07ad1b8f8 - key64_177 u64 = 0xaf280416958f3acb - - key64_011 = 0x6dd4de1cad21f72c - key64_019 = 0xa44072db979083e9 - key64_027 = 0xe679cb1f67b3b7a4 - key64_035 = 0xd05a8278e5c0cc4e - key64_043 = 0x4608b82172ffcc7d - key64_051 = 0x9035e08e2443f774 - key64_059 = 0x52283c4c263a81e6 - key64_067 = 0x65d088cb00c391bb - - key64_117 = 0xd9c97e9fc83159b4 - key64_125 = 0x3483acc5ea647378 - key64_133 = 0xfaffa081c5c3ebd3 - key64_141 = 0xb751dd0d17eb6313 - key64_149 = 0x29265516d349daf0 - key64_157 = 0x7d58be162b9e68d4 - key64_165 = 0x7ad1b8f88ffca147 - key64_173 = 0x958f3acb45ce31d0 -) - -const ( - key32_000 u32 = 0xbe4ba423 - key32_004 u32 = 0x396cfeb8 - key32_008 u32 = 0x1cad21f7 - key32_012 u32 = 0x2c81017c -) diff --git a/vendor/github.com/zeebo/xxh3/hash128.go b/vendor/github.com/zeebo/xxh3/hash128.go deleted file mode 100644 index 0040a21bb..000000000 --- a/vendor/github.com/zeebo/xxh3/hash128.go +++ /dev/null @@ -1,253 +0,0 @@ -package xxh3 - -import ( - "math/bits" -) - -// Hash128 returns the 128-bit hash of the byte slice. -func Hash128(b []byte) Uint128 { - return hashAny128(*(*str)(ptr(&b))) -} - -// HashString128 returns the 128-bit hash of the string slice. -func HashString128(s string) Uint128 { - return hashAny128(*(*str)(ptr(&s))) -} - -func hashAny128(s str) (acc u128) { - p, l := s.p, s.l - - switch { - case l <= 16: - switch { - case l > 8: // 9-16 - const bitflipl = key64_032 ^ key64_040 - const bitfliph = key64_048 ^ key64_056 - - input_lo := readU64(p, 0) - input_hi := readU64(p, ui(l)-8) - - m128_h, m128_l := bits.Mul64(input_lo^input_hi^bitflipl, prime64_1) - - m128_l += uint64(l-1) << 54 - input_hi ^= bitfliph - - m128_h += input_hi + uint64(uint32(input_hi))*(prime32_2-1) - - m128_l ^= bits.ReverseBytes64(m128_h) - - acc.Hi, acc.Lo = bits.Mul64(m128_l, prime64_2) - acc.Hi += m128_h * prime64_2 - - acc.Lo = xxh3Avalanche(acc.Lo) - acc.Hi = xxh3Avalanche(acc.Hi) - - return acc - - case l > 3: // 4-8 - const bitflip = key64_016 ^ key64_024 - - input_lo := readU32(p, 0) - input_hi := readU32(p, ui(l)-4) - input_64 := u64(input_lo) + u64(input_hi)<<32 - keyed := input_64 ^ bitflip - - acc.Hi, acc.Lo = bits.Mul64(keyed, prime64_1+(uint64(l)<<2)) - - acc.Hi += acc.Lo << 1 - acc.Lo ^= acc.Hi >> 3 - - acc.Lo ^= acc.Lo >> 35 - acc.Lo *= 0x9fb21c651e98df25 - acc.Lo ^= acc.Lo >> 28 - acc.Hi = xxh3Avalanche(acc.Hi) - - return acc - - case l == 3: // 3 - c12 := u64(readU16(p, 0)) - c3 := u64(readU8(p, 2)) - acc.Lo = c12<<16 + c3 + 3<<8 - - case l > 1: // 2 - c12 := u64(readU16(p, 0)) - acc.Lo = c12*(1<<24+1)>>8 + 2<<8 - - case l == 1: // 1 - c1 := u64(readU8(p, 0)) - acc.Lo = c1*(1<<24+1<<16+1) + 1<<8 - - default: // 0 - return u128{0x99aa06d3014798d8, 0x6001c324468d497f} - } - - acc.Hi = uint64(bits.RotateLeft32(bits.ReverseBytes32(uint32(acc.Lo)), 13)) - acc.Lo ^= uint64(key32_000 ^ key32_004) - acc.Hi ^= uint64(key32_008 ^ key32_012) - - acc.Lo = xxh64AvalancheSmall(acc.Lo) - acc.Hi = xxh64AvalancheSmall(acc.Hi) - - return acc - - case l <= 128: - acc.Lo = u64(l) * prime64_1 - - if l > 32 { - if l > 64 { - if l > 96 { - in8, in7 := readU64(p, ui(l)-8*8), readU64(p, ui(l)-7*8) - i6, i7 := readU64(p, 6*8), readU64(p, 7*8) - - acc.Hi += mulFold64(in8^key64_112, in7^key64_120) - acc.Hi ^= i6 + i7 - acc.Lo += mulFold64(i6^key64_096, i7^key64_104) - acc.Lo ^= in8 + in7 - - } // 96 - - in6, in5 := readU64(p, ui(l)-6*8), readU64(p, ui(l)-5*8) - i4, i5 := readU64(p, 4*8), readU64(p, 5*8) - - acc.Hi += mulFold64(in6^key64_080, in5^key64_088) - acc.Hi ^= i4 + i5 - acc.Lo += mulFold64(i4^key64_064, i5^key64_072) - acc.Lo ^= in6 + in5 - - } // 64 - - in4, in3 := readU64(p, ui(l)-4*8), readU64(p, ui(l)-3*8) - i2, i3 := readU64(p, 2*8), readU64(p, 3*8) - - acc.Hi += mulFold64(in4^key64_048, in3^key64_056) - acc.Hi ^= i2 + i3 - acc.Lo += mulFold64(i2^key64_032, i3^key64_040) - acc.Lo ^= in4 + in3 - - } // 32 - - in2, in1 := readU64(p, ui(l)-2*8), readU64(p, ui(l)-1*8) - i0, i1 := readU64(p, 0*8), readU64(p, 1*8) - - acc.Hi += mulFold64(in2^key64_016, in1^key64_024) - acc.Hi ^= i0 + i1 - acc.Lo += mulFold64(i0^key64_000, i1^key64_008) - acc.Lo ^= in2 + in1 - - acc.Hi, acc.Lo = (acc.Lo*prime64_1)+(acc.Hi*prime64_4)+(u64(l)*prime64_2), acc.Hi+acc.Lo - - acc.Hi = -xxh3Avalanche(acc.Hi) - acc.Lo = xxh3Avalanche(acc.Lo) - - return acc - - case l <= 240: - acc.Lo = u64(l) * prime64_1 - - { - i0, i1, i2, i3 := readU64(p, 0*8), readU64(p, 1*8), readU64(p, 2*8), readU64(p, 3*8) - - acc.Hi += mulFold64(i2^key64_016, i3^key64_024) - acc.Hi ^= i0 + i1 - acc.Lo += mulFold64(i0^key64_000, i1^key64_008) - acc.Lo ^= i2 + i3 - } - - { - i0, i1, i2, i3 := readU64(p, 4*8), readU64(p, 5*8), readU64(p, 6*8), readU64(p, 7*8) - - acc.Hi += mulFold64(i2^key64_048, i3^key64_056) - acc.Hi ^= i0 + i1 - acc.Lo += mulFold64(i0^key64_032, i1^key64_040) - acc.Lo ^= i2 + i3 - } - - { - i0, i1, i2, i3 := readU64(p, 8*8), readU64(p, 9*8), readU64(p, 10*8), readU64(p, 11*8) - - acc.Hi += mulFold64(i2^key64_080, i3^key64_088) - acc.Hi ^= i0 + i1 - acc.Lo += mulFold64(i0^key64_064, i1^key64_072) - acc.Lo ^= i2 + i3 - } - - { - i0, i1, i2, i3 := readU64(p, 12*8), readU64(p, 13*8), readU64(p, 14*8), readU64(p, 15*8) - - acc.Hi += mulFold64(i2^key64_112, i3^key64_120) - acc.Hi ^= i0 + i1 - acc.Lo += mulFold64(i0^key64_096, i1^key64_104) - acc.Lo ^= i2 + i3 - } - - // avalanche - acc.Hi = xxh3Avalanche(acc.Hi) - acc.Lo = xxh3Avalanche(acc.Lo) - - // trailing groups after 128 - top := ui(l) &^ 31 - for i := ui(4 * 32); i < top; i += 32 { - i0, i1, i2, i3 := readU64(p, i+0), readU64(p, i+8), readU64(p, i+16), readU64(p, i+24) - k0, k1, k2, k3 := readU64(key, i-125), readU64(key, i-117), readU64(key, i-109), readU64(key, i-101) - - acc.Hi += mulFold64(i2^k2, i3^k3) - acc.Hi ^= i0 + i1 - acc.Lo += mulFold64(i0^k0, i1^k1) - acc.Lo ^= i2 + i3 - } - - // last 32 bytes - { - i0, i1, i2, i3 := readU64(p, ui(l)-32), readU64(p, ui(l)-24), readU64(p, ui(l)-16), readU64(p, ui(l)-8) - - acc.Hi += mulFold64(i0^key64_119, i1^key64_127) - acc.Hi ^= i2 + i3 - acc.Lo += mulFold64(i2^key64_103, i3^key64_111) - acc.Lo ^= i0 + i1 - } - - acc.Hi, acc.Lo = (acc.Lo*prime64_1)+(acc.Hi*prime64_4)+(u64(l)*prime64_2), acc.Hi+acc.Lo - - acc.Hi = -xxh3Avalanche(acc.Hi) - acc.Lo = xxh3Avalanche(acc.Lo) - - return acc - - default: - acc.Lo = u64(l) * prime64_1 - acc.Hi = ^(u64(l) * prime64_2) - - accs := [8]u64{ - prime32_3, prime64_1, prime64_2, prime64_3, - prime64_4, prime32_2, prime64_5, prime32_1, - } - - if hasAVX512 && l >= avx512Switch { - accumAVX512(&accs, p, key, u64(l)) - } else if hasAVX2 { - accumAVX2(&accs, p, key, u64(l)) - } else if hasSSE2 { - accumSSE(&accs, p, key, u64(l)) - } else { - accumScalar(&accs, p, key, u64(l)) - } - - // merge accs - acc.Lo += mulFold64(accs[0]^key64_011, accs[1]^key64_019) - acc.Hi += mulFold64(accs[0]^key64_117, accs[1]^key64_125) - - acc.Lo += mulFold64(accs[2]^key64_027, accs[3]^key64_035) - acc.Hi += mulFold64(accs[2]^key64_133, accs[3]^key64_141) - - acc.Lo += mulFold64(accs[4]^key64_043, accs[5]^key64_051) - acc.Hi += mulFold64(accs[4]^key64_149, accs[5]^key64_157) - - acc.Lo += mulFold64(accs[6]^key64_059, accs[7]^key64_067) - acc.Hi += mulFold64(accs[6]^key64_165, accs[7]^key64_173) - - acc.Lo = xxh3Avalanche(acc.Lo) - acc.Hi = xxh3Avalanche(acc.Hi) - - return acc - } -} diff --git a/vendor/github.com/zeebo/xxh3/hash128_seed.go b/vendor/github.com/zeebo/xxh3/hash128_seed.go deleted file mode 100644 index 358009be3..000000000 --- a/vendor/github.com/zeebo/xxh3/hash128_seed.go +++ /dev/null @@ -1,264 +0,0 @@ -package xxh3 - -import ( - "math/bits" -) - -// Hash128Seed returns the 128-bit hash of the byte slice. -func Hash128Seed(b []byte, seed uint64) Uint128 { - return hashAny128Seed(*(*str)(ptr(&b)), seed) -} - -// HashString128Seed returns the 128-bit hash of the string slice. -func HashString128Seed(s string, seed uint64) Uint128 { - return hashAny128Seed(*(*str)(ptr(&s)), seed) -} - -func hashAny128Seed(s str, seed uint64) (acc u128) { - p, l := s.p, s.l - - switch { - case l <= 16: - switch { - case l > 8: // 9-16 - bitflipl := (key64_032 ^ key64_040) - seed - bitfliph := (key64_048 ^ key64_056) + seed - - input_lo := readU64(p, 0) - input_hi := readU64(p, ui(l)-8) - - m128_h, m128_l := bits.Mul64(input_lo^input_hi^bitflipl, prime64_1) - - m128_l += uint64(l-1) << 54 - input_hi ^= bitfliph - - m128_h += input_hi + uint64(uint32(input_hi))*(prime32_2-1) - - m128_l ^= bits.ReverseBytes64(m128_h) - - acc.Hi, acc.Lo = bits.Mul64(m128_l, prime64_2) - acc.Hi += m128_h * prime64_2 - - acc.Lo = xxh3Avalanche(acc.Lo) - acc.Hi = xxh3Avalanche(acc.Hi) - - return acc - - case l > 3: // 4-8 - seed ^= u64(bits.ReverseBytes32(u32(seed))) << 32 - bitflip := (key64_016 ^ key64_024) + seed - input_lo := readU32(p, 0) - input_hi := readU32(p, ui(l)-4) - input_64 := u64(input_lo) + u64(input_hi)<<32 - keyed := input_64 ^ bitflip - - acc.Hi, acc.Lo = bits.Mul64(keyed, prime64_1+(uint64(l)<<2)) - - acc.Hi += acc.Lo << 1 - acc.Lo ^= acc.Hi >> 3 - - acc.Lo ^= acc.Lo >> 35 - acc.Lo *= 0x9fb21c651e98df25 - acc.Lo ^= acc.Lo >> 28 - acc.Hi = xxh3Avalanche(acc.Hi) - - return acc - - case l == 3: // 3 - c12 := u64(readU16(p, 0)) - c3 := u64(readU8(p, 2)) - acc.Lo = c12<<16 + c3 + 3<<8 - - case l > 1: // 2 - c12 := u64(readU16(p, 0)) - acc.Lo = c12*(1<<24+1)>>8 + 2<<8 - - case l == 1: // 1 - c1 := u64(readU8(p, 0)) - acc.Lo = c1*(1<<24+1<<16+1) + 1<<8 - - default: // 0 - bitflipl := key64_064 ^ key64_072 ^ seed - bitfliph := key64_080 ^ key64_088 ^ seed - return u128{Lo: xxh64AvalancheFull(bitflipl), Hi: xxh64AvalancheFull(bitfliph)} - } - - acc.Hi = uint64(bits.RotateLeft32(bits.ReverseBytes32(uint32(acc.Lo)), 13)) - acc.Lo ^= uint64(key32_000^key32_004) + seed - acc.Hi ^= uint64(key32_008^key32_012) - seed - - acc.Lo = xxh64AvalancheFull(acc.Lo) - acc.Hi = xxh64AvalancheFull(acc.Hi) - - return acc - - case l <= 128: - acc.Lo = u64(l) * prime64_1 - - if l > 32 { - if l > 64 { - if l > 96 { - in8, in7 := readU64(p, ui(l)-8*8), readU64(p, ui(l)-7*8) - i6, i7 := readU64(p, 6*8), readU64(p, 7*8) - - acc.Hi += mulFold64(in8^(key64_112+seed), in7^(key64_120-seed)) - acc.Hi ^= i6 + i7 - acc.Lo += mulFold64(i6^(key64_096+seed), i7^(key64_104-seed)) - acc.Lo ^= in8 + in7 - - } // 96 - - in6, in5 := readU64(p, ui(l)-6*8), readU64(p, ui(l)-5*8) - i4, i5 := readU64(p, 4*8), readU64(p, 5*8) - - acc.Hi += mulFold64(in6^(key64_080+seed), in5^(key64_088-seed)) - acc.Hi ^= i4 + i5 - acc.Lo += mulFold64(i4^(key64_064+seed), i5^(key64_072-seed)) - acc.Lo ^= in6 + in5 - - } // 64 - - in4, in3 := readU64(p, ui(l)-4*8), readU64(p, ui(l)-3*8) - i2, i3 := readU64(p, 2*8), readU64(p, 3*8) - - acc.Hi += mulFold64(in4^(key64_048+seed), in3^(key64_056-seed)) - acc.Hi ^= i2 + i3 - acc.Lo += mulFold64(i2^(key64_032+seed), i3^(key64_040-seed)) - acc.Lo ^= in4 + in3 - - } // 32 - - in2, in1 := readU64(p, ui(l)-2*8), readU64(p, ui(l)-1*8) - i0, i1 := readU64(p, 0*8), readU64(p, 1*8) - - acc.Hi += mulFold64(in2^(key64_016+seed), in1^(key64_024-seed)) - acc.Hi ^= i0 + i1 - acc.Lo += mulFold64(i0^(key64_000+seed), i1^(key64_008-seed)) - acc.Lo ^= in2 + in1 - - acc.Hi, acc.Lo = (acc.Lo*prime64_1)+(acc.Hi*prime64_4)+((u64(l)-seed)*prime64_2), acc.Hi+acc.Lo - - acc.Hi = -xxh3Avalanche(acc.Hi) - acc.Lo = xxh3Avalanche(acc.Lo) - - return acc - - case l <= 240: - acc.Lo = u64(l) * prime64_1 - - { - i0, i1, i2, i3 := readU64(p, 0*8), readU64(p, 1*8), readU64(p, 2*8), readU64(p, 3*8) - - acc.Hi += mulFold64(i2^(key64_016+seed), i3^(key64_024-seed)) - acc.Hi ^= i0 + i1 - acc.Lo += mulFold64(i0^(key64_000+seed), i1^(key64_008-seed)) - acc.Lo ^= i2 + i3 - } - - { - i0, i1, i2, i3 := readU64(p, 4*8), readU64(p, 5*8), readU64(p, 6*8), readU64(p, 7*8) - - acc.Hi += mulFold64(i2^(key64_048+seed), i3^(key64_056-seed)) - acc.Hi ^= i0 + i1 - acc.Lo += mulFold64(i0^(key64_032+seed), i1^(key64_040-seed)) - acc.Lo ^= i2 + i3 - } - - { - i0, i1, i2, i3 := readU64(p, 8*8), readU64(p, 9*8), readU64(p, 10*8), readU64(p, 11*8) - - acc.Hi += mulFold64(i2^(key64_080+seed), i3^(key64_088-seed)) - acc.Hi ^= i0 + i1 - acc.Lo += mulFold64(i0^(key64_064+seed), i1^(key64_072-seed)) - acc.Lo ^= i2 + i3 - } - - { - i0, i1, i2, i3 := readU64(p, 12*8), readU64(p, 13*8), readU64(p, 14*8), readU64(p, 15*8) - - acc.Hi += mulFold64(i2^(key64_112+seed), i3^(key64_120-seed)) - acc.Hi ^= i0 + i1 - acc.Lo += mulFold64(i0^(key64_096+seed), i1^(key64_104-seed)) - acc.Lo ^= i2 + i3 - } - - // avalanche - acc.Hi = xxh3Avalanche(acc.Hi) - acc.Lo = xxh3Avalanche(acc.Lo) - - // trailing groups after 128 - top := ui(l) &^ 31 - for i := ui(4 * 32); i < top; i += 32 { - i0, i1, i2, i3 := readU64(p, i+0), readU64(p, i+8), readU64(p, i+16), readU64(p, i+24) - k0, k1, k2, k3 := readU64(key, i-125)+seed, readU64(key, i-117)-seed, readU64(key, i-109)+seed, readU64(key, i-101)-seed - - acc.Hi += mulFold64(i2^k2, i3^k3) - acc.Hi ^= i0 + i1 - acc.Lo += mulFold64(i0^k0, i1^k1) - acc.Lo ^= i2 + i3 - } - - // last 32 bytes - { - i0, i1, i2, i3 := readU64(p, ui(l)-32), readU64(p, ui(l)-24), readU64(p, ui(l)-16), readU64(p, ui(l)-8) - - seed := 0 - seed - acc.Hi += mulFold64(i0^(key64_119+seed), i1^(key64_127-seed)) - acc.Hi ^= i2 + i3 - acc.Lo += mulFold64(i2^(key64_103+seed), i3^(key64_111-seed)) - acc.Lo ^= i0 + i1 - } - - acc.Hi, acc.Lo = (acc.Lo*prime64_1)+(acc.Hi*prime64_4)+((u64(l)-seed)*prime64_2), acc.Hi+acc.Lo - - acc.Hi = -xxh3Avalanche(acc.Hi) - acc.Lo = xxh3Avalanche(acc.Lo) - - return acc - - default: - acc.Lo = u64(l) * prime64_1 - acc.Hi = ^(u64(l) * prime64_2) - - secret := key - if seed != 0 { - secret = ptr(&[secretSize]byte{}) - initSecret(secret, seed) - } - - accs := [8]u64{ - prime32_3, prime64_1, prime64_2, prime64_3, - prime64_4, prime32_2, prime64_5, prime32_1, - } - - if hasAVX512 && l >= avx512Switch { - accumAVX512(&accs, p, secret, u64(l)) - } else if hasAVX2 { - accumAVX2(&accs, p, secret, u64(l)) - } else if hasSSE2 { - accumSSE(&accs, p, secret, u64(l)) - } else { - accumScalar(&accs, p, secret, u64(l)) - } - - // merge accs - const hi_off = 117 - 11 - - acc.Lo += mulFold64(accs[0]^readU64(secret, 11), accs[1]^readU64(secret, 19)) - acc.Hi += mulFold64(accs[0]^readU64(secret, 11+hi_off), accs[1]^readU64(secret, 19+hi_off)) - - acc.Lo += mulFold64(accs[2]^readU64(secret, 27), accs[3]^readU64(secret, 35)) - acc.Hi += mulFold64(accs[2]^readU64(secret, 27+hi_off), accs[3]^readU64(secret, 35+hi_off)) - - acc.Lo += mulFold64(accs[4]^readU64(secret, 43), accs[5]^readU64(secret, 51)) - acc.Hi += mulFold64(accs[4]^readU64(secret, 43+hi_off), accs[5]^readU64(secret, 51+hi_off)) - - acc.Lo += mulFold64(accs[6]^readU64(secret, 59), accs[7]^readU64(secret, 67)) - acc.Hi += mulFold64(accs[6]^readU64(secret, 59+hi_off), accs[7]^readU64(secret, 67+hi_off)) - - acc.Lo = xxh3Avalanche(acc.Lo) - acc.Hi = xxh3Avalanche(acc.Hi) - - return acc - } -} diff --git a/vendor/github.com/zeebo/xxh3/hash64.go b/vendor/github.com/zeebo/xxh3/hash64.go deleted file mode 100644 index 13aab9585..000000000 --- a/vendor/github.com/zeebo/xxh3/hash64.go +++ /dev/null @@ -1,126 +0,0 @@ -package xxh3 - -import "math/bits" - -// Hash returns the hash of the byte slice. -func Hash(b []byte) uint64 { - return hashAny(*(*str)(ptr(&b))) -} - -// Hash returns the hash of the string slice. -func HashString(s string) uint64 { - return hashAny(*(*str)(ptr(&s))) -} - -func hashAny(s str) (acc u64) { - p, l := s.p, s.l - - switch { - case l <= 16: - switch { - case l > 8: // 9-16 - inputlo := readU64(p, 0) ^ (key64_024 ^ key64_032) - inputhi := readU64(p, ui(l)-8) ^ (key64_040 ^ key64_048) - folded := mulFold64(inputlo, inputhi) - return xxh3Avalanche(u64(l) + bits.ReverseBytes64(inputlo) + inputhi + folded) - - case l > 3: // 4-8 - input1 := readU32(p, 0) - input2 := readU32(p, ui(l)-4) - input64 := u64(input2) + u64(input1)<<32 - keyed := input64 ^ (key64_008 ^ key64_016) - return rrmxmx(keyed, u64(l)) - - case l == 3: // 3 - c12 := u64(readU16(p, 0)) - c3 := u64(readU8(p, 2)) - acc = c12<<16 + c3 + 3<<8 - - case l > 1: // 2 - c12 := u64(readU16(p, 0)) - acc = c12*(1<<24+1)>>8 + 2<<8 - - case l == 1: // 1 - c1 := u64(readU8(p, 0)) - acc = c1*(1<<24+1<<16+1) + 1<<8 - - default: // 0 - return 0x2d06800538d394c2 // xxh_avalanche(key64_056 ^ key64_064) - } - - acc ^= u64(key32_000 ^ key32_004) - return xxhAvalancheSmall(acc) - - case l <= 128: - acc = u64(l) * prime64_1 - - if l > 32 { - if l > 64 { - if l > 96 { - acc += mulFold64(readU64(p, 6*8)^key64_096, readU64(p, 7*8)^key64_104) - acc += mulFold64(readU64(p, ui(l)-8*8)^key64_112, readU64(p, ui(l)-7*8)^key64_120) - } // 96 - acc += mulFold64(readU64(p, 4*8)^key64_064, readU64(p, 5*8)^key64_072) - acc += mulFold64(readU64(p, ui(l)-6*8)^key64_080, readU64(p, ui(l)-5*8)^key64_088) - } // 64 - acc += mulFold64(readU64(p, 2*8)^key64_032, readU64(p, 3*8)^key64_040) - acc += mulFold64(readU64(p, ui(l)-4*8)^key64_048, readU64(p, ui(l)-3*8)^key64_056) - } // 32 - acc += mulFold64(readU64(p, 0*8)^key64_000, readU64(p, 1*8)^key64_008) - acc += mulFold64(readU64(p, ui(l)-2*8)^key64_016, readU64(p, ui(l)-1*8)^key64_024) - - return xxh3Avalanche(acc) - - case l <= 240: - acc = u64(l) * prime64_1 - - acc += mulFold64(readU64(p, 0*16+0)^key64_000, readU64(p, 0*16+8)^key64_008) - acc += mulFold64(readU64(p, 1*16+0)^key64_016, readU64(p, 1*16+8)^key64_024) - acc += mulFold64(readU64(p, 2*16+0)^key64_032, readU64(p, 2*16+8)^key64_040) - acc += mulFold64(readU64(p, 3*16+0)^key64_048, readU64(p, 3*16+8)^key64_056) - acc += mulFold64(readU64(p, 4*16+0)^key64_064, readU64(p, 4*16+8)^key64_072) - acc += mulFold64(readU64(p, 5*16+0)^key64_080, readU64(p, 5*16+8)^key64_088) - acc += mulFold64(readU64(p, 6*16+0)^key64_096, readU64(p, 6*16+8)^key64_104) - acc += mulFold64(readU64(p, 7*16+0)^key64_112, readU64(p, 7*16+8)^key64_120) - - // avalanche - acc = xxh3Avalanche(acc) - - // trailing groups after 128 - top := ui(l) &^ 15 - for i := ui(8 * 16); i < top; i += 16 { - acc += mulFold64(readU64(p, i+0)^readU64(key, i-125), readU64(p, i+8)^readU64(key, i-117)) - } - - // last 16 bytes - acc += mulFold64(readU64(p, ui(l)-16)^key64_119, readU64(p, ui(l)-8)^key64_127) - - return xxh3Avalanche(acc) - - default: - acc = u64(l) * prime64_1 - - accs := [8]u64{ - prime32_3, prime64_1, prime64_2, prime64_3, - prime64_4, prime32_2, prime64_5, prime32_1, - } - - if hasAVX512 && l >= avx512Switch { - accumAVX512(&accs, p, key, u64(l)) - } else if hasAVX2 { - accumAVX2(&accs, p, key, u64(l)) - } else if hasSSE2 { - accumSSE(&accs, p, key, u64(l)) - } else { - accumScalar(&accs, p, key, u64(l)) - } - - // merge accs - acc += mulFold64(accs[0]^key64_011, accs[1]^key64_019) - acc += mulFold64(accs[2]^key64_027, accs[3]^key64_035) - acc += mulFold64(accs[4]^key64_043, accs[5]^key64_051) - acc += mulFold64(accs[6]^key64_059, accs[7]^key64_067) - - return xxh3Avalanche(acc) - } -} diff --git a/vendor/github.com/zeebo/xxh3/hash64_seed.go b/vendor/github.com/zeebo/xxh3/hash64_seed.go deleted file mode 100644 index 429994c36..000000000 --- a/vendor/github.com/zeebo/xxh3/hash64_seed.go +++ /dev/null @@ -1,134 +0,0 @@ -package xxh3 - -import "math/bits" - -// HashSeed returns the hash of the byte slice with given seed. -func HashSeed(b []byte, seed uint64) uint64 { - return hashAnySeed(*(*str)(ptr(&b)), seed) - -} - -// HashStringSeed returns the hash of the string slice with given seed. -func HashStringSeed(s string, seed uint64) uint64 { - return hashAnySeed(*(*str)(ptr(&s)), seed) -} - -func hashAnySeed(s str, seed uint64) (acc u64) { - p, l := s.p, s.l - - switch { - case l <= 16: - switch { - case l > 8: - inputlo := readU64(p, 0) ^ (key64_024 ^ key64_032 + seed) - inputhi := readU64(p, ui(l)-8) ^ (key64_040 ^ key64_048 - seed) - folded := mulFold64(inputlo, inputhi) - return xxh3Avalanche(u64(l) + bits.ReverseBytes64(inputlo) + inputhi + folded) - - case l > 3: - seed ^= u64(bits.ReverseBytes32(u32(seed))) << 32 - input1 := readU32(p, 0) - input2 := readU32(p, ui(l)-4) - input64 := u64(input2) + u64(input1)<<32 - keyed := input64 ^ (key64_008 ^ key64_016 - seed) - return rrmxmx(keyed, u64(l)) - - case l == 3: // 3 - c12 := u64(readU16(p, 0)) - c3 := u64(readU8(p, 2)) - acc = c12<<16 + c3 + 3<<8 - - case l > 1: // 2 - c12 := u64(readU16(p, 0)) - acc = c12*(1<<24+1)>>8 + 2<<8 - - case l == 1: // 1 - c1 := u64(readU8(p, 0)) - acc = c1*(1<<24+1<<16+1) + 1<<8 - - default: - return xxhAvalancheSmall(seed ^ key64_056 ^ key64_064) - } - - acc ^= u64(key32_000^key32_004) + seed - return xxhAvalancheSmall(acc) - - case l <= 128: - acc = u64(l) * prime64_1 - - if l > 32 { - if l > 64 { - if l > 96 { - acc += mulFold64(readU64(p, 6*8)^(key64_096+seed), readU64(p, 7*8)^(key64_104-seed)) - acc += mulFold64(readU64(p, ui(l)-8*8)^(key64_112+seed), readU64(p, ui(l)-7*8)^(key64_120-seed)) - } // 96 - acc += mulFold64(readU64(p, 4*8)^(key64_064+seed), readU64(p, 5*8)^(key64_072-seed)) - acc += mulFold64(readU64(p, ui(l)-6*8)^(key64_080+seed), readU64(p, ui(l)-5*8)^(key64_088-seed)) - } // 64 - acc += mulFold64(readU64(p, 2*8)^(key64_032+seed), readU64(p, 3*8)^(key64_040-seed)) - acc += mulFold64(readU64(p, ui(l)-4*8)^(key64_048+seed), readU64(p, ui(l)-3*8)^(key64_056-seed)) - } // 32 - acc += mulFold64(readU64(p, 0*8)^(key64_000+seed), readU64(p, 1*8)^(key64_008-seed)) - acc += mulFold64(readU64(p, ui(l)-2*8)^(key64_016+seed), readU64(p, ui(l)-1*8)^(key64_024-seed)) - - return xxh3Avalanche(acc) - - case l <= 240: - acc = u64(l) * prime64_1 - - acc += mulFold64(readU64(p, 0*16+0)^(key64_000+seed), readU64(p, 0*16+8)^(key64_008-seed)) - acc += mulFold64(readU64(p, 1*16+0)^(key64_016+seed), readU64(p, 1*16+8)^(key64_024-seed)) - acc += mulFold64(readU64(p, 2*16+0)^(key64_032+seed), readU64(p, 2*16+8)^(key64_040-seed)) - acc += mulFold64(readU64(p, 3*16+0)^(key64_048+seed), readU64(p, 3*16+8)^(key64_056-seed)) - acc += mulFold64(readU64(p, 4*16+0)^(key64_064+seed), readU64(p, 4*16+8)^(key64_072-seed)) - acc += mulFold64(readU64(p, 5*16+0)^(key64_080+seed), readU64(p, 5*16+8)^(key64_088-seed)) - acc += mulFold64(readU64(p, 6*16+0)^(key64_096+seed), readU64(p, 6*16+8)^(key64_104-seed)) - acc += mulFold64(readU64(p, 7*16+0)^(key64_112+seed), readU64(p, 7*16+8)^(key64_120-seed)) - - // avalanche - acc = xxh3Avalanche(acc) - - // trailing groups after 128 - top := ui(l) &^ 15 - for i := ui(8 * 16); i < top; i += 16 { - acc += mulFold64(readU64(p, i+0)^(readU64(key, i-125)+seed), readU64(p, i+8)^(readU64(key, i-117)-seed)) - } - - // last 16 bytes - acc += mulFold64(readU64(p, ui(l)-16)^(key64_119+seed), readU64(p, ui(l)-8)^(key64_127-seed)) - - return xxh3Avalanche(acc) - - default: - acc = u64(l) * prime64_1 - - secret := key - if seed != 0 { - secret = ptr(&[secretSize]byte{}) - initSecret(secret, seed) - } - - accs := [8]u64{ - prime32_3, prime64_1, prime64_2, prime64_3, - prime64_4, prime32_2, prime64_5, prime32_1, - } - - if hasAVX512 && l >= avx512Switch { - accumAVX512(&accs, p, secret, u64(l)) - } else if hasAVX2 { - accumAVX2(&accs, p, secret, u64(l)) - } else if hasSSE2 { - accumSSE(&accs, p, secret, u64(l)) - } else { - accumScalarSeed(&accs, p, secret, u64(l)) - } - - // merge accs - acc += mulFold64(accs[0]^readU64(secret, 11), accs[1]^readU64(secret, 19)) - acc += mulFold64(accs[2]^readU64(secret, 27), accs[3]^readU64(secret, 35)) - acc += mulFold64(accs[4]^readU64(secret, 43), accs[5]^readU64(secret, 51)) - acc += mulFold64(accs[6]^readU64(secret, 59), accs[7]^readU64(secret, 67)) - - return xxh3Avalanche(acc) - } -} diff --git a/vendor/github.com/zeebo/xxh3/hasher.go b/vendor/github.com/zeebo/xxh3/hasher.go deleted file mode 100644 index d9789980a..000000000 --- a/vendor/github.com/zeebo/xxh3/hasher.go +++ /dev/null @@ -1,239 +0,0 @@ -package xxh3 - -import ( - "encoding/binary" - "hash" -) - -// Hasher implements the hash.Hash interface -type Hasher struct { - acc [8]u64 - blk u64 - len u64 - key ptr - buf [_block + _stripe]byte - seed u64 -} - -var ( - _ hash.Hash = (*Hasher)(nil) - _ hash.Hash64 = (*Hasher)(nil) -) - -// New returns a new Hasher that implements the hash.Hash interface. -func New() *Hasher { - return new(Hasher) -} - -// NewSeed returns a new Hasher that implements the hash.Hash interface. -func NewSeed(seed uint64) *Hasher { - var h Hasher - h.Reset() - h.seed = seed - h.key = key - - // Only initiate once, not on reset. - if seed != 0 { - h.key = ptr(&[secretSize]byte{}) - initSecret(h.key, seed) - } - return &h -} - -// Reset resets the Hash to its initial state. -func (h *Hasher) Reset() { - h.acc = [8]u64{ - prime32_3, prime64_1, prime64_2, prime64_3, - prime64_4, prime32_2, prime64_5, prime32_1, - } - h.blk = 0 - h.len = 0 -} - -// BlockSize returns the hash's underlying block size. -// The Write method will accept any amount of data, but -// it may operate more efficiently if all writes are a -// multiple of the block size. -func (h *Hasher) BlockSize() int { return _stripe } - -// Size returns the number of bytes Sum will return. -func (h *Hasher) Size() int { return 8 } - -// Sum appends the current hash to b and returns the resulting slice. -// It does not change the underlying hash state. -func (h *Hasher) Sum(b []byte) []byte { - var tmp [8]byte - binary.BigEndian.PutUint64(tmp[:], h.Sum64()) - return append(b, tmp[:]...) -} - -// Write adds more data to the running hash. -// It never returns an error. -func (h *Hasher) Write(buf []byte) (int, error) { - h.update(buf) - return len(buf), nil -} - -// WriteString adds more data to the running hash. -// It never returns an error. -func (h *Hasher) WriteString(buf string) (int, error) { - h.updateString(buf) - return len(buf), nil -} - -func (h *Hasher) update(buf []byte) { - // relies on the data pointer being the first word in the string header - h.updateString(*(*string)(ptr(&buf))) -} - -func (h *Hasher) updateString(buf string) { - if h.key == nil { - h.key = key - h.Reset() - } - - // On first write, if more than 1 block, process without copy. - for h.len == 0 && len(buf) > len(h.buf) { - if hasAVX2 { - accumBlockAVX2(&h.acc, *(*ptr)(ptr(&buf)), h.key) - } else if hasSSE2 { - accumBlockSSE(&h.acc, *(*ptr)(ptr(&buf)), h.key) - } else { - accumBlockScalar(&h.acc, *(*ptr)(ptr(&buf)), h.key) - } - buf = buf[_block:] - h.blk++ - } - - for len(buf) > 0 { - if h.len < u64(len(h.buf)) { - n := copy(h.buf[h.len:], buf) - h.len += u64(n) - buf = buf[n:] - continue - } - - if hasAVX2 { - accumBlockAVX2(&h.acc, ptr(&h.buf), h.key) - } else if hasSSE2 { - accumBlockSSE(&h.acc, ptr(&h.buf), h.key) - } else { - accumBlockScalar(&h.acc, ptr(&h.buf), h.key) - } - - h.blk++ - h.len = _stripe - copy(h.buf[:_stripe], h.buf[_block:]) - } -} - -// Sum64 returns the 64-bit hash of the written data. -func (h *Hasher) Sum64() uint64 { - if h.key == nil { - h.key = key - h.Reset() - } - - if h.blk == 0 { - if h.seed == 0 { - return Hash(h.buf[:h.len]) - } - return HashSeed(h.buf[:h.len], h.seed) - } - - l := h.blk*_block + h.len - acc := l * prime64_1 - accs := h.acc - - if h.len > 0 { - // We are only ever doing 1 block here, so no avx512. - if hasAVX2 { - accumAVX2(&accs, ptr(&h.buf[0]), h.key, h.len) - } else if hasSSE2 { - accumSSE(&accs, ptr(&h.buf[0]), h.key, h.len) - } else { - accumScalar(&accs, ptr(&h.buf[0]), h.key, h.len) - } - } - - if h.seed == 0 { - acc += mulFold64(accs[0]^key64_011, accs[1]^key64_019) - acc += mulFold64(accs[2]^key64_027, accs[3]^key64_035) - acc += mulFold64(accs[4]^key64_043, accs[5]^key64_051) - acc += mulFold64(accs[6]^key64_059, accs[7]^key64_067) - } else { - secret := h.key - acc += mulFold64(accs[0]^readU64(secret, 11), accs[1]^readU64(secret, 19)) - acc += mulFold64(accs[2]^readU64(secret, 27), accs[3]^readU64(secret, 35)) - acc += mulFold64(accs[4]^readU64(secret, 43), accs[5]^readU64(secret, 51)) - acc += mulFold64(accs[6]^readU64(secret, 59), accs[7]^readU64(secret, 67)) - } - - acc = xxh3Avalanche(acc) - - return acc -} - -// Sum128 returns the 128-bit hash of the written data. -func (h *Hasher) Sum128() Uint128 { - if h.key == nil { - h.key = key - h.Reset() - } - - if h.blk == 0 { - if h.seed == 0 { - return Hash128(h.buf[:h.len]) - } - return Hash128Seed(h.buf[:h.len], h.seed) - } - - l := h.blk*_block + h.len - acc := Uint128{Lo: l * prime64_1, Hi: ^(l * prime64_2)} - accs := h.acc - - if h.len > 0 { - // We are only ever doing 1 block here, so no avx512. - if hasAVX2 { - accumAVX2(&accs, ptr(&h.buf[0]), h.key, h.len) - } else if hasSSE2 { - accumSSE(&accs, ptr(&h.buf[0]), h.key, h.len) - } else { - accumScalar(&accs, ptr(&h.buf[0]), h.key, h.len) - } - } - - if h.seed == 0 { - acc.Lo += mulFold64(accs[0]^key64_011, accs[1]^key64_019) - acc.Hi += mulFold64(accs[0]^key64_117, accs[1]^key64_125) - - acc.Lo += mulFold64(accs[2]^key64_027, accs[3]^key64_035) - acc.Hi += mulFold64(accs[2]^key64_133, accs[3]^key64_141) - - acc.Lo += mulFold64(accs[4]^key64_043, accs[5]^key64_051) - acc.Hi += mulFold64(accs[4]^key64_149, accs[5]^key64_157) - - acc.Lo += mulFold64(accs[6]^key64_059, accs[7]^key64_067) - acc.Hi += mulFold64(accs[6]^key64_165, accs[7]^key64_173) - } else { - secret := h.key - const hi_off = 117 - 11 - - acc.Lo += mulFold64(accs[0]^readU64(secret, 11), accs[1]^readU64(secret, 19)) - acc.Hi += mulFold64(accs[0]^readU64(secret, 11+hi_off), accs[1]^readU64(secret, 19+hi_off)) - - acc.Lo += mulFold64(accs[2]^readU64(secret, 27), accs[3]^readU64(secret, 35)) - acc.Hi += mulFold64(accs[2]^readU64(secret, 27+hi_off), accs[3]^readU64(secret, 35+hi_off)) - - acc.Lo += mulFold64(accs[4]^readU64(secret, 43), accs[5]^readU64(secret, 51)) - acc.Hi += mulFold64(accs[4]^readU64(secret, 43+hi_off), accs[5]^readU64(secret, 51+hi_off)) - - acc.Lo += mulFold64(accs[6]^readU64(secret, 59), accs[7]^readU64(secret, 67)) - acc.Hi += mulFold64(accs[6]^readU64(secret, 59+hi_off), accs[7]^readU64(secret, 67+hi_off)) - } - - acc.Lo = xxh3Avalanche(acc.Lo) - acc.Hi = xxh3Avalanche(acc.Hi) - - return acc -} diff --git a/vendor/github.com/zeebo/xxh3/utils.go b/vendor/github.com/zeebo/xxh3/utils.go deleted file mode 100644 index a837e68a6..000000000 --- a/vendor/github.com/zeebo/xxh3/utils.go +++ /dev/null @@ -1,129 +0,0 @@ -package xxh3 - -import ( - "math/bits" - "unsafe" -) - -// Uint128 is a 128 bit value. -// The actual value can be thought of as u.Hi<<64 | u.Lo. -type Uint128 struct { - Hi, Lo uint64 -} - -// Bytes returns the uint128 as an array of bytes in canonical form (big-endian encoded). -func (u Uint128) Bytes() [16]byte { - return [16]byte{ - byte(u.Hi >> 0x38), byte(u.Hi >> 0x30), byte(u.Hi >> 0x28), byte(u.Hi >> 0x20), - byte(u.Hi >> 0x18), byte(u.Hi >> 0x10), byte(u.Hi >> 0x08), byte(u.Hi), - byte(u.Lo >> 0x38), byte(u.Lo >> 0x30), byte(u.Lo >> 0x28), byte(u.Lo >> 0x20), - byte(u.Lo >> 0x18), byte(u.Lo >> 0x10), byte(u.Lo >> 0x08), byte(u.Lo), - } -} - -type ( - ptr = unsafe.Pointer - ui = uintptr - - u8 = uint8 - u32 = uint32 - u64 = uint64 - u128 = Uint128 -) - -type str struct { - p ptr - l uint -} - -func readU8(p ptr, o ui) uint8 { - return *(*uint8)(ptr(ui(p) + o)) -} - -func readU16(p ptr, o ui) uint16 { - b := (*[2]byte)(ptr(ui(p) + o)) - return uint16(b[0]) | uint16(b[1])<<8 -} - -func readU32(p ptr, o ui) uint32 { - b := (*[4]byte)(ptr(ui(p) + o)) - return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 -} - -func readU64(p ptr, o ui) uint64 { - b := (*[8]byte)(ptr(ui(p) + o)) - return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | - uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 -} - -func writeU64(p ptr, o ui, v u64) { - b := (*[8]byte)(ptr(ui(p) + o)) - b[0] = byte(v) - b[1] = byte(v >> 8) - b[2] = byte(v >> 16) - b[3] = byte(v >> 24) - b[4] = byte(v >> 32) - b[5] = byte(v >> 40) - b[6] = byte(v >> 48) - b[7] = byte(v >> 56) -} - -const secretSize = 192 - -func initSecret(secret ptr, seed u64) { - for i := ui(0); i < secretSize/16; i++ { - lo := readU64(key, 16*i) + seed - hi := readU64(key, 16*i+8) - seed - writeU64(secret, 16*i, lo) - writeU64(secret, 16*i+8, hi) - } -} - -func xxh64AvalancheSmall(x u64) u64 { - // x ^= x >> 33 // x must be < 32 bits - // x ^= u64(key32_000 ^ key32_004) // caller must do this - x *= prime64_2 - x ^= x >> 29 - x *= prime64_3 - x ^= x >> 32 - return x -} - -func xxhAvalancheSmall(x u64) u64 { - x ^= x >> 33 - x *= prime64_2 - x ^= x >> 29 - x *= prime64_3 - x ^= x >> 32 - return x -} - -func xxh64AvalancheFull(x u64) u64 { - x ^= x >> 33 - x *= prime64_2 - x ^= x >> 29 - x *= prime64_3 - x ^= x >> 32 - return x -} - -func xxh3Avalanche(x u64) u64 { - x ^= x >> 37 - x *= 0x165667919e3779f9 - x ^= x >> 32 - return x -} - -func rrmxmx(h64 u64, len u64) u64 { - h64 ^= bits.RotateLeft64(h64, 49) ^ bits.RotateLeft64(h64, 24) - h64 *= 0x9fb21c651e98df25 - h64 ^= (h64 >> 35) + len - h64 *= 0x9fb21c651e98df25 - h64 ^= (h64 >> 28) - return h64 -} - -func mulFold64(x, y u64) u64 { - hi, lo := bits.Mul64(x, y) - return hi ^ lo -} |