diff options
Diffstat (limited to 'vendor/github.com/zeebo/xxh3/accum_generic.go')
| -rw-r--r-- | vendor/github.com/zeebo/xxh3/accum_generic.go | 542 |
1 files changed, 0 insertions, 542 deletions
diff --git a/vendor/github.com/zeebo/xxh3/accum_generic.go b/vendor/github.com/zeebo/xxh3/accum_generic.go deleted file mode 100644 index b1be78507..000000000 --- a/vendor/github.com/zeebo/xxh3/accum_generic.go +++ /dev/null @@ -1,542 +0,0 @@ -package xxh3 - -// avx512Switch is the size at which the avx512 code is used. -// Bigger blocks benefit more. -const avx512Switch = 1 << 10 - -func accumScalar(accs *[8]u64, p, secret ptr, l u64) { - if secret != key { - accumScalarSeed(accs, p, secret, l) - return - } - for l > _block { - k := secret - - // accs - for i := 0; i < 16; i++ { - dv0 := readU64(p, 8*0) - dk0 := dv0 ^ readU64(k, 8*0) - accs[1] += dv0 - accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32) - - dv1 := readU64(p, 8*1) - dk1 := dv1 ^ readU64(k, 8*1) - accs[0] += dv1 - accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32) - - dv2 := readU64(p, 8*2) - dk2 := dv2 ^ readU64(k, 8*2) - accs[3] += dv2 - accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32) - - dv3 := readU64(p, 8*3) - dk3 := dv3 ^ readU64(k, 8*3) - accs[2] += dv3 - accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32) - - dv4 := readU64(p, 8*4) - dk4 := dv4 ^ readU64(k, 8*4) - accs[5] += dv4 - accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32) - - dv5 := readU64(p, 8*5) - dk5 := dv5 ^ readU64(k, 8*5) - accs[4] += dv5 - accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32) - - dv6 := readU64(p, 8*6) - dk6 := dv6 ^ readU64(k, 8*6) - accs[7] += dv6 - accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32) - - dv7 := readU64(p, 8*7) - dk7 := dv7 ^ readU64(k, 8*7) - accs[6] += dv7 - accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32) - - l -= _stripe - if l > 0 { - p, k = ptr(ui(p)+_stripe), ptr(ui(k)+8) - } - } - - // scramble accs - accs[0] ^= accs[0] >> 47 - accs[0] ^= key64_128 - accs[0] *= prime32_1 - - accs[1] ^= accs[1] >> 47 - accs[1] ^= key64_136 - accs[1] *= prime32_1 - - accs[2] ^= accs[2] >> 47 - accs[2] ^= key64_144 - accs[2] *= prime32_1 - - accs[3] ^= accs[3] >> 47 - accs[3] ^= key64_152 - accs[3] *= prime32_1 - - accs[4] ^= accs[4] >> 47 - accs[4] ^= key64_160 - accs[4] *= prime32_1 - - accs[5] ^= accs[5] >> 47 - accs[5] ^= key64_168 - accs[5] *= prime32_1 - - accs[6] ^= accs[6] >> 47 - accs[6] ^= key64_176 - accs[6] *= prime32_1 - - accs[7] ^= accs[7] >> 47 - accs[7] ^= key64_184 - accs[7] *= prime32_1 - } - - if l > 0 { - t, k := (l-1)/_stripe, secret - - for i := u64(0); i < t; i++ { - dv0 := readU64(p, 8*0) - dk0 := dv0 ^ readU64(k, 8*0) - accs[1] += dv0 - accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32) - - dv1 := readU64(p, 8*1) - dk1 := dv1 ^ readU64(k, 8*1) - accs[0] += dv1 - accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32) - - dv2 := readU64(p, 8*2) - dk2 := dv2 ^ readU64(k, 8*2) - accs[3] += dv2 - accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32) - - dv3 := readU64(p, 8*3) - dk3 := dv3 ^ readU64(k, 8*3) - accs[2] += dv3 - accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32) - - dv4 := readU64(p, 8*4) - dk4 := dv4 ^ readU64(k, 8*4) - accs[5] += dv4 - accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32) - - dv5 := readU64(p, 8*5) - dk5 := dv5 ^ readU64(k, 8*5) - accs[4] += dv5 - accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32) - - dv6 := readU64(p, 8*6) - dk6 := dv6 ^ readU64(k, 8*6) - accs[7] += dv6 - accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32) - - dv7 := readU64(p, 8*7) - dk7 := dv7 ^ readU64(k, 8*7) - accs[6] += dv7 - accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32) - - l -= _stripe - if l > 0 { - p, k = ptr(ui(p)+_stripe), ptr(ui(k)+8) - } - } - - if l > 0 { - p = ptr(ui(p) - uintptr(_stripe-l)) - - dv0 := readU64(p, 8*0) - dk0 := dv0 ^ key64_121 - accs[1] += dv0 - accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32) - - dv1 := readU64(p, 8*1) - dk1 := dv1 ^ key64_129 - accs[0] += dv1 - accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32) - - dv2 := readU64(p, 8*2) - dk2 := dv2 ^ key64_137 - accs[3] += dv2 - accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32) - - dv3 := readU64(p, 8*3) - dk3 := dv3 ^ key64_145 - accs[2] += dv3 - accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32) - - dv4 := readU64(p, 8*4) - dk4 := dv4 ^ key64_153 - accs[5] += dv4 - accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32) - - dv5 := readU64(p, 8*5) - dk5 := dv5 ^ key64_161 - accs[4] += dv5 - accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32) - - dv6 := readU64(p, 8*6) - dk6 := dv6 ^ key64_169 - accs[7] += dv6 - accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32) - - dv7 := readU64(p, 8*7) - dk7 := dv7 ^ key64_177 - accs[6] += dv7 - accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32) - } - } -} - -func accumBlockScalar(accs *[8]u64, p, secret ptr) { - if secret != key { - accumBlockScalarSeed(accs, p, secret) - return - } - // accs - for i := 0; i < 16; i++ { - dv0 := readU64(p, 8*0) - dk0 := dv0 ^ readU64(secret, 8*0) - accs[1] += dv0 - accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32) - - dv1 := readU64(p, 8*1) - dk1 := dv1 ^ readU64(secret, 8*1) - accs[0] += dv1 - accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32) - - dv2 := readU64(p, 8*2) - dk2 := dv2 ^ readU64(secret, 8*2) - accs[3] += dv2 - accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32) - - dv3 := readU64(p, 8*3) - dk3 := dv3 ^ readU64(secret, 8*3) - accs[2] += dv3 - accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32) - - dv4 := readU64(p, 8*4) - dk4 := dv4 ^ readU64(secret, 8*4) - accs[5] += dv4 - accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32) - - dv5 := readU64(p, 8*5) - dk5 := dv5 ^ readU64(secret, 8*5) - accs[4] += dv5 - accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32) - - dv6 := readU64(p, 8*6) - dk6 := dv6 ^ readU64(secret, 8*6) - accs[7] += dv6 - accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32) - - dv7 := readU64(p, 8*7) - dk7 := dv7 ^ readU64(secret, 8*7) - accs[6] += dv7 - accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32) - - p, secret = ptr(ui(p)+_stripe), ptr(ui(secret)+8) - } - - // scramble accs - accs[0] ^= accs[0] >> 47 - accs[0] ^= key64_128 - accs[0] *= prime32_1 - - accs[1] ^= accs[1] >> 47 - accs[1] ^= key64_136 - accs[1] *= prime32_1 - - accs[2] ^= accs[2] >> 47 - accs[2] ^= key64_144 - accs[2] *= prime32_1 - - accs[3] ^= accs[3] >> 47 - accs[3] ^= key64_152 - accs[3] *= prime32_1 - - accs[4] ^= accs[4] >> 47 - accs[4] ^= key64_160 - accs[4] *= prime32_1 - - accs[5] ^= accs[5] >> 47 - accs[5] ^= key64_168 - accs[5] *= prime32_1 - - accs[6] ^= accs[6] >> 47 - accs[6] ^= key64_176 - accs[6] *= prime32_1 - - accs[7] ^= accs[7] >> 47 - accs[7] ^= key64_184 - accs[7] *= prime32_1 -} - -// accumScalarSeed should be used with custom key. -func accumScalarSeed(accs *[8]u64, p, secret ptr, l u64) { - for l > _block { - k := secret - - // accs - for i := 0; i < 16; i++ { - dv0 := readU64(p, 8*0) - dk0 := dv0 ^ readU64(k, 8*0) - accs[1] += dv0 - accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32) - - dv1 := readU64(p, 8*1) - dk1 := dv1 ^ readU64(k, 8*1) - accs[0] += dv1 - accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32) - - dv2 := readU64(p, 8*2) - dk2 := dv2 ^ readU64(k, 8*2) - accs[3] += dv2 - accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32) - - dv3 := readU64(p, 8*3) - dk3 := dv3 ^ readU64(k, 8*3) - accs[2] += dv3 - accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32) - - dv4 := readU64(p, 8*4) - dk4 := dv4 ^ readU64(k, 8*4) - accs[5] += dv4 - accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32) - - dv5 := readU64(p, 8*5) - dk5 := dv5 ^ readU64(k, 8*5) - accs[4] += dv5 - accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32) - - dv6 := readU64(p, 8*6) - dk6 := dv6 ^ readU64(k, 8*6) - accs[7] += dv6 - accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32) - - dv7 := readU64(p, 8*7) - dk7 := dv7 ^ readU64(k, 8*7) - accs[6] += dv7 - accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32) - - l -= _stripe - if l > 0 { - p, k = ptr(ui(p)+_stripe), ptr(ui(k)+8) - } - } - - // scramble accs - accs[0] ^= accs[0] >> 47 - accs[0] ^= readU64(secret, 128) - accs[0] *= prime32_1 - - accs[1] ^= accs[1] >> 47 - accs[1] ^= readU64(secret, 136) - accs[1] *= prime32_1 - - accs[2] ^= accs[2] >> 47 - accs[2] ^= readU64(secret, 144) - accs[2] *= prime32_1 - - accs[3] ^= accs[3] >> 47 - accs[3] ^= readU64(secret, 152) - accs[3] *= prime32_1 - - accs[4] ^= accs[4] >> 47 - accs[4] ^= readU64(secret, 160) - accs[4] *= prime32_1 - - accs[5] ^= accs[5] >> 47 - accs[5] ^= readU64(secret, 168) - accs[5] *= prime32_1 - - accs[6] ^= accs[6] >> 47 - accs[6] ^= readU64(secret, 176) - accs[6] *= prime32_1 - - accs[7] ^= accs[7] >> 47 - accs[7] ^= readU64(secret, 184) - accs[7] *= prime32_1 - } - - if l > 0 { - t, k := (l-1)/_stripe, secret - - for i := u64(0); i < t; i++ { - dv0 := readU64(p, 8*0) - dk0 := dv0 ^ readU64(k, 8*0) - accs[1] += dv0 - accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32) - - dv1 := readU64(p, 8*1) - dk1 := dv1 ^ readU64(k, 8*1) - accs[0] += dv1 - accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32) - - dv2 := readU64(p, 8*2) - dk2 := dv2 ^ readU64(k, 8*2) - accs[3] += dv2 - accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32) - - dv3 := readU64(p, 8*3) - dk3 := dv3 ^ readU64(k, 8*3) - accs[2] += dv3 - accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32) - - dv4 := readU64(p, 8*4) - dk4 := dv4 ^ readU64(k, 8*4) - accs[5] += dv4 - accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32) - - dv5 := readU64(p, 8*5) - dk5 := dv5 ^ readU64(k, 8*5) - accs[4] += dv5 - accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32) - - dv6 := readU64(p, 8*6) - dk6 := dv6 ^ readU64(k, 8*6) - accs[7] += dv6 - accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32) - - dv7 := readU64(p, 8*7) - dk7 := dv7 ^ readU64(k, 8*7) - accs[6] += dv7 - accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32) - - l -= _stripe - if l > 0 { - p, k = ptr(ui(p)+_stripe), ptr(ui(k)+8) - } - } - - if l > 0 { - p = ptr(ui(p) - uintptr(_stripe-l)) - - dv0 := readU64(p, 8*0) - dk0 := dv0 ^ readU64(secret, 121) - accs[1] += dv0 - accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32) - - dv1 := readU64(p, 8*1) - dk1 := dv1 ^ readU64(secret, 129) - accs[0] += dv1 - accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32) - - dv2 := readU64(p, 8*2) - dk2 := dv2 ^ readU64(secret, 137) - accs[3] += dv2 - accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32) - - dv3 := readU64(p, 8*3) - dk3 := dv3 ^ readU64(secret, 145) - accs[2] += dv3 - accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32) - - dv4 := readU64(p, 8*4) - dk4 := dv4 ^ readU64(secret, 153) - accs[5] += dv4 - accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32) - - dv5 := readU64(p, 8*5) - dk5 := dv5 ^ readU64(secret, 161) - accs[4] += dv5 - accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32) - - dv6 := readU64(p, 8*6) - dk6 := dv6 ^ readU64(secret, 169) - accs[7] += dv6 - accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32) - - dv7 := readU64(p, 8*7) - dk7 := dv7 ^ readU64(secret, 177) - accs[6] += dv7 - accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32) - } - } -} - -// accumBlockScalarSeed should be used with custom key. -func accumBlockScalarSeed(accs *[8]u64, p, secret ptr) { - // accs - { - secret := secret - for i := 0; i < 16; i++ { - dv0 := readU64(p, 8*0) - dk0 := dv0 ^ readU64(secret, 8*0) - accs[1] += dv0 - accs[0] += (dk0 & 0xffffffff) * (dk0 >> 32) - - dv1 := readU64(p, 8*1) - dk1 := dv1 ^ readU64(secret, 8*1) - accs[0] += dv1 - accs[1] += (dk1 & 0xffffffff) * (dk1 >> 32) - - dv2 := readU64(p, 8*2) - dk2 := dv2 ^ readU64(secret, 8*2) - accs[3] += dv2 - accs[2] += (dk2 & 0xffffffff) * (dk2 >> 32) - - dv3 := readU64(p, 8*3) - dk3 := dv3 ^ readU64(secret, 8*3) - accs[2] += dv3 - accs[3] += (dk3 & 0xffffffff) * (dk3 >> 32) - - dv4 := readU64(p, 8*4) - dk4 := dv4 ^ readU64(secret, 8*4) - accs[5] += dv4 - accs[4] += (dk4 & 0xffffffff) * (dk4 >> 32) - - dv5 := readU64(p, 8*5) - dk5 := dv5 ^ readU64(secret, 8*5) - accs[4] += dv5 - accs[5] += (dk5 & 0xffffffff) * (dk5 >> 32) - - dv6 := readU64(p, 8*6) - dk6 := dv6 ^ readU64(secret, 8*6) - accs[7] += dv6 - accs[6] += (dk6 & 0xffffffff) * (dk6 >> 32) - - dv7 := readU64(p, 8*7) - dk7 := dv7 ^ readU64(secret, 8*7) - accs[6] += dv7 - accs[7] += (dk7 & 0xffffffff) * (dk7 >> 32) - - p, secret = ptr(ui(p)+_stripe), ptr(ui(secret)+8) - } - } - - // scramble accs - accs[0] ^= accs[0] >> 47 - accs[0] ^= readU64(secret, 128) - accs[0] *= prime32_1 - - accs[1] ^= accs[1] >> 47 - accs[1] ^= readU64(secret, 136) - accs[1] *= prime32_1 - - accs[2] ^= accs[2] >> 47 - accs[2] ^= readU64(secret, 144) - accs[2] *= prime32_1 - - accs[3] ^= accs[3] >> 47 - accs[3] ^= readU64(secret, 152) - accs[3] *= prime32_1 - - accs[4] ^= accs[4] >> 47 - accs[4] ^= readU64(secret, 160) - accs[4] *= prime32_1 - - accs[5] ^= accs[5] >> 47 - accs[5] ^= readU64(secret, 168) - accs[5] *= prime32_1 - - accs[6] ^= accs[6] >> 47 - accs[6] ^= readU64(secret, 176) - accs[6] *= prime32_1 - - accs[7] ^= accs[7] >> 47 - accs[7] ^= readU64(secret, 184) - accs[7] *= prime32_1 -} |
