author    | 2021-08-12 21:03:24 +0200
committer | 2021-08-12 21:03:24 +0200
commit    | 98263a7de64269898a2f81207e38943b5c8e8653 (patch)
tree      | 743c90f109a6c5d27832d1dcef2388d939f0f77a /vendor/golang.org/x/crypto/blake2s
parent    | Text duplication fix (#137) (diff)
download  | gotosocial-98263a7de64269898a2f81207e38943b5c8e8653.tar.xz
Grand test fixup (#138)
* start fixing up tests
* fix up tests + automate with drone
* fiddle with linting
* messing about with drone.yml
* some more fiddling
* hmmm
* add cache
* add vendor directory
* verbose
* ci updates
* update some little things
* update sig
Diffstat (limited to 'vendor/golang.org/x/crypto/blake2s')
-rw-r--r-- | vendor/golang.org/x/crypto/blake2s/blake2s.go         | 246
-rw-r--r-- | vendor/golang.org/x/crypto/blake2s/blake2s_386.go     |  33
-rw-r--r-- | vendor/golang.org/x/crypto/blake2s/blake2s_386.s      | 430
-rw-r--r-- | vendor/golang.org/x/crypto/blake2s/blake2s_amd64.go   |  38
-rw-r--r-- | vendor/golang.org/x/crypto/blake2s/blake2s_amd64.s    | 433
-rw-r--r-- | vendor/golang.org/x/crypto/blake2s/blake2s_generic.go | 178
-rw-r--r-- | vendor/golang.org/x/crypto/blake2s/blake2s_ref.go     |  18
-rw-r--r-- | vendor/golang.org/x/crypto/blake2s/blake2x.go         | 178
-rw-r--r-- | vendor/golang.org/x/crypto/blake2s/register.go        |  22
9 files changed, 1576 insertions, 0 deletions
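For orientation before the raw diff: the vendored package's two main entry points are Sum256 and New256, documented in blake2s.go below. The following is a minimal, illustrative sketch of how a caller might use them; the message and the 32-byte key are placeholder values and are not part of this change.

```go
package main

import (
	"fmt"

	"golang.org/x/crypto/blake2s"
)

func main() {
	// One-shot BLAKE2s-256 digest of a message.
	sum := blake2s.Sum256([]byte("hello world"))
	fmt.Printf("digest: %x\n", sum)

	// A non-nil key (up to 32 bytes) turns New256 into a MAC.
	mac, err := blake2s.New256([]byte("an example 32-byte secret key!!!"))
	if err != nil {
		panic(err)
	}
	mac.Write([]byte("hello world"))
	fmt.Printf("mac: %x\n", mac.Sum(nil))
}
```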
diff --git a/vendor/golang.org/x/crypto/blake2s/blake2s.go b/vendor/golang.org/x/crypto/blake2s/blake2s.go
new file mode 100644
index 000000000..e3f46aab3
--- /dev/null
+++ b/vendor/golang.org/x/crypto/blake2s/blake2s.go
@@ -0,0 +1,246 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package blake2s implements the BLAKE2s hash algorithm defined by RFC 7693
+// and the extendable output function (XOF) BLAKE2Xs.
+//
+// BLAKE2s is optimized for 8- to 32-bit platforms and produces digests of any
+// size between 1 and 32 bytes.
+// For a detailed specification of BLAKE2s see https://blake2.net/blake2.pdf
+// and for BLAKE2Xs see https://blake2.net/blake2x.pdf
+//
+// If you aren't sure which function you need, use BLAKE2s (Sum256 or New256).
+// If you need a secret-key MAC (message authentication code), use the New256
+// function with a non-nil key.
+//
+// BLAKE2X is a construction to compute hash values larger than 32 bytes. It
+// can produce hash values between 0 and 65535 bytes.
+package blake2s // import "golang.org/x/crypto/blake2s"
+
+import (
+	"encoding/binary"
+	"errors"
+	"hash"
+)
+
+const (
+	// The blocksize of BLAKE2s in bytes.
+	BlockSize = 64
+
+	// The hash size of BLAKE2s-256 in bytes.
+	Size = 32
+
+	// The hash size of BLAKE2s-128 in bytes.
+	Size128 = 16
+)
+
+var errKeySize = errors.New("blake2s: invalid key size")
+
+var iv = [8]uint32{
+	0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
+	0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
+}
+
+// Sum256 returns the BLAKE2s-256 checksum of the data.
+func Sum256(data []byte) [Size]byte {
+	var sum [Size]byte
+	checkSum(&sum, Size, data)
+	return sum
+}
+
+// New256 returns a new hash.Hash computing the BLAKE2s-256 checksum. A non-nil
+// key turns the hash into a MAC. The key must between zero and 32 bytes long.
+// When the key is nil, the returned hash.Hash implements BinaryMarshaler
+// and BinaryUnmarshaler for state (de)serialization as documented by hash.Hash.
+func New256(key []byte) (hash.Hash, error) { return newDigest(Size, key) }
+
+// New128 returns a new hash.Hash computing the BLAKE2s-128 checksum given a
+// non-empty key. Note that a 128-bit digest is too small to be secure as a
+// cryptographic hash and should only be used as a MAC, thus the key argument
+// is not optional.
+func New128(key []byte) (hash.Hash, error) { + if len(key) == 0 { + return nil, errors.New("blake2s: a key is required for a 128-bit hash") + } + return newDigest(Size128, key) +} + +func newDigest(hashSize int, key []byte) (*digest, error) { + if len(key) > Size { + return nil, errKeySize + } + d := &digest{ + size: hashSize, + keyLen: len(key), + } + copy(d.key[:], key) + d.Reset() + return d, nil +} + +func checkSum(sum *[Size]byte, hashSize int, data []byte) { + var ( + h [8]uint32 + c [2]uint32 + ) + + h = iv + h[0] ^= uint32(hashSize) | (1 << 16) | (1 << 24) + + if length := len(data); length > BlockSize { + n := length &^ (BlockSize - 1) + if length == n { + n -= BlockSize + } + hashBlocks(&h, &c, 0, data[:n]) + data = data[n:] + } + + var block [BlockSize]byte + offset := copy(block[:], data) + remaining := uint32(BlockSize - offset) + + if c[0] < remaining { + c[1]-- + } + c[0] -= remaining + + hashBlocks(&h, &c, 0xFFFFFFFF, block[:]) + + for i, v := range h { + binary.LittleEndian.PutUint32(sum[4*i:], v) + } +} + +type digest struct { + h [8]uint32 + c [2]uint32 + size int + block [BlockSize]byte + offset int + + key [BlockSize]byte + keyLen int +} + +const ( + magic = "b2s" + marshaledSize = len(magic) + 8*4 + 2*4 + 1 + BlockSize + 1 +) + +func (d *digest) MarshalBinary() ([]byte, error) { + if d.keyLen != 0 { + return nil, errors.New("crypto/blake2s: cannot marshal MACs") + } + b := make([]byte, 0, marshaledSize) + b = append(b, magic...) + for i := 0; i < 8; i++ { + b = appendUint32(b, d.h[i]) + } + b = appendUint32(b, d.c[0]) + b = appendUint32(b, d.c[1]) + // Maximum value for size is 32 + b = append(b, byte(d.size)) + b = append(b, d.block[:]...) + b = append(b, byte(d.offset)) + return b, nil +} + +func (d *digest) UnmarshalBinary(b []byte) error { + if len(b) < len(magic) || string(b[:len(magic)]) != magic { + return errors.New("crypto/blake2s: invalid hash state identifier") + } + if len(b) != marshaledSize { + return errors.New("crypto/blake2s: invalid hash state size") + } + b = b[len(magic):] + for i := 0; i < 8; i++ { + b, d.h[i] = consumeUint32(b) + } + b, d.c[0] = consumeUint32(b) + b, d.c[1] = consumeUint32(b) + d.size = int(b[0]) + b = b[1:] + copy(d.block[:], b[:BlockSize]) + b = b[BlockSize:] + d.offset = int(b[0]) + return nil +} + +func (d *digest) BlockSize() int { return BlockSize } + +func (d *digest) Size() int { return d.size } + +func (d *digest) Reset() { + d.h = iv + d.h[0] ^= uint32(d.size) | (uint32(d.keyLen) << 8) | (1 << 16) | (1 << 24) + d.offset, d.c[0], d.c[1] = 0, 0, 0 + if d.keyLen > 0 { + d.block = d.key + d.offset = BlockSize + } +} + +func (d *digest) Write(p []byte) (n int, err error) { + n = len(p) + + if d.offset > 0 { + remaining := BlockSize - d.offset + if n <= remaining { + d.offset += copy(d.block[d.offset:], p) + return + } + copy(d.block[d.offset:], p[:remaining]) + hashBlocks(&d.h, &d.c, 0, d.block[:]) + d.offset = 0 + p = p[remaining:] + } + + if length := len(p); length > BlockSize { + nn := length &^ (BlockSize - 1) + if length == nn { + nn -= BlockSize + } + hashBlocks(&d.h, &d.c, 0, p[:nn]) + p = p[nn:] + } + + d.offset += copy(d.block[:], p) + return +} + +func (d *digest) Sum(sum []byte) []byte { + var hash [Size]byte + d.finalize(&hash) + return append(sum, hash[:d.size]...) 
+} + +func (d *digest) finalize(hash *[Size]byte) { + var block [BlockSize]byte + h := d.h + c := d.c + + copy(block[:], d.block[:d.offset]) + remaining := uint32(BlockSize - d.offset) + if c[0] < remaining { + c[1]-- + } + c[0] -= remaining + + hashBlocks(&h, &c, 0xFFFFFFFF, block[:]) + for i, v := range h { + binary.LittleEndian.PutUint32(hash[4*i:], v) + } +} + +func appendUint32(b []byte, x uint32) []byte { + var a [4]byte + binary.BigEndian.PutUint32(a[:], x) + return append(b, a[:]...) +} + +func consumeUint32(b []byte) ([]byte, uint32) { + x := binary.BigEndian.Uint32(b) + return b[4:], x +} diff --git a/vendor/golang.org/x/crypto/blake2s/blake2s_386.go b/vendor/golang.org/x/crypto/blake2s/blake2s_386.go new file mode 100644 index 000000000..b4463fb4d --- /dev/null +++ b/vendor/golang.org/x/crypto/blake2s/blake2s_386.go @@ -0,0 +1,33 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build 386 && gc && !purego +// +build 386,gc,!purego + +package blake2s + +import "golang.org/x/sys/cpu" + +var ( + useSSE4 = false + useSSSE3 = cpu.X86.HasSSSE3 + useSSE2 = cpu.X86.HasSSE2 +) + +//go:noescape +func hashBlocksSSE2(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte) + +//go:noescape +func hashBlocksSSSE3(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte) + +func hashBlocks(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte) { + switch { + case useSSSE3: + hashBlocksSSSE3(h, c, flag, blocks) + case useSSE2: + hashBlocksSSE2(h, c, flag, blocks) + default: + hashBlocksGeneric(h, c, flag, blocks) + } +} diff --git a/vendor/golang.org/x/crypto/blake2s/blake2s_386.s b/vendor/golang.org/x/crypto/blake2s/blake2s_386.s new file mode 100644 index 000000000..603d00ca3 --- /dev/null +++ b/vendor/golang.org/x/crypto/blake2s/blake2s_386.s @@ -0,0 +1,430 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +//go:build 386 && gc && !purego +// +build 386,gc,!purego + +#include "textflag.h" + +DATA iv0<>+0x00(SB)/4, $0x6a09e667 +DATA iv0<>+0x04(SB)/4, $0xbb67ae85 +DATA iv0<>+0x08(SB)/4, $0x3c6ef372 +DATA iv0<>+0x0c(SB)/4, $0xa54ff53a +GLOBL iv0<>(SB), (NOPTR+RODATA), $16 + +DATA iv1<>+0x00(SB)/4, $0x510e527f +DATA iv1<>+0x04(SB)/4, $0x9b05688c +DATA iv1<>+0x08(SB)/4, $0x1f83d9ab +DATA iv1<>+0x0c(SB)/4, $0x5be0cd19 +GLOBL iv1<>(SB), (NOPTR+RODATA), $16 + +DATA rol16<>+0x00(SB)/8, $0x0504070601000302 +DATA rol16<>+0x08(SB)/8, $0x0D0C0F0E09080B0A +GLOBL rol16<>(SB), (NOPTR+RODATA), $16 + +DATA rol8<>+0x00(SB)/8, $0x0407060500030201 +DATA rol8<>+0x08(SB)/8, $0x0C0F0E0D080B0A09 +GLOBL rol8<>(SB), (NOPTR+RODATA), $16 + +DATA counter<>+0x00(SB)/8, $0x40 +DATA counter<>+0x08(SB)/8, $0x0 +GLOBL counter<>(SB), (NOPTR+RODATA), $16 + +#define ROTL_SSE2(n, t, v) \ + MOVO v, t; \ + PSLLL $n, t; \ + PSRLL $(32-n), v; \ + PXOR t, v + +#define ROTL_SSSE3(c, v) \ + PSHUFB c, v + +#define ROUND_SSE2(v0, v1, v2, v3, m0, m1, m2, m3, t) \ + PADDL m0, v0; \ + PADDL v1, v0; \ + PXOR v0, v3; \ + ROTL_SSE2(16, t, v3); \ + PADDL v3, v2; \ + PXOR v2, v1; \ + ROTL_SSE2(20, t, v1); \ + PADDL m1, v0; \ + PADDL v1, v0; \ + PXOR v0, v3; \ + ROTL_SSE2(24, t, v3); \ + PADDL v3, v2; \ + PXOR v2, v1; \ + ROTL_SSE2(25, t, v1); \ + PSHUFL $0x39, v1, v1; \ + PSHUFL $0x4E, v2, v2; \ + PSHUFL $0x93, v3, v3; \ + PADDL m2, v0; \ + PADDL v1, v0; \ + PXOR v0, v3; \ + ROTL_SSE2(16, t, v3); \ + PADDL v3, v2; \ + PXOR v2, v1; \ + ROTL_SSE2(20, t, v1); \ + PADDL m3, v0; \ + PADDL v1, v0; \ + PXOR v0, v3; \ + ROTL_SSE2(24, t, v3); \ + PADDL v3, v2; \ + PXOR v2, v1; \ + ROTL_SSE2(25, t, v1); \ + PSHUFL $0x39, v3, v3; \ + PSHUFL $0x4E, v2, v2; \ + PSHUFL $0x93, v1, v1 + +#define ROUND_SSSE3(v0, v1, v2, v3, m0, m1, m2, m3, t, c16, c8) \ + PADDL m0, v0; \ + PADDL v1, v0; \ + PXOR v0, v3; \ + ROTL_SSSE3(c16, v3); \ + PADDL v3, v2; \ + PXOR v2, v1; \ + ROTL_SSE2(20, t, v1); \ + PADDL m1, v0; \ + PADDL v1, v0; \ + PXOR v0, v3; \ + ROTL_SSSE3(c8, v3); \ + PADDL v3, v2; \ + PXOR v2, v1; \ + ROTL_SSE2(25, t, v1); \ + PSHUFL $0x39, v1, v1; \ + PSHUFL $0x4E, v2, v2; \ + PSHUFL $0x93, v3, v3; \ + PADDL m2, v0; \ + PADDL v1, v0; \ + PXOR v0, v3; \ + ROTL_SSSE3(c16, v3); \ + PADDL v3, v2; \ + PXOR v2, v1; \ + ROTL_SSE2(20, t, v1); \ + PADDL m3, v0; \ + PADDL v1, v0; \ + PXOR v0, v3; \ + ROTL_SSSE3(c8, v3); \ + PADDL v3, v2; \ + PXOR v2, v1; \ + ROTL_SSE2(25, t, v1); \ + PSHUFL $0x39, v3, v3; \ + PSHUFL $0x4E, v2, v2; \ + PSHUFL $0x93, v1, v1 + +#define PRECOMPUTE(dst, off, src, t) \ + MOVL 0*4(src), t; \ + MOVL t, 0*4+off+0(dst); \ + MOVL t, 9*4+off+64(dst); \ + MOVL t, 5*4+off+128(dst); \ + MOVL t, 14*4+off+192(dst); \ + MOVL t, 4*4+off+256(dst); \ + MOVL t, 2*4+off+320(dst); \ + MOVL t, 8*4+off+384(dst); \ + MOVL t, 12*4+off+448(dst); \ + MOVL t, 3*4+off+512(dst); \ + MOVL t, 15*4+off+576(dst); \ + MOVL 1*4(src), t; \ + MOVL t, 4*4+off+0(dst); \ + MOVL t, 8*4+off+64(dst); \ + MOVL t, 14*4+off+128(dst); \ + MOVL t, 5*4+off+192(dst); \ + MOVL t, 12*4+off+256(dst); \ + MOVL t, 11*4+off+320(dst); \ + MOVL t, 1*4+off+384(dst); \ + MOVL t, 6*4+off+448(dst); \ + MOVL t, 10*4+off+512(dst); \ + MOVL t, 3*4+off+576(dst); \ + MOVL 2*4(src), t; \ + MOVL t, 1*4+off+0(dst); \ + MOVL t, 13*4+off+64(dst); \ + MOVL t, 6*4+off+128(dst); \ + MOVL t, 8*4+off+192(dst); \ + MOVL t, 2*4+off+256(dst); \ + MOVL t, 0*4+off+320(dst); \ + MOVL t, 14*4+off+384(dst); \ + MOVL t, 11*4+off+448(dst); \ + MOVL t, 12*4+off+512(dst); \ + MOVL t, 4*4+off+576(dst); \ + MOVL 3*4(src), t; \ + 
MOVL t, 5*4+off+0(dst); \ + MOVL t, 15*4+off+64(dst); \ + MOVL t, 9*4+off+128(dst); \ + MOVL t, 1*4+off+192(dst); \ + MOVL t, 11*4+off+256(dst); \ + MOVL t, 7*4+off+320(dst); \ + MOVL t, 13*4+off+384(dst); \ + MOVL t, 3*4+off+448(dst); \ + MOVL t, 6*4+off+512(dst); \ + MOVL t, 10*4+off+576(dst); \ + MOVL 4*4(src), t; \ + MOVL t, 2*4+off+0(dst); \ + MOVL t, 1*4+off+64(dst); \ + MOVL t, 15*4+off+128(dst); \ + MOVL t, 10*4+off+192(dst); \ + MOVL t, 6*4+off+256(dst); \ + MOVL t, 8*4+off+320(dst); \ + MOVL t, 3*4+off+384(dst); \ + MOVL t, 13*4+off+448(dst); \ + MOVL t, 14*4+off+512(dst); \ + MOVL t, 5*4+off+576(dst); \ + MOVL 5*4(src), t; \ + MOVL t, 6*4+off+0(dst); \ + MOVL t, 11*4+off+64(dst); \ + MOVL t, 2*4+off+128(dst); \ + MOVL t, 9*4+off+192(dst); \ + MOVL t, 1*4+off+256(dst); \ + MOVL t, 13*4+off+320(dst); \ + MOVL t, 4*4+off+384(dst); \ + MOVL t, 8*4+off+448(dst); \ + MOVL t, 15*4+off+512(dst); \ + MOVL t, 7*4+off+576(dst); \ + MOVL 6*4(src), t; \ + MOVL t, 3*4+off+0(dst); \ + MOVL t, 7*4+off+64(dst); \ + MOVL t, 13*4+off+128(dst); \ + MOVL t, 12*4+off+192(dst); \ + MOVL t, 10*4+off+256(dst); \ + MOVL t, 1*4+off+320(dst); \ + MOVL t, 9*4+off+384(dst); \ + MOVL t, 14*4+off+448(dst); \ + MOVL t, 0*4+off+512(dst); \ + MOVL t, 6*4+off+576(dst); \ + MOVL 7*4(src), t; \ + MOVL t, 7*4+off+0(dst); \ + MOVL t, 14*4+off+64(dst); \ + MOVL t, 10*4+off+128(dst); \ + MOVL t, 0*4+off+192(dst); \ + MOVL t, 5*4+off+256(dst); \ + MOVL t, 9*4+off+320(dst); \ + MOVL t, 12*4+off+384(dst); \ + MOVL t, 1*4+off+448(dst); \ + MOVL t, 13*4+off+512(dst); \ + MOVL t, 2*4+off+576(dst); \ + MOVL 8*4(src), t; \ + MOVL t, 8*4+off+0(dst); \ + MOVL t, 5*4+off+64(dst); \ + MOVL t, 4*4+off+128(dst); \ + MOVL t, 15*4+off+192(dst); \ + MOVL t, 14*4+off+256(dst); \ + MOVL t, 3*4+off+320(dst); \ + MOVL t, 11*4+off+384(dst); \ + MOVL t, 10*4+off+448(dst); \ + MOVL t, 7*4+off+512(dst); \ + MOVL t, 1*4+off+576(dst); \ + MOVL 9*4(src), t; \ + MOVL t, 12*4+off+0(dst); \ + MOVL t, 2*4+off+64(dst); \ + MOVL t, 11*4+off+128(dst); \ + MOVL t, 4*4+off+192(dst); \ + MOVL t, 0*4+off+256(dst); \ + MOVL t, 15*4+off+320(dst); \ + MOVL t, 10*4+off+384(dst); \ + MOVL t, 7*4+off+448(dst); \ + MOVL t, 5*4+off+512(dst); \ + MOVL t, 9*4+off+576(dst); \ + MOVL 10*4(src), t; \ + MOVL t, 9*4+off+0(dst); \ + MOVL t, 4*4+off+64(dst); \ + MOVL t, 8*4+off+128(dst); \ + MOVL t, 13*4+off+192(dst); \ + MOVL t, 3*4+off+256(dst); \ + MOVL t, 5*4+off+320(dst); \ + MOVL t, 7*4+off+384(dst); \ + MOVL t, 15*4+off+448(dst); \ + MOVL t, 11*4+off+512(dst); \ + MOVL t, 0*4+off+576(dst); \ + MOVL 11*4(src), t; \ + MOVL t, 13*4+off+0(dst); \ + MOVL t, 10*4+off+64(dst); \ + MOVL t, 0*4+off+128(dst); \ + MOVL t, 3*4+off+192(dst); \ + MOVL t, 9*4+off+256(dst); \ + MOVL t, 6*4+off+320(dst); \ + MOVL t, 15*4+off+384(dst); \ + MOVL t, 4*4+off+448(dst); \ + MOVL t, 2*4+off+512(dst); \ + MOVL t, 12*4+off+576(dst); \ + MOVL 12*4(src), t; \ + MOVL t, 10*4+off+0(dst); \ + MOVL t, 12*4+off+64(dst); \ + MOVL t, 1*4+off+128(dst); \ + MOVL t, 6*4+off+192(dst); \ + MOVL t, 13*4+off+256(dst); \ + MOVL t, 4*4+off+320(dst); \ + MOVL t, 0*4+off+384(dst); \ + MOVL t, 2*4+off+448(dst); \ + MOVL t, 8*4+off+512(dst); \ + MOVL t, 14*4+off+576(dst); \ + MOVL 13*4(src), t; \ + MOVL t, 14*4+off+0(dst); \ + MOVL t, 3*4+off+64(dst); \ + MOVL t, 7*4+off+128(dst); \ + MOVL t, 2*4+off+192(dst); \ + MOVL t, 15*4+off+256(dst); \ + MOVL t, 12*4+off+320(dst); \ + MOVL t, 6*4+off+384(dst); \ + MOVL t, 0*4+off+448(dst); \ + MOVL t, 9*4+off+512(dst); \ + MOVL t, 11*4+off+576(dst); \ + MOVL 14*4(src), t; \ 
+ MOVL t, 11*4+off+0(dst); \ + MOVL t, 0*4+off+64(dst); \ + MOVL t, 12*4+off+128(dst); \ + MOVL t, 7*4+off+192(dst); \ + MOVL t, 8*4+off+256(dst); \ + MOVL t, 14*4+off+320(dst); \ + MOVL t, 2*4+off+384(dst); \ + MOVL t, 5*4+off+448(dst); \ + MOVL t, 1*4+off+512(dst); \ + MOVL t, 13*4+off+576(dst); \ + MOVL 15*4(src), t; \ + MOVL t, 15*4+off+0(dst); \ + MOVL t, 6*4+off+64(dst); \ + MOVL t, 3*4+off+128(dst); \ + MOVL t, 11*4+off+192(dst); \ + MOVL t, 7*4+off+256(dst); \ + MOVL t, 10*4+off+320(dst); \ + MOVL t, 5*4+off+384(dst); \ + MOVL t, 9*4+off+448(dst); \ + MOVL t, 4*4+off+512(dst); \ + MOVL t, 8*4+off+576(dst) + +// func hashBlocksSSE2(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte) +TEXT ·hashBlocksSSE2(SB), 0, $672-24 // frame = 656 + 16 byte alignment + MOVL h+0(FP), AX + MOVL c+4(FP), BX + MOVL flag+8(FP), CX + MOVL blocks_base+12(FP), SI + MOVL blocks_len+16(FP), DX + + MOVL SP, DI + ADDL $15, DI + ANDL $~15, DI + + MOVL CX, 8(DI) + MOVL 0(BX), CX + MOVL CX, 0(DI) + MOVL 4(BX), CX + MOVL CX, 4(DI) + XORL CX, CX + MOVL CX, 12(DI) + + MOVOU 0(AX), X0 + MOVOU 16(AX), X1 + MOVOU counter<>(SB), X2 + +loop: + MOVO X0, X4 + MOVO X1, X5 + MOVOU iv0<>(SB), X6 + MOVOU iv1<>(SB), X7 + + MOVO 0(DI), X3 + PADDQ X2, X3 + PXOR X3, X7 + MOVO X3, 0(DI) + + PRECOMPUTE(DI, 16, SI, CX) + ROUND_SSE2(X4, X5, X6, X7, 16(DI), 32(DI), 48(DI), 64(DI), X3) + ROUND_SSE2(X4, X5, X6, X7, 16+64(DI), 32+64(DI), 48+64(DI), 64+64(DI), X3) + ROUND_SSE2(X4, X5, X6, X7, 16+128(DI), 32+128(DI), 48+128(DI), 64+128(DI), X3) + ROUND_SSE2(X4, X5, X6, X7, 16+192(DI), 32+192(DI), 48+192(DI), 64+192(DI), X3) + ROUND_SSE2(X4, X5, X6, X7, 16+256(DI), 32+256(DI), 48+256(DI), 64+256(DI), X3) + ROUND_SSE2(X4, X5, X6, X7, 16+320(DI), 32+320(DI), 48+320(DI), 64+320(DI), X3) + ROUND_SSE2(X4, X5, X6, X7, 16+384(DI), 32+384(DI), 48+384(DI), 64+384(DI), X3) + ROUND_SSE2(X4, X5, X6, X7, 16+448(DI), 32+448(DI), 48+448(DI), 64+448(DI), X3) + ROUND_SSE2(X4, X5, X6, X7, 16+512(DI), 32+512(DI), 48+512(DI), 64+512(DI), X3) + ROUND_SSE2(X4, X5, X6, X7, 16+576(DI), 32+576(DI), 48+576(DI), 64+576(DI), X3) + + PXOR X4, X0 + PXOR X5, X1 + PXOR X6, X0 + PXOR X7, X1 + + LEAL 64(SI), SI + SUBL $64, DX + JNE loop + + MOVL 0(DI), CX + MOVL CX, 0(BX) + MOVL 4(DI), CX + MOVL CX, 4(BX) + + MOVOU X0, 0(AX) + MOVOU X1, 16(AX) + + RET + +// func hashBlocksSSSE3(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte) +TEXT ·hashBlocksSSSE3(SB), 0, $704-24 // frame = 688 + 16 byte alignment + MOVL h+0(FP), AX + MOVL c+4(FP), BX + MOVL flag+8(FP), CX + MOVL blocks_base+12(FP), SI + MOVL blocks_len+16(FP), DX + + MOVL SP, DI + ADDL $15, DI + ANDL $~15, DI + + MOVL CX, 8(DI) + MOVL 0(BX), CX + MOVL CX, 0(DI) + MOVL 4(BX), CX + MOVL CX, 4(DI) + XORL CX, CX + MOVL CX, 12(DI) + + MOVOU 0(AX), X0 + MOVOU 16(AX), X1 + MOVOU counter<>(SB), X2 + +loop: + MOVO X0, 656(DI) + MOVO X1, 672(DI) + MOVO X0, X4 + MOVO X1, X5 + MOVOU iv0<>(SB), X6 + MOVOU iv1<>(SB), X7 + + MOVO 0(DI), X3 + PADDQ X2, X3 + PXOR X3, X7 + MOVO X3, 0(DI) + + MOVOU rol16<>(SB), X0 + MOVOU rol8<>(SB), X1 + + PRECOMPUTE(DI, 16, SI, CX) + ROUND_SSSE3(X4, X5, X6, X7, 16(DI), 32(DI), 48(DI), 64(DI), X3, X0, X1) + ROUND_SSSE3(X4, X5, X6, X7, 16+64(DI), 32+64(DI), 48+64(DI), 64+64(DI), X3, X0, X1) + ROUND_SSSE3(X4, X5, X6, X7, 16+128(DI), 32+128(DI), 48+128(DI), 64+128(DI), X3, X0, X1) + ROUND_SSSE3(X4, X5, X6, X7, 16+192(DI), 32+192(DI), 48+192(DI), 64+192(DI), X3, X0, X1) + ROUND_SSSE3(X4, X5, X6, X7, 16+256(DI), 32+256(DI), 48+256(DI), 64+256(DI), X3, X0, X1) + ROUND_SSSE3(X4, X5, X6, X7, 
16+320(DI), 32+320(DI), 48+320(DI), 64+320(DI), X3, X0, X1) + ROUND_SSSE3(X4, X5, X6, X7, 16+384(DI), 32+384(DI), 48+384(DI), 64+384(DI), X3, X0, X1) + ROUND_SSSE3(X4, X5, X6, X7, 16+448(DI), 32+448(DI), 48+448(DI), 64+448(DI), X3, X0, X1) + ROUND_SSSE3(X4, X5, X6, X7, 16+512(DI), 32+512(DI), 48+512(DI), 64+512(DI), X3, X0, X1) + ROUND_SSSE3(X4, X5, X6, X7, 16+576(DI), 32+576(DI), 48+576(DI), 64+576(DI), X3, X0, X1) + + MOVO 656(DI), X0 + MOVO 672(DI), X1 + PXOR X4, X0 + PXOR X5, X1 + PXOR X6, X0 + PXOR X7, X1 + + LEAL 64(SI), SI + SUBL $64, DX + JNE loop + + MOVL 0(DI), CX + MOVL CX, 0(BX) + MOVL 4(DI), CX + MOVL CX, 4(BX) + + MOVOU X0, 0(AX) + MOVOU X1, 16(AX) + + RET diff --git a/vendor/golang.org/x/crypto/blake2s/blake2s_amd64.go b/vendor/golang.org/x/crypto/blake2s/blake2s_amd64.go new file mode 100644 index 000000000..becdaa120 --- /dev/null +++ b/vendor/golang.org/x/crypto/blake2s/blake2s_amd64.go @@ -0,0 +1,38 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build amd64 && gc && !purego +// +build amd64,gc,!purego + +package blake2s + +import "golang.org/x/sys/cpu" + +var ( + useSSE4 = cpu.X86.HasSSE41 + useSSSE3 = cpu.X86.HasSSSE3 + useSSE2 = cpu.X86.HasSSE2 +) + +//go:noescape +func hashBlocksSSE2(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte) + +//go:noescape +func hashBlocksSSSE3(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte) + +//go:noescape +func hashBlocksSSE4(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte) + +func hashBlocks(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte) { + switch { + case useSSE4: + hashBlocksSSE4(h, c, flag, blocks) + case useSSSE3: + hashBlocksSSSE3(h, c, flag, blocks) + case useSSE2: + hashBlocksSSE2(h, c, flag, blocks) + default: + hashBlocksGeneric(h, c, flag, blocks) + } +} diff --git a/vendor/golang.org/x/crypto/blake2s/blake2s_amd64.s b/vendor/golang.org/x/crypto/blake2s/blake2s_amd64.s new file mode 100644 index 000000000..e9df7a7c2 --- /dev/null +++ b/vendor/golang.org/x/crypto/blake2s/blake2s_amd64.s @@ -0,0 +1,433 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +//go:build amd64 && gc && !purego +// +build amd64,gc,!purego + +#include "textflag.h" + +DATA iv0<>+0x00(SB)/4, $0x6a09e667 +DATA iv0<>+0x04(SB)/4, $0xbb67ae85 +DATA iv0<>+0x08(SB)/4, $0x3c6ef372 +DATA iv0<>+0x0c(SB)/4, $0xa54ff53a +GLOBL iv0<>(SB), (NOPTR+RODATA), $16 + +DATA iv1<>+0x00(SB)/4, $0x510e527f +DATA iv1<>+0x04(SB)/4, $0x9b05688c +DATA iv1<>+0x08(SB)/4, $0x1f83d9ab +DATA iv1<>+0x0c(SB)/4, $0x5be0cd19 +GLOBL iv1<>(SB), (NOPTR+RODATA), $16 + +DATA rol16<>+0x00(SB)/8, $0x0504070601000302 +DATA rol16<>+0x08(SB)/8, $0x0D0C0F0E09080B0A +GLOBL rol16<>(SB), (NOPTR+RODATA), $16 + +DATA rol8<>+0x00(SB)/8, $0x0407060500030201 +DATA rol8<>+0x08(SB)/8, $0x0C0F0E0D080B0A09 +GLOBL rol8<>(SB), (NOPTR+RODATA), $16 + +DATA counter<>+0x00(SB)/8, $0x40 +DATA counter<>+0x08(SB)/8, $0x0 +GLOBL counter<>(SB), (NOPTR+RODATA), $16 + +#define ROTL_SSE2(n, t, v) \ + MOVO v, t; \ + PSLLL $n, t; \ + PSRLL $(32-n), v; \ + PXOR t, v + +#define ROTL_SSSE3(c, v) \ + PSHUFB c, v + +#define ROUND_SSE2(v0, v1, v2, v3, m0, m1, m2, m3, t) \ + PADDL m0, v0; \ + PADDL v1, v0; \ + PXOR v0, v3; \ + ROTL_SSE2(16, t, v3); \ + PADDL v3, v2; \ + PXOR v2, v1; \ + ROTL_SSE2(20, t, v1); \ + PADDL m1, v0; \ + PADDL v1, v0; \ + PXOR v0, v3; \ + ROTL_SSE2(24, t, v3); \ + PADDL v3, v2; \ + PXOR v2, v1; \ + ROTL_SSE2(25, t, v1); \ + PSHUFL $0x39, v1, v1; \ + PSHUFL $0x4E, v2, v2; \ + PSHUFL $0x93, v3, v3; \ + PADDL m2, v0; \ + PADDL v1, v0; \ + PXOR v0, v3; \ + ROTL_SSE2(16, t, v3); \ + PADDL v3, v2; \ + PXOR v2, v1; \ + ROTL_SSE2(20, t, v1); \ + PADDL m3, v0; \ + PADDL v1, v0; \ + PXOR v0, v3; \ + ROTL_SSE2(24, t, v3); \ + PADDL v3, v2; \ + PXOR v2, v1; \ + ROTL_SSE2(25, t, v1); \ + PSHUFL $0x39, v3, v3; \ + PSHUFL $0x4E, v2, v2; \ + PSHUFL $0x93, v1, v1 + +#define ROUND_SSSE3(v0, v1, v2, v3, m0, m1, m2, m3, t, c16, c8) \ + PADDL m0, v0; \ + PADDL v1, v0; \ + PXOR v0, v3; \ + ROTL_SSSE3(c16, v3); \ + PADDL v3, v2; \ + PXOR v2, v1; \ + ROTL_SSE2(20, t, v1); \ + PADDL m1, v0; \ + PADDL v1, v0; \ + PXOR v0, v3; \ + ROTL_SSSE3(c8, v3); \ + PADDL v3, v2; \ + PXOR v2, v1; \ + ROTL_SSE2(25, t, v1); \ + PSHUFL $0x39, v1, v1; \ + PSHUFL $0x4E, v2, v2; \ + PSHUFL $0x93, v3, v3; \ + PADDL m2, v0; \ + PADDL v1, v0; \ + PXOR v0, v3; \ + ROTL_SSSE3(c16, v3); \ + PADDL v3, v2; \ + PXOR v2, v1; \ + ROTL_SSE2(20, t, v1); \ + PADDL m3, v0; \ + PADDL v1, v0; \ + PXOR v0, v3; \ + ROTL_SSSE3(c8, v3); \ + PADDL v3, v2; \ + PXOR v2, v1; \ + ROTL_SSE2(25, t, v1); \ + PSHUFL $0x39, v3, v3; \ + PSHUFL $0x4E, v2, v2; \ + PSHUFL $0x93, v1, v1 + + +#define LOAD_MSG_SSE4(m0, m1, m2, m3, src, i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15) \ + MOVL i0*4(src), m0; \ + PINSRD $1, i1*4(src), m0; \ + PINSRD $2, i2*4(src), m0; \ + PINSRD $3, i3*4(src), m0; \ + MOVL i4*4(src), m1; \ + PINSRD $1, i5*4(src), m1; \ + PINSRD $2, i6*4(src), m1; \ + PINSRD $3, i7*4(src), m1; \ + MOVL i8*4(src), m2; \ + PINSRD $1, i9*4(src), m2; \ + PINSRD $2, i10*4(src), m2; \ + PINSRD $3, i11*4(src), m2; \ + MOVL i12*4(src), m3; \ + PINSRD $1, i13*4(src), m3; \ + PINSRD $2, i14*4(src), m3; \ + PINSRD $3, i15*4(src), m3 + +#define PRECOMPUTE_MSG(dst, off, src, R8, R9, R10, R11, R12, R13, R14, R15) \ + MOVQ 0*4(src), R8; \ + MOVQ 2*4(src), R9; \ + MOVQ 4*4(src), R10; \ + MOVQ 6*4(src), R11; \ + MOVQ 8*4(src), R12; \ + MOVQ 10*4(src), R13; \ + MOVQ 12*4(src), R14; \ + MOVQ 14*4(src), R15; \ + \ + MOVL R8, 0*4+off+0(dst); \ + MOVL R8, 9*4+off+64(dst); \ + MOVL R8, 5*4+off+128(dst); \ + MOVL R8, 14*4+off+192(dst); \ + MOVL R8, 4*4+off+256(dst); \ + MOVL R8, 
2*4+off+320(dst); \ + MOVL R8, 8*4+off+384(dst); \ + MOVL R8, 12*4+off+448(dst); \ + MOVL R8, 3*4+off+512(dst); \ + MOVL R8, 15*4+off+576(dst); \ + SHRQ $32, R8; \ + MOVL R8, 4*4+off+0(dst); \ + MOVL R8, 8*4+off+64(dst); \ + MOVL R8, 14*4+off+128(dst); \ + MOVL R8, 5*4+off+192(dst); \ + MOVL R8, 12*4+off+256(dst); \ + MOVL R8, 11*4+off+320(dst); \ + MOVL R8, 1*4+off+384(dst); \ + MOVL R8, 6*4+off+448(dst); \ + MOVL R8, 10*4+off+512(dst); \ + MOVL R8, 3*4+off+576(dst); \ + \ + MOVL R9, 1*4+off+0(dst); \ + MOVL R9, 13*4+off+64(dst); \ + MOVL R9, 6*4+off+128(dst); \ + MOVL R9, 8*4+off+192(dst); \ + MOVL R9, 2*4+off+256(dst); \ + MOVL R9, 0*4+off+320(dst); \ + MOVL R9, 14*4+off+384(dst); \ + MOVL R9, 11*4+off+448(dst); \ + MOVL R9, 12*4+off+512(dst); \ + MOVL R9, 4*4+off+576(dst); \ + SHRQ $32, R9; \ + MOVL R9, 5*4+off+0(dst); \ + MOVL R9, 15*4+off+64(dst); \ + MOVL R9, 9*4+off+128(dst); \ + MOVL R9, 1*4+off+192(dst); \ + MOVL R9, 11*4+off+256(dst); \ + MOVL R9, 7*4+off+320(dst); \ + MOVL R9, 13*4+off+384(dst); \ + MOVL R9, 3*4+off+448(dst); \ + MOVL R9, 6*4+off+512(dst); \ + MOVL R9, 10*4+off+576(dst); \ + \ + MOVL R10, 2*4+off+0(dst); \ + MOVL R10, 1*4+off+64(dst); \ + MOVL R10, 15*4+off+128(dst); \ + MOVL R10, 10*4+off+192(dst); \ + MOVL R10, 6*4+off+256(dst); \ + MOVL R10, 8*4+off+320(dst); \ + MOVL R10, 3*4+off+384(dst); \ + MOVL R10, 13*4+off+448(dst); \ + MOVL R10, 14*4+off+512(dst); \ + MOVL R10, 5*4+off+576(dst); \ + SHRQ $32, R10; \ + MOVL R10, 6*4+off+0(dst); \ + MOVL R10, 11*4+off+64(dst); \ + MOVL R10, 2*4+off+128(dst); \ + MOVL R10, 9*4+off+192(dst); \ + MOVL R10, 1*4+off+256(dst); \ + MOVL R10, 13*4+off+320(dst); \ + MOVL R10, 4*4+off+384(dst); \ + MOVL R10, 8*4+off+448(dst); \ + MOVL R10, 15*4+off+512(dst); \ + MOVL R10, 7*4+off+576(dst); \ + \ + MOVL R11, 3*4+off+0(dst); \ + MOVL R11, 7*4+off+64(dst); \ + MOVL R11, 13*4+off+128(dst); \ + MOVL R11, 12*4+off+192(dst); \ + MOVL R11, 10*4+off+256(dst); \ + MOVL R11, 1*4+off+320(dst); \ + MOVL R11, 9*4+off+384(dst); \ + MOVL R11, 14*4+off+448(dst); \ + MOVL R11, 0*4+off+512(dst); \ + MOVL R11, 6*4+off+576(dst); \ + SHRQ $32, R11; \ + MOVL R11, 7*4+off+0(dst); \ + MOVL R11, 14*4+off+64(dst); \ + MOVL R11, 10*4+off+128(dst); \ + MOVL R11, 0*4+off+192(dst); \ + MOVL R11, 5*4+off+256(dst); \ + MOVL R11, 9*4+off+320(dst); \ + MOVL R11, 12*4+off+384(dst); \ + MOVL R11, 1*4+off+448(dst); \ + MOVL R11, 13*4+off+512(dst); \ + MOVL R11, 2*4+off+576(dst); \ + \ + MOVL R12, 8*4+off+0(dst); \ + MOVL R12, 5*4+off+64(dst); \ + MOVL R12, 4*4+off+128(dst); \ + MOVL R12, 15*4+off+192(dst); \ + MOVL R12, 14*4+off+256(dst); \ + MOVL R12, 3*4+off+320(dst); \ + MOVL R12, 11*4+off+384(dst); \ + MOVL R12, 10*4+off+448(dst); \ + MOVL R12, 7*4+off+512(dst); \ + MOVL R12, 1*4+off+576(dst); \ + SHRQ $32, R12; \ + MOVL R12, 12*4+off+0(dst); \ + MOVL R12, 2*4+off+64(dst); \ + MOVL R12, 11*4+off+128(dst); \ + MOVL R12, 4*4+off+192(dst); \ + MOVL R12, 0*4+off+256(dst); \ + MOVL R12, 15*4+off+320(dst); \ + MOVL R12, 10*4+off+384(dst); \ + MOVL R12, 7*4+off+448(dst); \ + MOVL R12, 5*4+off+512(dst); \ + MOVL R12, 9*4+off+576(dst); \ + \ + MOVL R13, 9*4+off+0(dst); \ + MOVL R13, 4*4+off+64(dst); \ + MOVL R13, 8*4+off+128(dst); \ + MOVL R13, 13*4+off+192(dst); \ + MOVL R13, 3*4+off+256(dst); \ + MOVL R13, 5*4+off+320(dst); \ + MOVL R13, 7*4+off+384(dst); \ + MOVL R13, 15*4+off+448(dst); \ + MOVL R13, 11*4+off+512(dst); \ + MOVL R13, 0*4+off+576(dst); \ + SHRQ $32, R13; \ + MOVL R13, 13*4+off+0(dst); \ + MOVL R13, 10*4+off+64(dst); \ + MOVL R13, 0*4+off+128(dst); \ + 
MOVL R13, 3*4+off+192(dst); \ + MOVL R13, 9*4+off+256(dst); \ + MOVL R13, 6*4+off+320(dst); \ + MOVL R13, 15*4+off+384(dst); \ + MOVL R13, 4*4+off+448(dst); \ + MOVL R13, 2*4+off+512(dst); \ + MOVL R13, 12*4+off+576(dst); \ + \ + MOVL R14, 10*4+off+0(dst); \ + MOVL R14, 12*4+off+64(dst); \ + MOVL R14, 1*4+off+128(dst); \ + MOVL R14, 6*4+off+192(dst); \ + MOVL R14, 13*4+off+256(dst); \ + MOVL R14, 4*4+off+320(dst); \ + MOVL R14, 0*4+off+384(dst); \ + MOVL R14, 2*4+off+448(dst); \ + MOVL R14, 8*4+off+512(dst); \ + MOVL R14, 14*4+off+576(dst); \ + SHRQ $32, R14; \ + MOVL R14, 14*4+off+0(dst); \ + MOVL R14, 3*4+off+64(dst); \ + MOVL R14, 7*4+off+128(dst); \ + MOVL R14, 2*4+off+192(dst); \ + MOVL R14, 15*4+off+256(dst); \ + MOVL R14, 12*4+off+320(dst); \ + MOVL R14, 6*4+off+384(dst); \ + MOVL R14, 0*4+off+448(dst); \ + MOVL R14, 9*4+off+512(dst); \ + MOVL R14, 11*4+off+576(dst); \ + \ + MOVL R15, 11*4+off+0(dst); \ + MOVL R15, 0*4+off+64(dst); \ + MOVL R15, 12*4+off+128(dst); \ + MOVL R15, 7*4+off+192(dst); \ + MOVL R15, 8*4+off+256(dst); \ + MOVL R15, 14*4+off+320(dst); \ + MOVL R15, 2*4+off+384(dst); \ + MOVL R15, 5*4+off+448(dst); \ + MOVL R15, 1*4+off+512(dst); \ + MOVL R15, 13*4+off+576(dst); \ + SHRQ $32, R15; \ + MOVL R15, 15*4+off+0(dst); \ + MOVL R15, 6*4+off+64(dst); \ + MOVL R15, 3*4+off+128(dst); \ + MOVL R15, 11*4+off+192(dst); \ + MOVL R15, 7*4+off+256(dst); \ + MOVL R15, 10*4+off+320(dst); \ + MOVL R15, 5*4+off+384(dst); \ + MOVL R15, 9*4+off+448(dst); \ + MOVL R15, 4*4+off+512(dst); \ + MOVL R15, 8*4+off+576(dst) + +#define BLAKE2s_SSE2() \ + PRECOMPUTE_MSG(BP, 16, SI, R8, R9, R10, R11, R12, R13, R14, R15); \ + ROUND_SSE2(X4, X5, X6, X7, 16(BP), 32(BP), 48(BP), 64(BP), X8); \ + ROUND_SSE2(X4, X5, X6, X7, 16+64(BP), 32+64(BP), 48+64(BP), 64+64(BP), X8); \ + ROUND_SSE2(X4, X5, X6, X7, 16+128(BP), 32+128(BP), 48+128(BP), 64+128(BP), X8); \ + ROUND_SSE2(X4, X5, X6, X7, 16+192(BP), 32+192(BP), 48+192(BP), 64+192(BP), X8); \ + ROUND_SSE2(X4, X5, X6, X7, 16+256(BP), 32+256(BP), 48+256(BP), 64+256(BP), X8); \ + ROUND_SSE2(X4, X5, X6, X7, 16+320(BP), 32+320(BP), 48+320(BP), 64+320(BP), X8); \ + ROUND_SSE2(X4, X5, X6, X7, 16+384(BP), 32+384(BP), 48+384(BP), 64+384(BP), X8); \ + ROUND_SSE2(X4, X5, X6, X7, 16+448(BP), 32+448(BP), 48+448(BP), 64+448(BP), X8); \ + ROUND_SSE2(X4, X5, X6, X7, 16+512(BP), 32+512(BP), 48+512(BP), 64+512(BP), X8); \ + ROUND_SSE2(X4, X5, X6, X7, 16+576(BP), 32+576(BP), 48+576(BP), 64+576(BP), X8) + +#define BLAKE2s_SSSE3() \ + PRECOMPUTE_MSG(BP, 16, SI, R8, R9, R10, R11, R12, R13, R14, R15); \ + ROUND_SSSE3(X4, X5, X6, X7, 16(BP), 32(BP), 48(BP), 64(BP), X8, X13, X14); \ + ROUND_SSSE3(X4, X5, X6, X7, 16+64(BP), 32+64(BP), 48+64(BP), 64+64(BP), X8, X13, X14); \ + ROUND_SSSE3(X4, X5, X6, X7, 16+128(BP), 32+128(BP), 48+128(BP), 64+128(BP), X8, X13, X14); \ + ROUND_SSSE3(X4, X5, X6, X7, 16+192(BP), 32+192(BP), 48+192(BP), 64+192(BP), X8, X13, X14); \ + ROUND_SSSE3(X4, X5, X6, X7, 16+256(BP), 32+256(BP), 48+256(BP), 64+256(BP), X8, X13, X14); \ + ROUND_SSSE3(X4, X5, X6, X7, 16+320(BP), 32+320(BP), 48+320(BP), 64+320(BP), X8, X13, X14); \ + ROUND_SSSE3(X4, X5, X6, X7, 16+384(BP), 32+384(BP), 48+384(BP), 64+384(BP), X8, X13, X14); \ + ROUND_SSSE3(X4, X5, X6, X7, 16+448(BP), 32+448(BP), 48+448(BP), 64+448(BP), X8, X13, X14); \ + ROUND_SSSE3(X4, X5, X6, X7, 16+512(BP), 32+512(BP), 48+512(BP), 64+512(BP), X8, X13, X14); \ + ROUND_SSSE3(X4, X5, X6, X7, 16+576(BP), 32+576(BP), 48+576(BP), 64+576(BP), X8, X13, X14) + +#define BLAKE2s_SSE4() \ + LOAD_MSG_SSE4(X8, X9, X10, X11, 
SI, 0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15); \ + ROUND_SSSE3(X4, X5, X6, X7, X8, X9, X10, X11, X8, X13, X14); \ + LOAD_MSG_SSE4(X8, X9, X10, X11, SI, 14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3); \ + ROUND_SSSE3(X4, X5, X6, X7, X8, X9, X10, X11, X8, X13, X14); \ + LOAD_MSG_SSE4(X8, X9, X10, X11, SI, 11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4); \ + ROUND_SSSE3(X4, X5, X6, X7, X8, X9, X10, X11, X8, X13, X14); \ + LOAD_MSG_SSE4(X8, X9, X10, X11, SI, 7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8); \ + ROUND_SSSE3(X4, X5, X6, X7, X8, X9, X10, X11, X8, X13, X14); \ + LOAD_MSG_SSE4(X8, X9, X10, X11, SI, 9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13); \ + ROUND_SSSE3(X4, X5, X6, X7, X8, X9, X10, X11, X8, X13, X14); \ + LOAD_MSG_SSE4(X8, X9, X10, X11, SI, 2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9); \ + ROUND_SSSE3(X4, X5, X6, X7, X8, X9, X10, X11, X8, X13, X14); \ + LOAD_MSG_SSE4(X8, X9, X10, X11, SI, 12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11); \ + ROUND_SSSE3(X4, X5, X6, X7, X8, X9, X10, X11, X8, X13, X14); \ + LOAD_MSG_SSE4(X8, X9, X10, X11, SI, 13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10); \ + ROUND_SSSE3(X4, X5, X6, X7, X8, X9, X10, X11, X8, X13, X14); \ + LOAD_MSG_SSE4(X8, X9, X10, X11, SI, 6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5); \ + ROUND_SSSE3(X4, X5, X6, X7, X8, X9, X10, X11, X8, X13, X14); \ + LOAD_MSG_SSE4(X8, X9, X10, X11, SI, 10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0); \ + ROUND_SSSE3(X4, X5, X6, X7, X8, X9, X10, X11, X8, X13, X14) + +#define HASH_BLOCKS(h, c, flag, blocks_base, blocks_len, BLAKE2s_FUNC) \ + MOVQ h, AX; \ + MOVQ c, BX; \ + MOVL flag, CX; \ + MOVQ blocks_base, SI; \ + MOVQ blocks_len, DX; \ + \ + MOVQ SP, BP; \ + ADDQ $15, BP; \ + ANDQ $~15, BP; \ + \ + MOVQ 0(BX), R9; \ + MOVQ R9, 0(BP); \ + MOVQ CX, 8(BP); \ + \ + MOVOU 0(AX), X0; \ + MOVOU 16(AX), X1; \ + MOVOU iv0<>(SB), X2; \ + MOVOU iv1<>(SB), X3 \ + \ + MOVOU counter<>(SB), X12; \ + MOVOU rol16<>(SB), X13; \ + MOVOU rol8<>(SB), X14; \ + MOVO 0(BP), X15; \ + \ + loop: \ + MOVO X0, X4; \ + MOVO X1, X5; \ + MOVO X2, X6; \ + MOVO X3, X7; \ + \ + PADDQ X12, X15; \ + PXOR X15, X7; \ + \ + BLAKE2s_FUNC(); \ + \ + PXOR X4, X0; \ + PXOR X5, X1; \ + PXOR X6, X0; \ + PXOR X7, X1; \ + \ + LEAQ 64(SI), SI; \ + SUBQ $64, DX; \ + JNE loop; \ + \ + MOVO X15, 0(BP); \ + MOVQ 0(BP), R9; \ + MOVQ R9, 0(BX); \ + \ + MOVOU X0, 0(AX); \ + MOVOU X1, 16(AX) + +// func hashBlocksSSE2(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte) +TEXT ·hashBlocksSSE2(SB), 0, $672-48 // frame = 656 + 16 byte alignment + HASH_BLOCKS(h+0(FP), c+8(FP), flag+16(FP), blocks_base+24(FP), blocks_len+32(FP), BLAKE2s_SSE2) + RET + +// func hashBlocksSSSE3(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte) +TEXT ·hashBlocksSSSE3(SB), 0, $672-48 // frame = 656 + 16 byte alignment + HASH_BLOCKS(h+0(FP), c+8(FP), flag+16(FP), blocks_base+24(FP), blocks_len+32(FP), BLAKE2s_SSSE3) + RET + +// func hashBlocksSSE4(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte) +TEXT ·hashBlocksSSE4(SB), 0, $32-48 // frame = 16 + 16 byte alignment + HASH_BLOCKS(h+0(FP), c+8(FP), flag+16(FP), blocks_base+24(FP), blocks_len+32(FP), BLAKE2s_SSE4) + RET diff --git a/vendor/golang.org/x/crypto/blake2s/blake2s_generic.go b/vendor/golang.org/x/crypto/blake2s/blake2s_generic.go new file mode 100644 index 000000000..24a1ff22a --- /dev/null +++ b/vendor/golang.org/x/crypto/blake2s/blake2s_generic.go @@ -0,0 +1,178 @@ +// Copyright 2016 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package blake2s + +import ( + "math/bits" +) + +// the precomputed values for BLAKE2s +// there are 10 16-byte arrays - one for each round +// the entries are calculated from the sigma constants. +var precomputed = [10][16]byte{ + {0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15}, + {14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3}, + {11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4}, + {7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8}, + {9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13}, + {2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9}, + {12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11}, + {13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10}, + {6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5}, + {10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0}, +} + +func hashBlocksGeneric(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte) { + var m [16]uint32 + c0, c1 := c[0], c[1] + + for i := 0; i < len(blocks); { + c0 += BlockSize + if c0 < BlockSize { + c1++ + } + + v0, v1, v2, v3, v4, v5, v6, v7 := h[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7] + v8, v9, v10, v11, v12, v13, v14, v15 := iv[0], iv[1], iv[2], iv[3], iv[4], iv[5], iv[6], iv[7] + v12 ^= c0 + v13 ^= c1 + v14 ^= flag + + for j := range m { + m[j] = uint32(blocks[i]) | uint32(blocks[i+1])<<8 | uint32(blocks[i+2])<<16 | uint32(blocks[i+3])<<24 + i += 4 + } + + for k := range precomputed { + s := &(precomputed[k]) + + v0 += m[s[0]] + v0 += v4 + v12 ^= v0 + v12 = bits.RotateLeft32(v12, -16) + v8 += v12 + v4 ^= v8 + v4 = bits.RotateLeft32(v4, -12) + v1 += m[s[1]] + v1 += v5 + v13 ^= v1 + v13 = bits.RotateLeft32(v13, -16) + v9 += v13 + v5 ^= v9 + v5 = bits.RotateLeft32(v5, -12) + v2 += m[s[2]] + v2 += v6 + v14 ^= v2 + v14 = bits.RotateLeft32(v14, -16) + v10 += v14 + v6 ^= v10 + v6 = bits.RotateLeft32(v6, -12) + v3 += m[s[3]] + v3 += v7 + v15 ^= v3 + v15 = bits.RotateLeft32(v15, -16) + v11 += v15 + v7 ^= v11 + v7 = bits.RotateLeft32(v7, -12) + + v0 += m[s[4]] + v0 += v4 + v12 ^= v0 + v12 = bits.RotateLeft32(v12, -8) + v8 += v12 + v4 ^= v8 + v4 = bits.RotateLeft32(v4, -7) + v1 += m[s[5]] + v1 += v5 + v13 ^= v1 + v13 = bits.RotateLeft32(v13, -8) + v9 += v13 + v5 ^= v9 + v5 = bits.RotateLeft32(v5, -7) + v2 += m[s[6]] + v2 += v6 + v14 ^= v2 + v14 = bits.RotateLeft32(v14, -8) + v10 += v14 + v6 ^= v10 + v6 = bits.RotateLeft32(v6, -7) + v3 += m[s[7]] + v3 += v7 + v15 ^= v3 + v15 = bits.RotateLeft32(v15, -8) + v11 += v15 + v7 ^= v11 + v7 = bits.RotateLeft32(v7, -7) + + v0 += m[s[8]] + v0 += v5 + v15 ^= v0 + v15 = bits.RotateLeft32(v15, -16) + v10 += v15 + v5 ^= v10 + v5 = bits.RotateLeft32(v5, -12) + v1 += m[s[9]] + v1 += v6 + v12 ^= v1 + v12 = bits.RotateLeft32(v12, -16) + v11 += v12 + v6 ^= v11 + v6 = bits.RotateLeft32(v6, -12) + v2 += m[s[10]] + v2 += v7 + v13 ^= v2 + v13 = bits.RotateLeft32(v13, -16) + v8 += v13 + v7 ^= v8 + v7 = bits.RotateLeft32(v7, -12) + v3 += m[s[11]] + v3 += v4 + v14 ^= v3 + v14 = bits.RotateLeft32(v14, -16) + v9 += v14 + v4 ^= v9 + v4 = bits.RotateLeft32(v4, -12) + + v0 += m[s[12]] + v0 += v5 + v15 ^= v0 + v15 = bits.RotateLeft32(v15, -8) + v10 += v15 + v5 ^= v10 + v5 = bits.RotateLeft32(v5, -7) + v1 += m[s[13]] + v1 += v6 + v12 ^= v1 + v12 = bits.RotateLeft32(v12, -8) + v11 += v12 + v6 ^= v11 + v6 = bits.RotateLeft32(v6, -7) + v2 += m[s[14]] + v2 += v7 + v13 ^= v2 + v13 = bits.RotateLeft32(v13, -8) + v8 += v13 + v7 ^= v8 + v7 = bits.RotateLeft32(v7, -7) + v3 += 
m[s[15]] + v3 += v4 + v14 ^= v3 + v14 = bits.RotateLeft32(v14, -8) + v9 += v14 + v4 ^= v9 + v4 = bits.RotateLeft32(v4, -7) + } + + h[0] ^= v0 ^ v8 + h[1] ^= v1 ^ v9 + h[2] ^= v2 ^ v10 + h[3] ^= v3 ^ v11 + h[4] ^= v4 ^ v12 + h[5] ^= v5 ^ v13 + h[6] ^= v6 ^ v14 + h[7] ^= v7 ^ v15 + } + c[0], c[1] = c0, c1 +} diff --git a/vendor/golang.org/x/crypto/blake2s/blake2s_ref.go b/vendor/golang.org/x/crypto/blake2s/blake2s_ref.go new file mode 100644 index 000000000..799dba0c4 --- /dev/null +++ b/vendor/golang.org/x/crypto/blake2s/blake2s_ref.go @@ -0,0 +1,18 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build (!amd64 && !386) || !gc || purego +// +build !amd64,!386 !gc purego + +package blake2s + +var ( + useSSE4 = false + useSSSE3 = false + useSSE2 = false +) + +func hashBlocks(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte) { + hashBlocksGeneric(h, c, flag, blocks) +} diff --git a/vendor/golang.org/x/crypto/blake2s/blake2x.go b/vendor/golang.org/x/crypto/blake2s/blake2x.go new file mode 100644 index 000000000..828749ff0 --- /dev/null +++ b/vendor/golang.org/x/crypto/blake2s/blake2x.go @@ -0,0 +1,178 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package blake2s + +import ( + "encoding/binary" + "errors" + "io" +) + +// XOF defines the interface to hash functions that +// support arbitrary-length output. +type XOF interface { + // Write absorbs more data into the hash's state. It panics if called + // after Read. + io.Writer + + // Read reads more output from the hash. It returns io.EOF if the limit + // has been reached. + io.Reader + + // Clone returns a copy of the XOF in its current state. + Clone() XOF + + // Reset resets the XOF to its initial state. + Reset() +} + +// OutputLengthUnknown can be used as the size argument to NewXOF to indicate +// the length of the output is not known in advance. +const OutputLengthUnknown = 0 + +// magicUnknownOutputLength is a magic value for the output size that indicates +// an unknown number of output bytes. +const magicUnknownOutputLength = 65535 + +// maxOutputLength is the absolute maximum number of bytes to produce when the +// number of output bytes is unknown. +const maxOutputLength = (1 << 32) * 32 + +// NewXOF creates a new variable-output-length hash. The hash either produce a +// known number of bytes (1 <= size < 65535), or an unknown number of bytes +// (size == OutputLengthUnknown). In the latter case, an absolute limit of +// 128GiB applies. +// +// A non-nil key turns the hash into a MAC. The key must between +// zero and 32 bytes long. +func NewXOF(size uint16, key []byte) (XOF, error) { + if len(key) > Size { + return nil, errKeySize + } + if size == magicUnknownOutputLength { + // 2^16-1 indicates an unknown number of bytes and thus isn't a + // valid length. 
+ return nil, errors.New("blake2s: XOF length too large") + } + if size == OutputLengthUnknown { + size = magicUnknownOutputLength + } + x := &xof{ + d: digest{ + size: Size, + keyLen: len(key), + }, + length: size, + } + copy(x.d.key[:], key) + x.Reset() + return x, nil +} + +type xof struct { + d digest + length uint16 + remaining uint64 + cfg, root, block [Size]byte + offset int + nodeOffset uint32 + readMode bool +} + +func (x *xof) Write(p []byte) (n int, err error) { + if x.readMode { + panic("blake2s: write to XOF after read") + } + return x.d.Write(p) +} + +func (x *xof) Clone() XOF { + clone := *x + return &clone +} + +func (x *xof) Reset() { + x.cfg[0] = byte(Size) + binary.LittleEndian.PutUint32(x.cfg[4:], uint32(Size)) // leaf length + binary.LittleEndian.PutUint16(x.cfg[12:], x.length) // XOF length + x.cfg[15] = byte(Size) // inner hash size + + x.d.Reset() + x.d.h[3] ^= uint32(x.length) + + x.remaining = uint64(x.length) + if x.remaining == magicUnknownOutputLength { + x.remaining = maxOutputLength + } + x.offset, x.nodeOffset = 0, 0 + x.readMode = false +} + +func (x *xof) Read(p []byte) (n int, err error) { + if !x.readMode { + x.d.finalize(&x.root) + x.readMode = true + } + + if x.remaining == 0 { + return 0, io.EOF + } + + n = len(p) + if uint64(n) > x.remaining { + n = int(x.remaining) + p = p[:n] + } + + if x.offset > 0 { + blockRemaining := Size - x.offset + if n < blockRemaining { + x.offset += copy(p, x.block[x.offset:]) + x.remaining -= uint64(n) + return + } + copy(p, x.block[x.offset:]) + p = p[blockRemaining:] + x.offset = 0 + x.remaining -= uint64(blockRemaining) + } + + for len(p) >= Size { + binary.LittleEndian.PutUint32(x.cfg[8:], x.nodeOffset) + x.nodeOffset++ + + x.d.initConfig(&x.cfg) + x.d.Write(x.root[:]) + x.d.finalize(&x.block) + + copy(p, x.block[:]) + p = p[Size:] + x.remaining -= uint64(Size) + } + + if todo := len(p); todo > 0 { + if x.remaining < uint64(Size) { + x.cfg[0] = byte(x.remaining) + } + binary.LittleEndian.PutUint32(x.cfg[8:], x.nodeOffset) + x.nodeOffset++ + + x.d.initConfig(&x.cfg) + x.d.Write(x.root[:]) + x.d.finalize(&x.block) + + x.offset = copy(p, x.block[:todo]) + x.remaining -= uint64(todo) + } + + return +} + +func (d *digest) initConfig(cfg *[Size]byte) { + d.offset, d.c[0], d.c[1] = 0, 0, 0 + for i := range d.h { + d.h[i] = iv[i] ^ binary.LittleEndian.Uint32(cfg[i*4:]) + } +} diff --git a/vendor/golang.org/x/crypto/blake2s/register.go b/vendor/golang.org/x/crypto/blake2s/register.go new file mode 100644 index 000000000..ef79ff3c6 --- /dev/null +++ b/vendor/golang.org/x/crypto/blake2s/register.go @@ -0,0 +1,22 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build go1.9 +// +build go1.9 + +package blake2s + +import ( + "crypto" + "hash" +) + +func init() { + newHash256 := func() hash.Hash { + h, _ := New256(nil) + return h + } + + crypto.RegisterHash(crypto.BLAKE2s_256, newHash256) +} |
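The vendored blake2x.go above also provides an extendable-output (XOF) mode via NewXOF. Below is a minimal sketch of driving it, assuming a caller that wants a fixed 64-byte output; the input bytes are placeholders.

```go
package main

import (
	"fmt"
	"io"

	"golang.org/x/crypto/blake2s"
)

func main() {
	// BLAKE2Xs with a known output length of 64 bytes (1 <= size < 65535).
	// Pass blake2s.OutputLengthUnknown instead when the length isn't known up front.
	xof, err := blake2s.NewXOF(64, nil)
	if err != nil {
		panic(err)
	}
	xof.Write([]byte("some input")) // absorb; writing after the first Read panics

	out := make([]byte, 64)
	if _, err := io.ReadFull(xof, out); err != nil { // squeeze the 64 output bytes
		panic(err)
	}
	fmt.Printf("xof output: %x\n", out)
}
```

Separately, register.go registers the BLAKE2s-256 constructor with the standard library's crypto package on Go 1.9+, so crypto.BLAKE2s_256.New() also becomes available once this package is imported.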