diff options
Diffstat (limited to 'vendor/github.com/cespare/xxhash/xxhash_amd64.s')
| -rw-r--r-- | vendor/github.com/cespare/xxhash/xxhash_amd64.s | 233 |
1 files changed, 0 insertions, 233 deletions
diff --git a/vendor/github.com/cespare/xxhash/xxhash_amd64.s b/vendor/github.com/cespare/xxhash/xxhash_amd64.s deleted file mode 100644 index 757f2011f..000000000 --- a/vendor/github.com/cespare/xxhash/xxhash_amd64.s +++ /dev/null @@ -1,233 +0,0 @@ -// +build !appengine -// +build gc -// +build !purego - -#include "textflag.h" - -// Register allocation: -// AX h -// CX pointer to advance through b -// DX n -// BX loop end -// R8 v1, k1 -// R9 v2 -// R10 v3 -// R11 v4 -// R12 tmp -// R13 prime1v -// R14 prime2v -// R15 prime4v - -// round reads from and advances the buffer pointer in CX. -// It assumes that R13 has prime1v and R14 has prime2v. -#define round(r) \ - MOVQ (CX), R12 \ - ADDQ $8, CX \ - IMULQ R14, R12 \ - ADDQ R12, r \ - ROLQ $31, r \ - IMULQ R13, r - -// mergeRound applies a merge round on the two registers acc and val. -// It assumes that R13 has prime1v, R14 has prime2v, and R15 has prime4v. -#define mergeRound(acc, val) \ - IMULQ R14, val \ - ROLQ $31, val \ - IMULQ R13, val \ - XORQ val, acc \ - IMULQ R13, acc \ - ADDQ R15, acc - -// func Sum64(b []byte) uint64 -TEXT ·Sum64(SB), NOSPLIT, $0-32 - // Load fixed primes. - MOVQ ·prime1v(SB), R13 - MOVQ ·prime2v(SB), R14 - MOVQ ·prime4v(SB), R15 - - // Load slice. - MOVQ b_base+0(FP), CX - MOVQ b_len+8(FP), DX - LEAQ (CX)(DX*1), BX - - // The first loop limit will be len(b)-32. - SUBQ $32, BX - - // Check whether we have at least one block. - CMPQ DX, $32 - JLT noBlocks - - // Set up initial state (v1, v2, v3, v4). - MOVQ R13, R8 - ADDQ R14, R8 - MOVQ R14, R9 - XORQ R10, R10 - XORQ R11, R11 - SUBQ R13, R11 - - // Loop until CX > BX. -blockLoop: - round(R8) - round(R9) - round(R10) - round(R11) - - CMPQ CX, BX - JLE blockLoop - - MOVQ R8, AX - ROLQ $1, AX - MOVQ R9, R12 - ROLQ $7, R12 - ADDQ R12, AX - MOVQ R10, R12 - ROLQ $12, R12 - ADDQ R12, AX - MOVQ R11, R12 - ROLQ $18, R12 - ADDQ R12, AX - - mergeRound(AX, R8) - mergeRound(AX, R9) - mergeRound(AX, R10) - mergeRound(AX, R11) - - JMP afterBlocks - -noBlocks: - MOVQ ·prime5v(SB), AX - -afterBlocks: - ADDQ DX, AX - - // Right now BX has len(b)-32, and we want to loop until CX > len(b)-8. - ADDQ $24, BX - - CMPQ CX, BX - JG fourByte - -wordLoop: - // Calculate k1. - MOVQ (CX), R8 - ADDQ $8, CX - IMULQ R14, R8 - ROLQ $31, R8 - IMULQ R13, R8 - - XORQ R8, AX - ROLQ $27, AX - IMULQ R13, AX - ADDQ R15, AX - - CMPQ CX, BX - JLE wordLoop - -fourByte: - ADDQ $4, BX - CMPQ CX, BX - JG singles - - MOVL (CX), R8 - ADDQ $4, CX - IMULQ R13, R8 - XORQ R8, AX - - ROLQ $23, AX - IMULQ R14, AX - ADDQ ·prime3v(SB), AX - -singles: - ADDQ $4, BX - CMPQ CX, BX - JGE finalize - -singlesLoop: - MOVBQZX (CX), R12 - ADDQ $1, CX - IMULQ ·prime5v(SB), R12 - XORQ R12, AX - - ROLQ $11, AX - IMULQ R13, AX - - CMPQ CX, BX - JL singlesLoop - -finalize: - MOVQ AX, R12 - SHRQ $33, R12 - XORQ R12, AX - IMULQ R14, AX - MOVQ AX, R12 - SHRQ $29, R12 - XORQ R12, AX - IMULQ ·prime3v(SB), AX - MOVQ AX, R12 - SHRQ $32, R12 - XORQ R12, AX - - MOVQ AX, ret+24(FP) - RET - -// writeBlocks uses the same registers as above except that it uses AX to store -// the x pointer. - -// func writeBlocks(x *xxh, b []byte) []byte -TEXT ·writeBlocks(SB), NOSPLIT, $0-56 - // Load fixed primes needed for round. - MOVQ ·prime1v(SB), R13 - MOVQ ·prime2v(SB), R14 - - // Load slice. - MOVQ b_base+8(FP), CX - MOVQ CX, ret_base+32(FP) // initialize return base pointer; see NOTE below - MOVQ b_len+16(FP), DX - LEAQ (CX)(DX*1), BX - SUBQ $32, BX - - // Load vN from x. - MOVQ x+0(FP), AX - MOVQ 0(AX), R8 // v1 - MOVQ 8(AX), R9 // v2 - MOVQ 16(AX), R10 // v3 - MOVQ 24(AX), R11 // v4 - - // We don't need to check the loop condition here; this function is - // always called with at least one block of data to process. -blockLoop: - round(R8) - round(R9) - round(R10) - round(R11) - - CMPQ CX, BX - JLE blockLoop - - // Copy vN back to x. - MOVQ R8, 0(AX) - MOVQ R9, 8(AX) - MOVQ R10, 16(AX) - MOVQ R11, 24(AX) - - // Construct return slice. - // NOTE: It's important that we don't construct a slice that has a base - // pointer off the end of the original slice, as in Go 1.7+ this will - // cause runtime crashes. (See discussion in, for example, - // https://github.com/golang/go/issues/16772.) - // Therefore, we calculate the length/cap first, and if they're zero, we - // keep the old base. This is what the compiler does as well if you - // write code like - // b = b[len(b):] - - // New length is 32 - (CX - BX) -> BX+32 - CX. - ADDQ $32, BX - SUBQ CX, BX - JZ afterSetBase - - MOVQ CX, ret_base+32(FP) - -afterSetBase: - MOVQ BX, ret_len+40(FP) - MOVQ BX, ret_cap+48(FP) // set cap == len - - RET |
