summary refs log tree commit diff
path: root/vendor/github.com/cespare/xxhash/xxhash_amd64.s
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/cespare/xxhash/xxhash_amd64.s')
-rw-r--r-- vendor/github.com/cespare/xxhash/xxhash_amd64.s 233
1 files changed, 0 insertions, 233 deletions
diff --git a/vendor/github.com/cespare/xxhash/xxhash_amd64.s b/vendor/github.com/cespare/xxhash/xxhash_amd64.s
deleted file mode 100644
index 757f2011f..000000000
--- a/vendor/github.com/cespare/xxhash/xxhash_amd64.s
+++ /dev/null
@@ -1,233 +0,0 @@
-// +build !appengine
-// +build gc
-// +build !purego
-
-#include "textflag.h"
-
-// Register allocation:
-// AX h
-// CX pointer to advance through b
-// DX n
-// BX loop end
-// R8 v1, k1
-// R9 v2
-// R10 v3
-// R11 v4
-// R12 tmp
-// R13 prime1v
-// R14 prime2v
-// R15 prime4v
-
-// round(r) loads the 8 bytes at (CX), advances CX by 8, and updates r = rol64(r + input*R14, 31) * R13.
-// It assumes that R13 has prime1v and R14 has prime2v. R12 is clobbered as scratch.
-#define round(r) \
- MOVQ (CX), R12 \
- ADDQ $8, CX \
- IMULQ R14, R12 \
- ADDQ R12, r \
- ROLQ $31, r \
- IMULQ R13, r
-
-// mergeRound(acc, val) sets val = rol64(val*R14, 31) * R13 and then acc = (acc ^ val) * R13 + R15; val is overwritten.
-// It assumes that R13 has prime1v, R14 has prime2v, and R15 has prime4v.
-#define mergeRound(acc, val) \
- IMULQ R14, val \
- ROLQ $31, val \
- IMULQ R13, val \
- XORQ val, acc \
- IMULQ R13, acc \
- ADDQ R15, acc
-
-// func Sum64(b []byte) uint64 (computes the 64-bit hash of b; the result is stored to ret+24(FP))
-TEXT ·Sum64(SB), NOSPLIT, $0-32 // no local frame; 32 bytes of args+result (24-byte slice header, 8-byte return)
- // Load fixed primes.
- MOVQ ·prime1v(SB), R13
- MOVQ ·prime2v(SB), R14
- MOVQ ·prime4v(SB), R15
-
- // Load slice.
- MOVQ b_base+0(FP), CX // CX = address of the first byte of b; used as the read cursor
- MOVQ b_len+8(FP), DX // DX = len(b)
- LEAQ (CX)(DX*1), BX // BX = address one past the last byte of b
-
- // The first loop limit will be the address of b's end minus 32.
- SUBQ $32, BX // BX = end-32, the last address at which a full 32-byte block can start
-
- // Check whether we have at least one block.
- CMPQ DX, $32
- JLT noBlocks // len(b) < 32: skip the block loop entirely
-
- // Set up initial state (v1, v2, v3, v4).
- MOVQ R13, R8
- ADDQ R14, R8 // v1 = prime1v + prime2v
- MOVQ R14, R9 // v2 = prime2v
- XORQ R10, R10 // v3 = 0
- XORQ R11, R11
- SUBQ R13, R11 // v4 = 0 - prime1v
-
- // Loop until CX > BX. Each iteration consumes 32 bytes (four 8-byte rounds).
-blockLoop:
- round(R8)
- round(R9)
- round(R10)
- round(R11)
-
- CMPQ CX, BX
- JLE blockLoop // signed compare: keep going while another full block fits (CX <= end-32)
-
- MOVQ R8, AX
- ROLQ $1, AX // h = rol(v1, 1)
- MOVQ R9, R12
- ROLQ $7, R12
- ADDQ R12, AX // h += rol(v2, 7)
- MOVQ R10, R12
- ROLQ $12, R12
- ADDQ R12, AX // h += rol(v3, 12)
- MOVQ R11, R12
- ROLQ $18, R12
- ADDQ R12, AX // h += rol(v4, 18)
-
- mergeRound(AX, R8)
- mergeRound(AX, R9)
- mergeRound(AX, R10)
- mergeRound(AX, R11)
-
- JMP afterBlocks
-
-noBlocks:
- MOVQ ·prime5v(SB), AX // input shorter than one block: h starts at prime5v
-
-afterBlocks:
- ADDQ DX, AX // h += len(b)
-
- // Right now BX holds the address end-32, and we want to loop until CX > end-8.
- ADDQ $24, BX // BX = end-8
-
- CMPQ CX, BX
- JG fourByte // fewer than 8 bytes remain
-
-wordLoop:
- // Calculate k1.
- MOVQ (CX), R8 // k1 = next 8 input bytes
- ADDQ $8, CX
- IMULQ R14, R8
- ROLQ $31, R8
- IMULQ R13, R8 // k1 = rol(k1*prime2v, 31) * prime1v
-
- XORQ R8, AX
- ROLQ $27, AX
- IMULQ R13, AX
- ADDQ R15, AX // h = rol(h ^ k1, 27)*prime1v + prime4v
-
- CMPQ CX, BX
- JLE wordLoop // continue while at least 8 bytes remain
-
-fourByte:
- ADDQ $4, BX // BX = end-4
- CMPQ CX, BX
- JG singles // fewer than 4 bytes remain
-
- MOVL (CX), R8 // 32-bit load; the upper 32 bits of R8 are zeroed
- ADDQ $4, CX
- IMULQ R13, R8
- XORQ R8, AX // h ^= (zero-extended 4 bytes) * prime1v
-
- ROLQ $23, AX
- IMULQ R14, AX
- ADDQ ·prime3v(SB), AX // h = rol(h, 23)*prime2v + prime3v
-
-singles:
- ADDQ $4, BX // BX = end of b
- CMPQ CX, BX
- JGE finalize // no trailing bytes left
-
-singlesLoop:
- MOVBQZX (CX), R12 // zero-extended single-byte load
- ADDQ $1, CX
- IMULQ ·prime5v(SB), R12
- XORQ R12, AX // h ^= byte * prime5v
-
- ROLQ $11, AX
- IMULQ R13, AX // h = rol(h, 11) * prime1v
-
- CMPQ CX, BX
- JL singlesLoop // continue until the cursor reaches the end of b
-
-finalize:
- MOVQ AX, R12
- SHRQ $33, R12
- XORQ R12, AX // h ^= h >> 33
- IMULQ R14, AX // h *= prime2v
- MOVQ AX, R12
- SHRQ $29, R12
- XORQ R12, AX // h ^= h >> 29
- IMULQ ·prime3v(SB), AX // h *= prime3v
- MOVQ AX, R12
- SHRQ $32, R12
- XORQ R12, AX // h ^= h >> 32
-
- MOVQ AX, ret+24(FP) // store h as the return value
- RET
-
-// writeBlocks runs the round macro over every full 32-byte block of b, updating the four 8-byte values at x+0..x+24,
-// and returns the unprocessed tail of b. It uses the same registers as above except that AX stores the x pointer.
-
-// func writeBlocks(x *xxh, b []byte) []byte
-TEXT ·writeBlocks(SB), NOSPLIT, $0-56 // no local frame; 56 bytes of args+result (8-byte pointer, two 24-byte slice headers)
- // Load fixed primes needed for round.
- MOVQ ·prime1v(SB), R13
- MOVQ ·prime2v(SB), R14
-
- // Load slice.
- MOVQ b_base+8(FP), CX // CX = address of the first byte of b; used as the read cursor
- MOVQ CX, ret_base+32(FP) // initialize return base pointer; see NOTE below
- MOVQ b_len+16(FP), DX // DX = len(b)
- LEAQ (CX)(DX*1), BX // BX = address one past the last byte of b
- SUBQ $32, BX // BX = end-32, the last address at which a full block can start
-
- // Load vN from x.
- MOVQ x+0(FP), AX
- MOVQ 0(AX), R8 // v1
- MOVQ 8(AX), R9 // v2
- MOVQ 16(AX), R10 // v3
- MOVQ 24(AX), R11 // v4
-
- // We don't need to check the loop condition here; this function is
- // always called with at least one block of data to process.
-blockLoop:
- round(R8)
- round(R9)
- round(R10)
- round(R11)
-
- CMPQ CX, BX
- JLE blockLoop // signed compare: keep going while another full block fits (CX <= end-32)
-
- // Copy vN back to x.
- MOVQ R8, 0(AX)
- MOVQ R9, 8(AX)
- MOVQ R10, 16(AX)
- MOVQ R11, 24(AX)
-
- // Construct return slice.
- // NOTE: It's important that we don't construct a slice that has a base
- // pointer off the end of the original slice, as in Go 1.7+ this will
- // cause runtime crashes. (See discussion in, for example,
- // https://github.com/golang/go/issues/16772.)
- // Therefore, we calculate the length/cap first, and if they're zero, we
- // keep the old base. This is what the compiler does as well if you
- // write code like
- // b = b[len(b):]
-
- // New length is 32 - (CX - BX) -> BX+32 - CX.
- ADDQ $32, BX // BX = end of b
- SUBQ CX, BX // BX = number of unprocessed bytes; sets ZF when none remain
- JZ afterSetBase // empty tail: keep the original base pointer stored earlier
-
- MOVQ CX, ret_base+32(FP) // non-empty tail starts at the cursor
-
-afterSetBase:
- MOVQ BX, ret_len+40(FP)
- MOVQ BX, ret_cap+48(FP) // set cap == len
-
- RET