Diffstat (limited to 'vendor/github.com/minio/crc64nvme/crc64_amd64.s')
-rw-r--r--  vendor/github.com/minio/crc64nvme/crc64_amd64.s  150
1 file changed, 150 insertions(+), 0 deletions(-)
diff --git a/vendor/github.com/minio/crc64nvme/crc64_amd64.s b/vendor/github.com/minio/crc64nvme/crc64_amd64.s
index 9782321fd..acfea6a15 100644
--- a/vendor/github.com/minio/crc64nvme/crc64_amd64.s
+++ b/vendor/github.com/minio/crc64nvme/crc64_amd64.s
@@ -155,3 +155,153 @@ skip128:
NOTQ AX
MOVQ AX, checksum+32(FP)
RET
+
+// Fold constants for the 128-byte main loop, pre-splatted across all
+// four 128-bit lanes of a ZMM register.
+// Lower: low-qword multiplier, used with VPCLMULQDQ $0x00.
+DATA ·asmConstantsPoly<>+0x00(SB)/8, $0xa1ca681e733f9c40
+DATA ·asmConstantsPoly<>+0x08(SB)/8, $0
+DATA ·asmConstantsPoly<>+0x10(SB)/8, $0xa1ca681e733f9c40
+DATA ·asmConstantsPoly<>+0x18(SB)/8, $0
+DATA ·asmConstantsPoly<>+0x20(SB)/8, $0xa1ca681e733f9c40
+DATA ·asmConstantsPoly<>+0x28(SB)/8, $0
+DATA ·asmConstantsPoly<>+0x30(SB)/8, $0xa1ca681e733f9c40
+DATA ·asmConstantsPoly<>+0x38(SB)/8, $0
+// Upper: high-qword multiplier, used with VPCLMULQDQ $0x11.
+DATA ·asmConstantsPoly<>+0x40(SB)/8, $0
+DATA ·asmConstantsPoly<>+0x48(SB)/8, $0x5f852fb61e8d92dc
+DATA ·asmConstantsPoly<>+0x50(SB)/8, $0
+DATA ·asmConstantsPoly<>+0x58(SB)/8, $0x5f852fb61e8d92dc
+DATA ·asmConstantsPoly<>+0x60(SB)/8, $0
+DATA ·asmConstantsPoly<>+0x68(SB)/8, $0x5f852fb61e8d92dc
+DATA ·asmConstantsPoly<>+0x70(SB)/8, $0
+DATA ·asmConstantsPoly<>+0x78(SB)/8, $0x5f852fb61e8d92dc
+GLOBL ·asmConstantsPoly<>(SB), (NOPTR+RODATA), $128
+
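+// func updateAsm512(crc uint64, p []byte) uint64
+//
+// AVX-512 variant using VPCLMULQDQ on ZMM registers. Only whole
+// 128-byte blocks are processed; the caller is expected to handle any
+// remaining tail bytes.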
+TEXT ·updateAsm512(SB), $0-40
+ MOVQ crc+0(FP), AX // checksum
+ MOVQ p_base+8(FP), SI // start pointer
+ MOVQ p_len+16(FP), CX // length of buffer
+ NOTQ AX // Invert the incoming checksum (re-inverted before return)
+ SHRQ $7, CX // CX = number of whole 128-byte blocks
+ CMPQ CX, $1
+ VPXORQ Z8, Z8, Z8 // Initialize ZMM8 to zero
+ JLT skip128 // No full block: nothing to fold
+
+ VMOVDQU64 0x00(SI), Z0 // First 64 bytes of the initial block
+ VMOVDQU64 0x40(SI), Z4 // Second 64 bytes of the initial block
+ MOVQ $·asmConstantsPoly<>(SB), BX // Pointer to the fold constants
+ VMOVQ AX, X8 // Checksum into the low 64 bits of ZMM8
+
+ // XOR initialization value into lower 64 bits of ZMM0
+ VPXORQ Z8, Z0, Z0
+ CMPQ CX, $1
+ JE tail128 // Exactly one block: skip the fold loop
+
+ VMOVDQU64 0(BX), Z8 // Lower (low-qword) fold constants
+ VMOVDQU64 64(BX), Z9 // Upper (high-qword) fold constants
+
+ PCALIGN $16
+
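+// Each iteration folds the 128 bytes accumulated in Z0 and Z4 forward
+// across the next 128 bytes of input: every 128-bit lane is carry-less
+// multiplied by the (low, high) constant pair and XORed into the new data.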
+loop128:
+ VMOVDQU64 0x80(SI), Z1
+ VMOVDQU64 0xc0(SI), Z5
+ ADDQ $128, SI
+
+ SUBQ $1, CX
+ VPCLMULQDQ $0x00, Z8, Z0, Z10 // Multiply low qwords of each lane
+ VPCLMULQDQ $0x11, Z9, Z0, Z0 // Multiply high qwords of each lane
+ VPTERNLOGD $0x96, Z1, Z10, Z0 // Combine results with xor into Z0
+
+ VPCLMULQDQ $0x00, Z8, Z4, Z10 // Multiply low qwords of each lane
+ VPCLMULQDQ $0x11, Z9, Z4, Z4 // Multiply high qwords of each lane
+ VPTERNLOGD $0x96, Z5, Z10, Z4 // Combine results with xor into Z4
+
+ CMPQ CX, $1
+ JGT loop128
+
+tail128:
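+ // Fold the eight 128-bit lanes down to a single 128-bit value, then
+ // reduce it to a 64-bit CRC.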
+ // Extract X0 to X3 from ZMM0
+ VEXTRACTF32X4 $1, Z0, X1 // X1: Second 128-bit lane
+ VEXTRACTF32X4 $2, Z0, X2 // X2: Third 128-bit lane
+ VEXTRACTF32X4 $3, Z0, X3 // X3: Fourth 128-bit lane
+
+ // Extract X4 to X7 from ZMM4
+ VEXTRACTF32X4 $1, Z4, X5 // X5: Second 128-bit lane
+ VEXTRACTF32X4 $2, Z4, X6 // X6: Third 128-bit lane
+ VEXTRACTF32X4 $3, Z4, X7 // X7: Fourth 128-bit lane
+
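+ // Fold the lanes together one pair of constants at a time: X0 into X7,
+ // then X7 into X1, X1 into X2, and so on until everything lands in X6.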
+ MOVQ $0xd083dd594d96319d, AX
+ MOVQ AX, X11
+ PCLMULQDQ $0x00, X0, X11
+ MOVQ $0x946588403d4adcbc, AX
+ PINSRQ $0x1, AX, X12
+ PCLMULQDQ $0x11, X12, X0
+ PXOR X11, X7
+ PXOR X0, X7
+ MOVQ $0x3c255f5ebc414423, AX
+ MOVQ AX, X11
+ PCLMULQDQ $0x00, X1, X11
+ MOVQ $0x34f5a24e22d66e90, AX
+ PINSRQ $0x1, AX, X12
+ PCLMULQDQ $0x11, X12, X1
+ PXOR X11, X1
+ PXOR X7, X1
+ MOVQ $0x7b0ab10dd0f809fe, AX
+ MOVQ AX, X11
+ PCLMULQDQ $0x00, X2, X11
+ MOVQ $0x03363823e6e791e5, AX
+ PINSRQ $0x1, AX, X12
+ PCLMULQDQ $0x11, X12, X2
+ PXOR X11, X2
+ PXOR X1, X2
+ MOVQ $0x0c32cdb31e18a84a, AX
+ MOVQ AX, X11
+ PCLMULQDQ $0x00, X3, X11
+ MOVQ $0x62242240ace5045a, AX
+ PINSRQ $0x1, AX, X12
+ PCLMULQDQ $0x11, X12, X3
+ PXOR X11, X3
+ PXOR X2, X3
+ MOVQ $0xbdd7ac0ee1a4a0f0, AX
+ MOVQ AX, X11
+ PCLMULQDQ $0x00, X4, X11
+ MOVQ $0xa3ffdc1fe8e82a8b, AX
+ PINSRQ $0x1, AX, X12
+ PCLMULQDQ $0x11, X12, X4
+ PXOR X11, X4
+ PXOR X3, X4
+ MOVQ $0xb0bc2e589204f500, AX
+ MOVQ AX, X11
+ PCLMULQDQ $0x00, X5, X11
+ MOVQ $0xe1e0bb9d45d7a44c, AX
+ PINSRQ $0x1, AX, X12
+ PCLMULQDQ $0x11, X12, X5
+ PXOR X11, X5
+ PXOR X4, X5
+ MOVQ $0xeadc41fd2ba3d420, AX
+ MOVQ AX, X11
+ PCLMULQDQ $0x00, X6, X11
+ MOVQ $0x21e9761e252621ac, AX
+ PINSRQ $0x1, AX, X12
+ PCLMULQDQ $0x11, X12, X6
+ PXOR X11, X6
+ PXOR X5, X6
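+ // Fold the remaining 128 bits in X6 down to 64 bits. AX still holds
+ // the 0x21e9761e252621ac constant loaded above and is reused here.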
+ MOVQ AX, X5
+ PCLMULQDQ $0x00, X6, X5
+ PSHUFD $0xee, X6, X6 // Broadcast the high qword of X6 to both qwords
+ PXOR X5, X6
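+ // Barrett reduction of the 128-bit remainder in X6 to the final
+ // 64-bit CRC.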
+ MOVQ $0x27ecfa329aef9f77, AX // Barrett constant mu
+ MOVQ AX, X4
+ PCLMULQDQ $0x00, X4, X6
+ PEXTRQ $0, X6, BX
+ MOVQ $0x34d926535897936b, AX // (reflected polynomial << 1) | 1
+ MOVQ AX, X4
+ PCLMULQDQ $0x00, X4, X6
+ PXOR X5, X6
+ PEXTRQ $1, X6, AX
+ XORQ BX, AX // Final 64-bit remainder
+
+skip128:
+ NOTQ AX // Undo the inversion applied on entry
+ MOVQ AX, checksum+32(FP)
+ VZEROUPPER // Clear upper YMM/ZMM state before returning to Go code
+ RET
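
For reference, the assembly above has to agree with the plain bitwise
definition of CRC-64/NVME: reflected polynomial 0x9a6c9329ac4bc9b5, with
the checksum inverted on entry and again on exit (the two NOTQ
instructions). A minimal Go sketch of that definition follows; the
function and constant names here are illustrative, not the package's
exported API.

	package main

	import "fmt"

	// polyNVME is the bit-reflected CRC-64/NVME generator polynomial
	// (0xad93d23594c93659 with its bits reversed). Note that
	// (polyNVME << 1) | 1 gives the 0x34d926535897936b constant used
	// in the reduction step of the assembly.
	const polyNVME = 0x9a6c9329ac4bc9b5

	// crc64Bitwise processes one bit at a time; the vectorized routine
	// must produce the same result on whole 128-byte blocks.
	func crc64Bitwise(crc uint64, p []byte) uint64 {
		crc = ^crc // NOTQ on entry
		for _, b := range p {
			crc ^= uint64(b)
			for i := 0; i < 8; i++ {
				if crc&1 != 0 {
					crc = crc>>1 ^ polyNVME
				} else {
					crc >>= 1
				}
			}
		}
		return ^crc // NOTQ on return
	}

	func main() {
		// Published check value: CRC-64/NVME("123456789") is 0xae8b14860a799888.
		fmt.Printf("%#x\n", crc64Bitwise(0, []byte("123456789")))
	}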