summary | refs | log | tree | commit | diff
path: root/vendor/github.com/minio/crc64nvme/crc64_arm64.s
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/minio/crc64nvme/crc64_arm64.s')
-rw-r--r-- vendor/github.com/minio/crc64nvme/crc64_arm64.s | 155
1 files changed, 155 insertions, 0 deletions
diff --git a/vendor/github.com/minio/crc64nvme/crc64_arm64.s b/vendor/github.com/minio/crc64nvme/crc64_arm64.s
new file mode 100644
index 000000000..b61866f63
--- /dev/null
+++ b/vendor/github.com/minio/crc64nvme/crc64_arm64.s
@@ -0,0 +1,155 @@
+// Copyright (c) 2025 Minio Inc. All rights reserved.
+// Use of this source code is governed by a license that can be
+// found in the LICENSE file.
+
+#include "textflag.h"
+
+TEXT ·updateAsm(SB), $0-40 // fold the whole 128-byte blocks of p into crc using PMULL; frame size 0, 40 bytes of args + result
+ MOVD crc+0(FP), R0 // R0 = incoming checksum
+ MOVD p_base+8(FP), R1 // R1 = start pointer, advanced by 128 per loop iteration
+ MOVD p_len+16(FP), R2 // R2 = length of buffer in bytes
+ MOVD $·const(SB), R3 // R3 = address of the ·const table
+ MVN R0, R0 // work on the bit-inverted checksum; inverted back at skip128
+ LSR $7, R2, R2 // R2 = len / 128 = number of whole blocks; the len % 128 remainder is never read here
+ CMP $1, R2
+ BLT skip128 // no whole block: the two MVNs cancel and crc is returned unchanged
+// Load the first block into V0..V7 and XOR the inverted checksum into its first 8 bytes.
+ FLDPQ (R1), (F0, F1) // bytes 0..31
+ FLDPQ 32(R1), (F2, F3) // bytes 32..63
+ FLDPQ 64(R1), (F4, F5) // bytes 64..95
+ FLDPQ 96(R1), (F6, F7) // bytes 96..127
+ FMOVD R0, F8 // V8 low lane = inverted checksum
+ VMOVI $0, V9.B16 // V9 = 0
+ VMOV V9.D[0], V8.D[1] // clear the high lane of V8
+ VEOR V8.B16, V0.B16, V0.B16 // V0 ^= V8
+ CMP $1, R2
+ BEQ tail128 // exactly one block: skip the loop and reduce directly
+// Loop constants: V8 low lane = const+112 (K_1087), both lanes of V9 = const+120 (K_1023).
+ MOVD 112(R3), R4
+ MOVD 120(R3), R5
+ FMOVD R4, F8
+ VDUP R5, V9.D2
+// Per block, for i = 0..7: Vi = PMULL(Vi low, V8 low) ^ PMULL2(Vi high, V9 high) ^ next 16 input bytes.
+loop128: // on entry R2 counts the blocks not yet loaded plus the one held in V0..V7
+ ADD $128, R1, R1 // advance to the next block
+ SUB $1, R2, R2
+ VPMULL V0.D1, V8.D1, V10.Q1 // V10 = V0 low lane x K_1087 (polynomial multiply)
+ VPMULL2 V0.D2, V9.D2, V0.Q1 // V0 = V0 high lane x K_1023
+ FLDPQ (R1), (F11, F12) // V11, V12 = input bytes 0..31 of the new block
+ VEOR3 V0.B16, V11.B16, V10.B16, V0.B16 // V0 = V0 ^ V11 ^ V10
+ VPMULL V1.D1, V8.D1, V10.Q1
+ VPMULL2 V1.D2, V9.D2, V1.Q1
+ VEOR3 V1.B16, V12.B16, V10.B16, V1.B16 // V1 = V1 ^ V12 ^ V10
+ VPMULL V2.D1, V8.D1, V10.Q1
+ VPMULL2 V2.D2, V9.D2, V2.Q1
+ FLDPQ 32(R1), (F11, F12) // input bytes 32..63
+ VEOR3 V2.B16, V11.B16, V10.B16, V2.B16
+ VPMULL V3.D1, V8.D1, V10.Q1
+ VPMULL2 V3.D2, V9.D2, V3.Q1
+ VEOR3 V3.B16, V12.B16, V10.B16, V3.B16
+ VPMULL V4.D1, V8.D1, V10.Q1
+ VPMULL2 V4.D2, V9.D2, V4.Q1
+ FLDPQ 64(R1), (F11, F12) // input bytes 64..95
+ VEOR3 V4.B16, V11.B16, V10.B16, V4.B16
+ VPMULL V5.D1, V8.D1, V10.Q1
+ VPMULL2 V5.D2, V9.D2, V5.Q1
+ VEOR3 V5.B16, V12.B16, V10.B16, V5.B16
+ VPMULL V6.D1, V8.D1, V10.Q1
+ VPMULL2 V6.D2, V9.D2, V6.Q1
+ FLDPQ 96(R1), (F11, F12) // input bytes 96..127
+ VEOR3 V6.B16, V11.B16, V10.B16, V6.B16
+ VPMULL V7.D1, V8.D1, V10.Q1
+ VPMULL2 V7.D2, V9.D2, V7.Q1
+ VEOR3 V7.B16, V12.B16, V10.B16, V7.B16
+ CMP $1, R2
+ BHI loop128 // unsigned compare: repeat while R2 > 1
+// Tail: multiply V0..V6 by their own constant pairs from ·const and XOR-chain the results V7 -> V1 -> ... -> V6.
+tail128:
+ MOVD (R3), R4 // const+0 (K_959)
+ FMOVD R4, F11
+ VPMULL V0.D1, V11.D1, V11.Q1 // V11 = V0 low lane x K_959
+ MOVD 8(R3), R4 // const+8 (K_895)
+ VDUP R4, V12.D2
+ VPMULL2 V0.D2, V12.D2, V0.Q1 // V0 = V0 high lane x K_895
+ VEOR3 V0.B16, V7.B16, V11.B16, V7.B16 // V7 ^= both V0 products; V7 is never multiplied in the tail
+ MOVD 16(R3), R4 // const+16 (K_831)
+ FMOVD R4, F11
+ VPMULL V1.D1, V11.D1, V11.Q1
+ MOVD 24(R3), R4 // const+24 (K_767)
+ VDUP R4, V12.D2
+ VPMULL2 V1.D2, V12.D2, V1.Q1
+ VEOR3 V1.B16, V11.B16, V7.B16, V1.B16 // V1 = both V1 products ^ V7
+ MOVD 32(R3), R4 // const+32 (K_703)
+ FMOVD R4, F11
+ VPMULL V2.D1, V11.D1, V11.Q1
+ MOVD 40(R3), R4 // const+40 (K_639)
+ VDUP R4, V12.D2
+ VPMULL2 V2.D2, V12.D2, V2.Q1
+ VEOR3 V2.B16, V11.B16, V1.B16, V2.B16 // V2 = both V2 products ^ V1
+ MOVD 48(R3), R4 // const+48 (K_575)
+ FMOVD R4, F11
+ VPMULL V3.D1, V11.D1, V11.Q1
+ MOVD 56(R3), R4 // const+56 (K_511)
+ VDUP R4, V12.D2
+ VPMULL2 V3.D2, V12.D2, V3.Q1
+ VEOR3 V3.B16, V11.B16, V2.B16, V3.B16 // V3 = both V3 products ^ V2
+ MOVD 64(R3), R4 // const+64 (K_447)
+ FMOVD R4, F11
+ VPMULL V4.D1, V11.D1, V11.Q1
+ MOVD 72(R3), R4 // const+72 (K_383)
+ VDUP R4, V12.D2
+ VPMULL2 V4.D2, V12.D2, V4.Q1
+ VEOR3 V4.B16, V11.B16, V3.B16, V4.B16 // V4 = both V4 products ^ V3
+ MOVD 80(R3), R4 // const+80 (K_319)
+ FMOVD R4, F11
+ VPMULL V5.D1, V11.D1, V11.Q1
+ MOVD 88(R3), R4 // const+88 (K_255)
+ VDUP R4, V12.D2
+ VPMULL2 V5.D2, V12.D2, V5.Q1
+ VEOR3 V5.B16, V11.B16, V4.B16, V5.B16 // V5 = both V5 products ^ V4
+ MOVD 96(R3), R4 // const+96 (K_191)
+ FMOVD R4, F11
+ VPMULL V6.D1, V11.D1, V11.Q1
+ MOVD 104(R3), R4 // const+104 (K_127); R4 is reused three lines below
+ VDUP R4, V12.D2
+ VPMULL2 V6.D2, V12.D2, V6.Q1
+ VEOR3 V6.B16, V11.B16, V5.B16, V6.B16 // V6 = both V6 products ^ V5: all eight accumulators combined
+ FMOVD R4, F5 // R4 still holds const+104 (K_127)
+ VPMULL V6.D1, V5.D1, V5.Q1 // V5 = V6 low lane x K_127
+ VDUP V6.D[1], V6.D2 // copy V6 high lane into both lanes
+ VEOR V5.B8, V6.B8, V6.B8 // 64-bit XOR: V6 low = V5 low ^ old V6 high
+ MOVD 128(R3), R4 // const+128 (MU); NOTE(review): MU/POLY steps look like a Barrett reduction - confirm against the reference
+ FMOVD R4, F4
+ VPMULL V4.D1, V6.D1, V6.Q1 // V6 = MU x V6 low lane
+ FMOVD F6, R4 // R4 = low 64 bits of that product
+ MOVD 136(R3), R5 // const+136 (POLY)
+ FMOVD R5, F4
+ VPMULL V4.D1, V6.D1, V6.Q1 // V6 = POLY x V6 low lane
+ VEOR V6.B16, V5.B16, V6.B16 // V6 ^= V5
+ VMOV V6.D[1], R5 // R5 = high 64 bits of V6
+ EOR R4, R5, R0 // R0 = R4 ^ R5, the new checksum (still inverted)
+
+skip128:
+ MVN R0, R0 // undo the inversion done on entry
+ MOVD R0, checksum+32(FP) // store the result
+ RET
+
+DATA ·const+0x000(SB)/8, $0xd083dd594d96319d // K_959: tail128, multiplied with V0 low lane
+DATA ·const+0x008(SB)/8, $0x946588403d4adcbc // K_895: tail128, multiplied with V0 high lane
+DATA ·const+0x010(SB)/8, $0x3c255f5ebc414423 // K_831: tail128, multiplied with V1 low lane
+DATA ·const+0x018(SB)/8, $0x34f5a24e22d66e90 // K_767: tail128, multiplied with V1 high lane
+DATA ·const+0x020(SB)/8, $0x7b0ab10dd0f809fe // K_703: tail128, multiplied with V2 low lane
+DATA ·const+0x028(SB)/8, $0x03363823e6e791e5 // K_639: tail128, multiplied with V2 high lane
+DATA ·const+0x030(SB)/8, $0x0c32cdb31e18a84a // K_575: tail128, multiplied with V3 low lane
+DATA ·const+0x038(SB)/8, $0x62242240ace5045a // K_511: tail128, multiplied with V3 high lane
+DATA ·const+0x040(SB)/8, $0xbdd7ac0ee1a4a0f0 // K_447: tail128, multiplied with V4 low lane
+DATA ·const+0x048(SB)/8, $0xa3ffdc1fe8e82a8b // K_383: tail128, multiplied with V4 high lane
+DATA ·const+0x050(SB)/8, $0xb0bc2e589204f500 // K_319: tail128, multiplied with V5 low lane
+DATA ·const+0x058(SB)/8, $0xe1e0bb9d45d7a44c // K_255: tail128, multiplied with V5 high lane
+DATA ·const+0x060(SB)/8, $0xeadc41fd2ba3d420 // K_191: tail128, multiplied with V6 low lane
+DATA ·const+0x068(SB)/8, $0x21e9761e252621ac // K_127: tail128, multiplied with V6 high lane, then again with the combined low lane
+DATA ·const+0x070(SB)/8, $0xa1ca681e733f9c40 // K_1087: loop128, multiplied with each accumulator's low lane
+DATA ·const+0x078(SB)/8, $0x5f852fb61e8d92dc // K_1023: loop128, multiplied with each accumulator's high lane
+DATA ·const+0x080(SB)/8, $0x27ecfa329aef9f77 // MU: first multiplier of the final reduction
+DATA ·const+0x088(SB)/8, $0x34d926535897936b // POLY: second multiplier of the final reduction
+GLOBL ·const(SB), (NOPTR+RODATA), $144 // 18 entries x 8 bytes, read-only, contains no pointers