diff options
Diffstat (limited to 'vendor/github.com/minio/crc64nvme/crc64_arm64.s')
-rw-r--r-- | vendor/github.com/minio/crc64nvme/crc64_arm64.s | 155 |
1 files changed, 155 insertions, 0 deletions
diff --git a/vendor/github.com/minio/crc64nvme/crc64_arm64.s b/vendor/github.com/minio/crc64nvme/crc64_arm64.s
new file mode 100644
index 000000000..b61866f63
--- /dev/null
+++ b/vendor/github.com/minio/crc64nvme/crc64_arm64.s
@@ -0,0 +1,155 @@
+// Copyright (c) 2025 Minio Inc. All rights reserved.
+// Use of this source code is governed by a license that can be
+// found in the LICENSE file.
+
+#include "textflag.h"
+// updateAsm folds every full 128-byte block of p into crc using PMULL/EOR3 and returns the result; the len(p)%128 trailing bytes are never read here.
+TEXT ·updateAsm(SB), $0-40 // no stack frame, 40 bytes of args+result; presumably func updateAsm(crc uint64, p []byte) (checksum uint64) - confirm against the Go stub
+	MOVD crc+0(FP), R0 // R0 = incoming checksum
+	MOVD p_base+8(FP), R1 // R1 = read pointer into p
+	MOVD p_len+16(FP), R2 // R2 = len(p) in bytes (turned into a block count below)
+	MOVD $·const(SB), R3 // R3 = base address of the constant table defined at the end of this file
+	MVN R0, R0 // work on the bitwise-inverted crc; inverted back at skip128
+	LSR $7, R2, R2 // R2 = len(p) / 128 = number of full 128-byte blocks
+	CMP $1, R2
+	BLT skip128 // no full block: nothing to do (signed compare is safe, R2 is non-negative after LSR)
+// Load the first 128-byte block into V0..V7 and XOR the inverted crc into the low 64-bit lane of V0.
+	FLDPQ (R1), (F0, F1) // bytes 0..31 -> V0, V1
+	FLDPQ 32(R1), (F2, F3) // bytes 32..63 -> V2, V3
+	FLDPQ 64(R1), (F4, F5) // bytes 64..95 -> V4, V5
+	FLDPQ 96(R1), (F6, F7) // bytes 96..127 -> V6, V7
+	FMOVD R0, F8 // V8.D[0] = inverted crc
+	VMOVI $0, V9.B16 // V9 = 0
+	VMOV V9.D[0], V8.D[1] // explicitly clear the upper lane of V8
+	VEOR V8.B16, V0.B16, V0.B16 // V0 ^= {crc, 0}
+	CMP $1, R2
+	BEQ tail128 // exactly one block: skip the block loop and reduce
+// Two or more blocks: V8.D[0] = K_1087 and both lanes of V9 = K_1023 drive the per-block fold below.
+	MOVD 112(R3), R4 // R4 = K_1087 (table offset 0x70)
+	MOVD 120(R3), R5 // R5 = K_1023 (table offset 0x78)
+	FMOVD R4, F8 // V8.D[0] = K_1087, used by the low-half VPMULLs
+	VDUP R5, V9.D2 // V9 = {K_1023, K_1023}, upper lane used by the VPMULL2s
+// Per remaining block: Vi = (Vi.lo * K_1087) ^ (Vi.hi * K_1023) ^ next 16 input bytes, for i = 0..7 (carry-less multiplies).
+loop128:
+	ADD $128, R1, R1 // advance to the next 128-byte block
+	SUB $1, R2, R2 // one block fewer outstanding
+	VPMULL V0.D1, V8.D1, V10.Q1 // V10 = V0.lo * K_1087
+	VPMULL2 V0.D2, V9.D2, V0.Q1 // V0 = V0.hi * K_1023
+	FLDPQ (R1), (F11, F12) // next input bytes 0..31 -> V11, V12
+	VEOR3 V0.B16, V11.B16, V10.B16, V0.B16 // V0 = V0 ^ V11 ^ V10
+	VPMULL V1.D1, V8.D1, V10.Q1 // same fold for V1 ...
+	VPMULL2 V1.D2, V9.D2, V1.Q1
+	VEOR3 V1.B16, V12.B16, V10.B16, V1.B16 // ... with input from V12
+	VPMULL V2.D1, V8.D1, V10.Q1 // V2
+	VPMULL2 V2.D2, V9.D2, V2.Q1
+	FLDPQ 32(R1), (F11, F12) // next input bytes 32..63 -> V11, V12
+	VEOR3 V2.B16, V11.B16, V10.B16, V2.B16
+	VPMULL V3.D1, V8.D1, V10.Q1 // V3
+	VPMULL2 V3.D2, V9.D2, V3.Q1
+	VEOR3 V3.B16, V12.B16, V10.B16, V3.B16
+	VPMULL V4.D1, V8.D1, V10.Q1 // V4
+	VPMULL2 V4.D2, V9.D2, V4.Q1
+	FLDPQ 64(R1), (F11, F12) // next input bytes 64..95 -> V11, V12
+	VEOR3 V4.B16, V11.B16, V10.B16, V4.B16
+	VPMULL V5.D1, V8.D1, V10.Q1 // V5
+	VPMULL2 V5.D2, V9.D2, V5.Q1
+	VEOR3 V5.B16, V12.B16, V10.B16, V5.B16
+	VPMULL V6.D1, V8.D1, V10.Q1 // V6
+	VPMULL2 V6.D2, V9.D2, V6.Q1
+	FLDPQ 96(R1), (F11, F12) // next input bytes 96..127 -> V11, V12
+	VEOR3 V6.B16, V11.B16, V10.B16, V6.B16
+	VPMULL V7.D1, V8.D1, V10.Q1 // V7
+	VPMULL2 V7.D2, V9.D2, V7.Q1
+	VEOR3 V7.B16, V12.B16, V10.B16, V7.B16
+	CMP $1, R2
+	BHI loop128 // unsigned: keep going while more than one block is still counted in R2
+// Collapse the eight accumulators: V0..V6 are each multiplied by their own constant pair and XORed along the chain V7 -> V1 -> V2 -> ... -> V6.
+tail128:
+	MOVD (R3), R4 // K_959
+	FMOVD R4, F11 // V11.D[0] = K_959
+	VPMULL V0.D1, V11.D1, V11.Q1 // V11 = V0.lo * K_959
+	MOVD 8(R3), R4 // K_895
+	VDUP R4, V12.D2 // V12 = {K_895, K_895}
+	VPMULL2 V0.D2, V12.D2, V0.Q1 // V0 = V0.hi * K_895
+	VEOR3 V0.B16, V7.B16, V11.B16, V7.B16 // V7 ^= V0 ^ V11
+	MOVD 16(R3), R4 // K_831
+	FMOVD R4, F11
+	VPMULL V1.D1, V11.D1, V11.Q1 // V11 = V1.lo * K_831
+	MOVD 24(R3), R4 // K_767
+	VDUP R4, V12.D2
+	VPMULL2 V1.D2, V12.D2, V1.Q1 // V1 = V1.hi * K_767
+	VEOR3 V1.B16, V11.B16, V7.B16, V1.B16 // V1 = V1 ^ V11 ^ V7; running value now lives in V1
+	MOVD 32(R3), R4 // K_703
+	FMOVD R4, F11
+	VPMULL V2.D1, V11.D1, V11.Q1 // V11 = V2.lo * K_703
+	MOVD 40(R3), R4 // K_639
+	VDUP R4, V12.D2
+	VPMULL2 V2.D2, V12.D2, V2.Q1 // V2 = V2.hi * K_639
+	VEOR3 V2.B16, V11.B16, V1.B16, V2.B16 // V2 = V2 ^ V11 ^ V1
+	MOVD 48(R3), R4 // K_575
+	FMOVD R4, F11
+	VPMULL V3.D1, V11.D1, V11.Q1 // V11 = V3.lo * K_575
+	MOVD 56(R3), R4 // K_511
+	VDUP R4, V12.D2
+	VPMULL2 V3.D2, V12.D2, V3.Q1 // V3 = V3.hi * K_511
+	VEOR3 V3.B16, V11.B16, V2.B16, V3.B16 // V3 = V3 ^ V11 ^ V2
+	MOVD 64(R3), R4 // K_447
+	FMOVD R4, F11
+	VPMULL V4.D1, V11.D1, V11.Q1 // V11 = V4.lo * K_447
+	MOVD 72(R3), R4 // K_383
+	VDUP R4, V12.D2
+	VPMULL2 V4.D2, V12.D2, V4.Q1 // V4 = V4.hi * K_383
+	VEOR3 V4.B16, V11.B16, V3.B16, V4.B16 // V4 = V4 ^ V11 ^ V3
+	MOVD 80(R3), R4 // K_319
+	FMOVD R4, F11
+	VPMULL V5.D1, V11.D1, V11.Q1 // V11 = V5.lo * K_319
+	MOVD 88(R3), R4 // K_255
+	VDUP R4, V12.D2
+	VPMULL2 V5.D2, V12.D2, V5.Q1 // V5 = V5.hi * K_255
+	VEOR3 V5.B16, V11.B16, V4.B16, V5.B16 // V5 = V5 ^ V11 ^ V4
+	MOVD 96(R3), R4 // K_191
+	FMOVD R4, F11
+	VPMULL V6.D1, V11.D1, V11.Q1 // V11 = V6.lo * K_191
+	MOVD 104(R3), R4 // K_127
+	VDUP R4, V12.D2
+	VPMULL2 V6.D2, V12.D2, V6.Q1 // V6 = V6.hi * K_127
+	VEOR3 V6.B16, V11.B16, V5.B16, V6.B16 // V6 = V6 ^ V11 ^ V5; all 128 bytes are now folded into V6
+	FMOVD R4, F5 // R4 still holds K_127 from the load at 104(R3); V5.D[0] = K_127
+	VPMULL V6.D1, V5.D1, V5.Q1 // V5 = V6.lo * K_127
+	VDUP V6.D[1], V6.D2 // copy V6.hi into both lanes of V6
+	VEOR V5.B8, V6.B8, V6.B8 // 8-byte form: V6.lo = V5.lo ^ (old V6.hi)
+	MOVD 128(R3), R4 // MU - NOTE(review): the MU/POLY pair suggests a Barrett-style reduction; confirm against the reference algorithm
+	FMOVD R4, F4 // V4.D[0] = MU
+	VPMULL V4.D1, V6.D1, V6.Q1 // V6 = V6.lo * MU
+	FMOVD F6, R4 // R4 = low 64 bits of that product
+	MOVD 136(R3), R5 // POLY
+	FMOVD R5, F4 // V4.D[0] = POLY
+	VPMULL V4.D1, V6.D1, V6.Q1 // V6 = V6.lo * POLY
+	VEOR V6.B16, V5.B16, V6.B16 // V6 ^= V5 (the 128-bit V6.lo * K_127 product)
+	VMOV V6.D[1], R5 // R5 = upper 64 bits of the result
+	EOR R4, R5, R0 // R0 = R4 ^ R5, still in inverted form
+// Also reached directly when len(p) < 128: the two MVNs then cancel and crc is returned exactly as passed in.
+skip128:
+	MVN R0, R0 // undo the entry inversion
+	MOVD R0, checksum+32(FP) // store the result slot
+	RET
+// Constant table addressed through R3; the K_*/MU/POLY names are the original author's labels for each 8-byte entry.
+DATA ·const+0x000(SB)/8, $0xd083dd594d96319d // K_959 (tail128: V0 low half)
+DATA ·const+0x008(SB)/8, $0x946588403d4adcbc // K_895 (tail128: V0 high half)
+DATA ·const+0x010(SB)/8, $0x3c255f5ebc414423 // K_831 (tail128: V1 low half)
+DATA ·const+0x018(SB)/8, $0x34f5a24e22d66e90 // K_767 (tail128: V1 high half)
+DATA ·const+0x020(SB)/8, $0x7b0ab10dd0f809fe // K_703 (tail128: V2 low half)
+DATA ·const+0x028(SB)/8, $0x03363823e6e791e5 // K_639 (tail128: V2 high half)
+DATA ·const+0x030(SB)/8, $0x0c32cdb31e18a84a // K_575 (tail128: V3 low half)
+DATA ·const+0x038(SB)/8, $0x62242240ace5045a // K_511 (tail128: V3 high half)
+DATA ·const+0x040(SB)/8, $0xbdd7ac0ee1a4a0f0 // K_447 (tail128: V4 low half)
+DATA ·const+0x048(SB)/8, $0xa3ffdc1fe8e82a8b // K_383 (tail128: V4 high half)
+DATA ·const+0x050(SB)/8, $0xb0bc2e589204f500 // K_319 (tail128: V5 low half)
+DATA ·const+0x058(SB)/8, $0xe1e0bb9d45d7a44c // K_255 (tail128: V5 high half)
+DATA ·const+0x060(SB)/8, $0xeadc41fd2ba3d420 // K_191 (tail128: V6 low half)
+DATA ·const+0x068(SB)/8, $0x21e9761e252621ac // K_127 (tail128: V6 high half, reused for the last 128-bit fold)
+DATA ·const+0x070(SB)/8, $0xa1ca681e733f9c40 // K_1087 (loop128: low halves)
+DATA ·const+0x078(SB)/8, $0x5f852fb61e8d92dc // K_1023 (loop128: high halves)
+DATA ·const+0x080(SB)/8, $0x27ecfa329aef9f77 // MU (final reduction, first multiply)
+DATA ·const+0x088(SB)/8, $0x34d926535897936b // POLY (final reduction, second multiply)
+GLOBL ·const(SB), (NOPTR+RODATA), $144
|