summaryrefslogtreecommitdiff
path: root/vendor/github.com/minio/crc64nvme/crc64_arm64.s
blob: b61866f6306e292cfe1d61ad5235c665e8e2f321 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
// Copyright (c) 2025 Minio Inc. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.

#include "textflag.h"

// updateAsm folds every whole 128-byte block of p into the running CRC using
// carry-less multiplies (VPMULL/VPMULL2) and three-way XORs (VEOR3).
//
// Go-side declaration, inferred from the FP names/offsets used below
// (TODO confirm against the Go stub):
//   func updateAsm(crc uint64, p []byte) (checksum uint64)
//
// Frame $0-40: no local stack, 40 bytes of arguments plus result.
//   crc+0(FP)        incoming checksum
//   p_base+8(FP)     address of the first byte of the buffer
//   p_len+16(FP)     buffer length in bytes
//   checksum+32(FP)  result
//
// Only p_len>>7 whole blocks are consumed. The trailing p_len%128 bytes are
// never read here (presumably the caller handles them - verify). When
// p_len < 128 the incoming crc is returned unchanged, because the two MVNs
// cancel out.
//
// NOTE(review): VPMULL and VEOR3 are optional ARMv8 extensions; presumably the
// caller checks CPU features before dispatching here - confirm.
//
// Register roles:
//   R0       checksum, held bit-inverted between the two MVNs
//   R1       read pointer into p
//   R2       128-byte blocks not yet retired (includes the one in V0-V7)
//   R3       address of the ·const table
//   R4, R5   scratch: constants, then the two halves of the final result
//   V0-V7    eight 128-bit accumulators, one per 16 bytes of a block
//   V8, V9   loop constants: const+0x70 (low lane) / const+0x78 (both lanes)
//   V10-V12  temporaries
TEXT ·updateAsm(SB), $0-40
	MOVD crc+0(FP), R0    // checksum
	MOVD p_base+8(FP), R1 // start pointer
	MOVD p_len+16(FP), R2 // length of buffer
	MOVD  $·const(SB), R3 // constants
	// Invert the incoming CRC and turn the byte length into a block count.
	// With fewer than one whole block there is nothing to fold.
	MVN  R0, R0
	LSR  $7, R2, R2
	CMP  $1, R2
	BLT  skip128

	// Load the first 128-byte block into V0-V7 (16 bytes per register).
	FLDPQ (R1), (F0, F1)
	FLDPQ 32(R1), (F2, F3)
	FLDPQ 64(R1), (F4, F5)
	FLDPQ 96(R1), (F6, F7)
	// Build V8 = {inverted crc, 0} and XOR it into V0, so the CRC is mixed
	// into the low 64 bits of the first 16 data bytes only.
	FMOVD R0, F8
	VMOVI $0, V9.B16
	VMOV  V9.D[0], V8.D[1]
	VEOR  V8.B16, V0.B16, V0.B16
	// Exactly one block: skip the loop and reduce what is already loaded.
	CMP   $1, R2
	BEQ   tail128

	// Loop constants: const+0x70 (labelled K_1087) in the low lane of V8,
	// const+0x78 (labelled K_1023) duplicated into both lanes of V9.
	MOVD  112(R3), R4
	MOVD  120(R3), R5
	FMOVD R4, F8
	VDUP  R5, V9.D2

	// Each iteration advances to the next 128-byte block and, for every
	// accumulator Vi (i = 0..7), computes
	//   Vi = clmul(Vi.lo, V8.lo) ^ clmul(Vi.hi, V9.hi) ^ next16bytes[i]
	// V10 holds the low-lane product; V11/V12 hold the freshly loaded data.
	// The loop repeats while more than one block remains (R2 > 1).
loop128:
	ADD     $128, R1, R1
	SUB     $1, R2, R2
	// V0 and V1 absorb bytes 0..31 of the new block.
	VPMULL  V0.D1, V8.D1, V10.Q1
	VPMULL2 V0.D2, V9.D2, V0.Q1
	FLDPQ   (R1), (F11, F12)
	VEOR3   V0.B16, V11.B16, V10.B16, V0.B16
	VPMULL  V1.D1, V8.D1, V10.Q1
	VPMULL2 V1.D2, V9.D2, V1.Q1
	VEOR3   V1.B16, V12.B16, V10.B16, V1.B16
	// V2 and V3 absorb bytes 32..63.
	VPMULL  V2.D1, V8.D1, V10.Q1
	VPMULL2 V2.D2, V9.D2, V2.Q1
	FLDPQ   32(R1), (F11, F12)
	VEOR3   V2.B16, V11.B16, V10.B16, V2.B16
	VPMULL  V3.D1, V8.D1, V10.Q1
	VPMULL2 V3.D2, V9.D2, V3.Q1
	VEOR3   V3.B16, V12.B16, V10.B16, V3.B16
	// V4 and V5 absorb bytes 64..95.
	VPMULL  V4.D1, V8.D1, V10.Q1
	VPMULL2 V4.D2, V9.D2, V4.Q1
	FLDPQ   64(R1), (F11, F12)
	VEOR3   V4.B16, V11.B16, V10.B16, V4.B16
	VPMULL  V5.D1, V8.D1, V10.Q1
	VPMULL2 V5.D2, V9.D2, V5.Q1
	VEOR3   V5.B16, V12.B16, V10.B16, V5.B16
	// V6 and V7 absorb bytes 96..127.
	VPMULL  V6.D1, V8.D1, V10.Q1
	VPMULL2 V6.D2, V9.D2, V6.Q1
	FLDPQ   96(R1), (F11, F12)
	VEOR3   V6.B16, V11.B16, V10.B16, V6.B16
	VPMULL  V7.D1, V8.D1, V10.Q1
	VPMULL2 V7.D2, V9.D2, V7.Q1
	VEOR3   V7.B16, V12.B16, V10.B16, V7.B16
	CMP     $1, R2
	BHI     loop128

	// Collapse the eight accumulators into one 128-bit value. Each of V0..V6
	// is multiplied by its own constant pair from the table (offsets 0x00 to
	// 0x68: low lane by the first of the pair, high lane by the second) and
	// XORed into a running sum. The sum starts in V7, which is taken as-is,
	// and is carried V7 -> V1 -> V2 -> ... -> V6, so it ends up in V6.
tail128:
	// V7 ^= fold(V0) using const+0x00 / const+0x08.
	MOVD    (R3), R4
	FMOVD   R4, F11
	VPMULL  V0.D1, V11.D1, V11.Q1
	MOVD    8(R3), R4
	VDUP    R4, V12.D2
	VPMULL2 V0.D2, V12.D2, V0.Q1
	VEOR3   V0.B16, V7.B16, V11.B16, V7.B16
	// V1 = fold(V1) ^ V7 using const+0x10 / const+0x18.
	MOVD    16(R3), R4
	FMOVD   R4, F11
	VPMULL  V1.D1, V11.D1, V11.Q1
	MOVD    24(R3), R4
	VDUP    R4, V12.D2
	VPMULL2 V1.D2, V12.D2, V1.Q1
	VEOR3   V1.B16, V11.B16, V7.B16, V1.B16
	// V2 = fold(V2) ^ V1 using const+0x20 / const+0x28.
	MOVD    32(R3), R4
	FMOVD   R4, F11
	VPMULL  V2.D1, V11.D1, V11.Q1
	MOVD    40(R3), R4
	VDUP    R4, V12.D2
	VPMULL2 V2.D2, V12.D2, V2.Q1
	VEOR3   V2.B16, V11.B16, V1.B16, V2.B16
	// V3 = fold(V3) ^ V2 using const+0x30 / const+0x38.
	MOVD    48(R3), R4
	FMOVD   R4, F11
	VPMULL  V3.D1, V11.D1, V11.Q1
	MOVD    56(R3), R4
	VDUP    R4, V12.D2
	VPMULL2 V3.D2, V12.D2, V3.Q1
	VEOR3   V3.B16, V11.B16, V2.B16, V3.B16
	// V4 = fold(V4) ^ V3 using const+0x40 / const+0x48.
	MOVD    64(R3), R4
	FMOVD   R4, F11
	VPMULL  V4.D1, V11.D1, V11.Q1
	MOVD    72(R3), R4
	VDUP    R4, V12.D2
	VPMULL2 V4.D2, V12.D2, V4.Q1
	VEOR3   V4.B16, V11.B16, V3.B16, V4.B16
	// V5 = fold(V5) ^ V4 using const+0x50 / const+0x58.
	MOVD    80(R3), R4
	FMOVD   R4, F11
	VPMULL  V5.D1, V11.D1, V11.Q1
	MOVD    88(R3), R4
	VDUP    R4, V12.D2
	VPMULL2 V5.D2, V12.D2, V5.Q1
	VEOR3   V5.B16, V11.B16, V4.B16, V5.B16
	// V6 = fold(V6) ^ V5 using const+0x60 / const+0x68.
	MOVD    96(R3), R4
	FMOVD   R4, F11
	VPMULL  V6.D1, V11.D1, V11.Q1
	MOVD    104(R3), R4
	VDUP    R4, V12.D2
	VPMULL2 V6.D2, V12.D2, V6.Q1
	VEOR3   V6.B16, V11.B16, V5.B16, V6.B16
	// 128 -> 64 bit step. R4 still holds const+0x68 (labelled K_127) from the
	// load above, so no reload is needed:
	//   V5      = clmul(V6.lo, const+0x68)
	//   V6.D[0] = V6.D[1] ^ V5.D[0]
	FMOVD   R4, F5
	VPMULL  V6.D1, V5.D1, V5.Q1
	VDUP    V6.D[1], V6.D2
	VEOR    V5.B8, V6.B8, V6.B8
	// Final reduction using const+0x80 (labelled MU) and const+0x88 (labelled
	// POLY); the two-multiply shape looks like a Barrett reduction
	// (NOTE(review): confirm against the algorithm reference):
	//   V6 = clmul(MU, V6.lo);    R4 = low 64 bits of that product
	//   V6 = clmul(POLY, V6.lo) ^ V5
	//   R0 = R4 ^ V6.D[1]
	MOVD    128(R3), R4
	FMOVD   R4, F4
	VPMULL  V4.D1, V6.D1, V6.Q1
	FMOVD   F6, R4
	MOVD    136(R3), R5
	FMOVD   R5, F4
	VPMULL  V4.D1, V6.D1, V6.Q1
	VEOR    V6.B16, V5.B16, V6.B16
	VMOV    V6.D[1], R5
	EOR     R4, R5, R0

	// Reached by fall-through and by the early exit for p_len < 128:
	// undo the entry inversion and store the result.
skip128:
	MVN  R0, R0
	MOVD R0, checksum+32(FP)
	RET

// ·const is the constant table read by updateAsm: eighteen 64-bit words
// (144 bytes), declared read-only and pointer-free (NOPTR+RODATA).
//
// updateAsm addresses the table by hard-coded byte offset, so entries must
// not be reordered or resized:
//   0x00-0x68  seven (low-lane, high-lane) pairs used in tail128 to fold
//              V0..V6 into the running sum; 0x68 is also reused for the
//              128 -> 64 bit step
//   0x70,0x78  the pair used on every loop128 iteration
//   0x80       first multiplier of the final reduction (MU)
//   0x88       second multiplier of the final reduction (POLY)
//
// NOTE(review): the K_<n> labels presumably denote x^n reduced modulo the CRC
// polynomial, but the derivation is not shown in this file - confirm against
// the generator before changing any value.
DATA ·const+0x000(SB)/8, $0xd083dd594d96319d // K_959
DATA ·const+0x008(SB)/8, $0x946588403d4adcbc // K_895
DATA ·const+0x010(SB)/8, $0x3c255f5ebc414423 // K_831
DATA ·const+0x018(SB)/8, $0x34f5a24e22d66e90 // K_767
DATA ·const+0x020(SB)/8, $0x7b0ab10dd0f809fe // K_703
DATA ·const+0x028(SB)/8, $0x03363823e6e791e5 // K_639
DATA ·const+0x030(SB)/8, $0x0c32cdb31e18a84a // K_575
DATA ·const+0x038(SB)/8, $0x62242240ace5045a // K_511
DATA ·const+0x040(SB)/8, $0xbdd7ac0ee1a4a0f0 // K_447
DATA ·const+0x048(SB)/8, $0xa3ffdc1fe8e82a8b // K_383
DATA ·const+0x050(SB)/8, $0xb0bc2e589204f500 // K_319
DATA ·const+0x058(SB)/8, $0xe1e0bb9d45d7a44c // K_255
DATA ·const+0x060(SB)/8, $0xeadc41fd2ba3d420 // K_191
DATA ·const+0x068(SB)/8, $0x21e9761e252621ac // K_127
DATA ·const+0x070(SB)/8, $0xa1ca681e733f9c40 // K_1087
DATA ·const+0x078(SB)/8, $0x5f852fb61e8d92dc // K_1023
DATA ·const+0x080(SB)/8, $0x27ecfa329aef9f77 // MU
DATA ·const+0x088(SB)/8, $0x34d926535897936b // POLY
GLOBL ·const(SB), (NOPTR+RODATA), $144