1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
|
// Copyright (c) 2025 Minio Inc. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.
#include "textflag.h"
// updateAsm folds whole 128-byte blocks of a byte buffer into a 64-bit CRC
// using carry-less multiplies (VPMULL/VPMULL2) and three-way XOR (VEOR3).
//
// Frame layout (from the FP offsets used below; 40 bytes of args+results):
//    crc+0(FP)       8-byte incoming checksum
//    p_base+8(FP)    pointer to the first byte of the buffer
//    p_len+16(FP)    buffer length in bytes
//    checksum+32(FP) 8-byte result
// NOTE(review): this presumably matches a Go declaration of the form
// `func updateAsm(crc uint64, p []byte) (checksum uint64)` - the declaration
// is not in this file, confirm against the Go side.
//
// Behaviour:
//   - Only len/128 full blocks are read; the trailing len%128 bytes are never
//     touched here (presumably the caller handles them - TODO confirm).
//   - With fewer than 128 bytes the input crc is returned unchanged (it is
//     inverted on entry and inverted again on exit).
//
// Uses R0-R5 and V0-V12; no stack frame ($0).
// NOTE(review): VPMULL on 64-bit lanes and VEOR3 need the PMULL and SHA3 CPU
// extensions; the feature check is presumably done by the caller - confirm.
TEXT ·updateAsm(SB), $0-40
MOVD crc+0(FP), R0 // R0 = incoming checksum
MOVD p_base+8(FP), R1 // R1 = read pointer into the buffer
MOVD p_len+16(FP), R2 // R2 = buffer length in bytes
MOVD $·const(SB), R3 // R3 = base address of the ·const folding table
// Work on the bitwise-inverted CRC; the inversion is undone at skip128.
MVN R0, R0
// R2 = number of complete 128-byte blocks.
LSR $7, R2, R2
CMP $1, R2
BLT skip128

// Load the first 128-byte block into V0-V7 (eight 16-byte vectors).
FLDPQ (R1), (F0, F1)
FLDPQ 32(R1), (F2, F3)
FLDPQ 64(R1), (F4, F5)
FLDPQ 96(R1), (F6, F7)
// Build a vector with the inverted crc in the low 64 bits and zero in the
// high 64 bits, then XOR it into the first 16 bytes of data.
FMOVD R0, F8
VMOVI $0, V9.B16
VMOV V9.D[0], V8.D[1]
VEOR V8.B16, V0.B16, V0.B16
// Exactly one block: nothing to fold across blocks, go straight to the tail.
CMP $1, R2
BEQ tail128

// Loop constants: low lane of V8 = const+112, both lanes of V9 = const+120
// (labelled K_1087 and K_1023 in the table).
MOVD 112(R3), R4
MOVD 120(R3), R5
FMOVD R4, F8
VDUP R5, V9.D2

// Main loop. On entry R2-1 blocks remain to be loaded. Each iteration advances
// R1 by one block and, for every accumulator Vi (i = 0..7), computes
//    Vi = pmull(Vi.lo, V8.lo) ^ pmull2(Vi.hi, V9.hi) ^ next 16 bytes of data
// V10 holds the low-lane product; V11/V12 hold the freshly loaded data.
loop128:
ADD $128, R1, R1
SUB $1, R2, R2
VPMULL V0.D1, V8.D1, V10.Q1
VPMULL2 V0.D2, V9.D2, V0.Q1
FLDPQ (R1), (F11, F12)
VEOR3 V0.B16, V11.B16, V10.B16, V0.B16
VPMULL V1.D1, V8.D1, V10.Q1
VPMULL2 V1.D2, V9.D2, V1.Q1
VEOR3 V1.B16, V12.B16, V10.B16, V1.B16
VPMULL V2.D1, V8.D1, V10.Q1
VPMULL2 V2.D2, V9.D2, V2.Q1
FLDPQ 32(R1), (F11, F12)
VEOR3 V2.B16, V11.B16, V10.B16, V2.B16
VPMULL V3.D1, V8.D1, V10.Q1
VPMULL2 V3.D2, V9.D2, V3.Q1
VEOR3 V3.B16, V12.B16, V10.B16, V3.B16
VPMULL V4.D1, V8.D1, V10.Q1
VPMULL2 V4.D2, V9.D2, V4.Q1
FLDPQ 64(R1), (F11, F12)
VEOR3 V4.B16, V11.B16, V10.B16, V4.B16
VPMULL V5.D1, V8.D1, V10.Q1
VPMULL2 V5.D2, V9.D2, V5.Q1
VEOR3 V5.B16, V12.B16, V10.B16, V5.B16
VPMULL V6.D1, V8.D1, V10.Q1
VPMULL2 V6.D2, V9.D2, V6.Q1
FLDPQ 96(R1), (F11, F12)
VEOR3 V6.B16, V11.B16, V10.B16, V6.B16
VPMULL V7.D1, V8.D1, V10.Q1
VPMULL2 V7.D2, V9.D2, V7.Q1
VEOR3 V7.B16, V12.B16, V10.B16, V7.B16
// Unsigned compare: keep looping while more than one block is outstanding.
CMP $1, R2
BHI loop128

// Tail: collapse the eight 16-byte accumulators into a single one.
// Accumulator Vi (i = 0..6) is multiplied by the constant pair stored at
// const+16*i (low lane) and const+16*i+8 (high lane) and XORed into a running
// value. The running value starts in V7, moves to V1 after the second step,
// and ends up in V6. V11/V12 are scratch for the constants and the low-lane
// product.
tail128:
// V7 ^= fold(V0) with const+0 / const+8.
MOVD (R3), R4
FMOVD R4, F11
VPMULL V0.D1, V11.D1, V11.Q1
MOVD 8(R3), R4
VDUP R4, V12.D2
VPMULL2 V0.D2, V12.D2, V0.Q1
VEOR3 V0.B16, V7.B16, V11.B16, V7.B16
// V1 = fold(V1) ^ V7 with const+16 / const+24.
MOVD 16(R3), R4
FMOVD R4, F11
VPMULL V1.D1, V11.D1, V11.Q1
MOVD 24(R3), R4
VDUP R4, V12.D2
VPMULL2 V1.D2, V12.D2, V1.Q1
VEOR3 V1.B16, V11.B16, V7.B16, V1.B16
// V2 = fold(V2) ^ V1 with const+32 / const+40.
MOVD 32(R3), R4
FMOVD R4, F11
VPMULL V2.D1, V11.D1, V11.Q1
MOVD 40(R3), R4
VDUP R4, V12.D2
VPMULL2 V2.D2, V12.D2, V2.Q1
VEOR3 V2.B16, V11.B16, V1.B16, V2.B16
// V3 = fold(V3) ^ V2 with const+48 / const+56.
MOVD 48(R3), R4
FMOVD R4, F11
VPMULL V3.D1, V11.D1, V11.Q1
MOVD 56(R3), R4
VDUP R4, V12.D2
VPMULL2 V3.D2, V12.D2, V3.Q1
VEOR3 V3.B16, V11.B16, V2.B16, V3.B16
// V4 = fold(V4) ^ V3 with const+64 / const+72.
MOVD 64(R3), R4
FMOVD R4, F11
VPMULL V4.D1, V11.D1, V11.Q1
MOVD 72(R3), R4
VDUP R4, V12.D2
VPMULL2 V4.D2, V12.D2, V4.Q1
VEOR3 V4.B16, V11.B16, V3.B16, V4.B16
// V5 = fold(V5) ^ V4 with const+80 / const+88.
MOVD 80(R3), R4
FMOVD R4, F11
VPMULL V5.D1, V11.D1, V11.Q1
MOVD 88(R3), R4
VDUP R4, V12.D2
VPMULL2 V5.D2, V12.D2, V5.Q1
VEOR3 V5.B16, V11.B16, V4.B16, V5.B16
// V6 = fold(V6) ^ V5 with const+96 / const+104. V6 now holds the single
// 128-bit remainder of all processed blocks.
MOVD 96(R3), R4
FMOVD R4, F11
VPMULL V6.D1, V11.D1, V11.Q1
MOVD 104(R3), R4
VDUP R4, V12.D2
VPMULL2 V6.D2, V12.D2, V6.Q1
VEOR3 V6.B16, V11.B16, V5.B16, V6.B16

// Reduce 128 bits to 64 bits.
// R4 still holds const+104 here: V5 = pmull(V6.lo, const+104), then the low
// 64 bits of V6 become V5.lo ^ V6.hi (8-byte VEOR, using the duplicated
// high lane).
FMOVD R4, F5
VPMULL V6.D1, V5.D1, V5.Q1
VDUP V6.D[1], V6.D2
VEOR V5.B8, V6.B8, V6.B8
// Final reduction with the table entries labelled MU (const+128) and POLY
// (const+136). NOTE(review): this has the shape of a Barrett reduction -
// confirm against the reference the constants were derived from.
// R4 = low 64 bits of pmull(V6.lo, MU).
MOVD 128(R3), R4
FMOVD R4, F4
VPMULL V4.D1, V6.D1, V6.Q1
FMOVD F6, R4
// V6 = pmull(V6.lo, POLY) ^ V5; the result is R4 ^ high 64 bits of V6.
MOVD 136(R3), R5
FMOVD R5, F4
VPMULL V4.D1, V6.D1, V6.Q1
VEOR V6.B16, V5.B16, V6.B16
VMOV V6.D[1], R5
EOR R4, R5, R0

// Common exit: undo the entry inversion and return the checksum. When reached
// directly from the length check, R0 is the untouched (inverted) input crc.
skip128:
MVN R0, R0
MOVD R0, checksum+32(FP)
RET
// ·const is the table of eighteen 64-bit constants (144 bytes) read by
// updateAsm through R3. It is declared read-only and pointer-free
// (NOPTR+RODATA).
//
// Layout, as consumed by updateAsm:
//    0x00-0x68  seven (low-lane, high-lane) pairs used in tail128 to fold
//               accumulators V0..V6 (pair i lives at offset 16*i); the entry
//               at 0x68 is reused for the 128-to-64-bit step
//    0x70-0x78  the pair used inside loop128 to fold across 128-byte blocks
//    0x80       MU, first multiplier of the final reduction
//    0x88       POLY, second multiplier of the final reduction
// NOTE(review): the K_n labels presumably denote x^n reduced modulo the CRC
// polynomial - the derivation is not shown in this file, confirm before
// regenerating any value.
DATA ·const+0x000(SB)/8, $0xd083dd594d96319d // K_959
DATA ·const+0x008(SB)/8, $0x946588403d4adcbc // K_895
DATA ·const+0x010(SB)/8, $0x3c255f5ebc414423 // K_831
DATA ·const+0x018(SB)/8, $0x34f5a24e22d66e90 // K_767
DATA ·const+0x020(SB)/8, $0x7b0ab10dd0f809fe // K_703
DATA ·const+0x028(SB)/8, $0x03363823e6e791e5 // K_639
DATA ·const+0x030(SB)/8, $0x0c32cdb31e18a84a // K_575
DATA ·const+0x038(SB)/8, $0x62242240ace5045a // K_511
DATA ·const+0x040(SB)/8, $0xbdd7ac0ee1a4a0f0 // K_447
DATA ·const+0x048(SB)/8, $0xa3ffdc1fe8e82a8b // K_383
DATA ·const+0x050(SB)/8, $0xb0bc2e589204f500 // K_319
DATA ·const+0x058(SB)/8, $0xe1e0bb9d45d7a44c // K_255
DATA ·const+0x060(SB)/8, $0xeadc41fd2ba3d420 // K_191
DATA ·const+0x068(SB)/8, $0x21e9761e252621ac // K_127
DATA ·const+0x070(SB)/8, $0xa1ca681e733f9c40 // K_1087
DATA ·const+0x078(SB)/8, $0x5f852fb61e8d92dc // K_1023
DATA ·const+0x080(SB)/8, $0x27ecfa329aef9f77 // MU
DATA ·const+0x088(SB)/8, $0x34d926535897936b // POLY
// 18 entries * 8 bytes = 144 bytes.
GLOBL ·const(SB), (NOPTR+RODATA), $144
|