summaryrefslogtreecommitdiff
path: root/vendor/github.com/minio/crc64nvme/crc64_amd64.s
blob: 04d8cf7869a84f56f2ab313ae9cc00d8a7d87f87 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
// Copyright (c) 2025 Minio Inc. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.

#include "textflag.h"

TEXT ·updateAsm(SB), $0-40
	MOVQ crc+0(FP), AX    // checksum
	MOVQ p_base+8(FP), SI // start pointer
	MOVQ p_len+16(FP), CX // length of buffer
	NOTQ AX
	SHRQ $7, CX
	CMPQ CX, $1
	JLT  skip128

	VMOVDQA 0x00(SI), X0
	VMOVDQA 0x10(SI), X1
	VMOVDQA 0x20(SI), X2
	VMOVDQA 0x30(SI), X3
	VMOVDQA 0x40(SI), X4
	VMOVDQA 0x50(SI), X5
	VMOVDQA 0x60(SI), X6
	VMOVDQA 0x70(SI), X7
	MOVQ    AX, X8
	PXOR    X8, X0
	CMPQ    CX, $1
	JE      tail128

	MOVQ   $0xa1ca681e733f9c40, AX
	MOVQ   AX, X8
	MOVQ   $0x5f852fb61e8d92dc, AX
	PINSRQ $0x1, AX, X9

loop128:
	ADDQ      $128, SI
	SUBQ      $1, CX
	VMOVDQA   X0, X10
	PCLMULQDQ $0x00, X8, X10
	PCLMULQDQ $0x11, X9, X0
	PXOR      X10, X0
	PXOR      0(SI), X0
	VMOVDQA   X1, X10
	PCLMULQDQ $0x00, X8, X10
	PCLMULQDQ $0x11, X9, X1
	PXOR      X10, X1
	PXOR      0x10(SI), X1
	VMOVDQA   X2, X10
	PCLMULQDQ $0x00, X8, X10
	PCLMULQDQ $0x11, X9, X2
	PXOR      X10, X2
	PXOR      0x20(SI), X2
	VMOVDQA   X3, X10
	PCLMULQDQ $0x00, X8, X10
	PCLMULQDQ $0x11, X9, X3
	PXOR      X10, X3
	PXOR      0x30(SI), X3
	VMOVDQA   X4, X10
	PCLMULQDQ $0x00, X8, X10
	PCLMULQDQ $0x11, X9, X4
	PXOR      X10, X4
	PXOR      0x40(SI), X4
	VMOVDQA   X5, X10
	PCLMULQDQ $0x00, X8, X10
	PCLMULQDQ $0x11, X9, X5
	PXOR      X10, X5
	PXOR      0x50(SI), X5
	VMOVDQA   X6, X10
	PCLMULQDQ $0x00, X8, X10
	PCLMULQDQ $0x11, X9, X6
	PXOR      X10, X6
	PXOR      0x60(SI), X6
	VMOVDQA   X7, X10
	PCLMULQDQ $0x00, X8, X10
	PCLMULQDQ $0x11, X9, X7
	PXOR      X10, X7
	PXOR      0x70(SI), X7
	CMPQ      CX, $1
	JGT       loop128

tail128:
	MOVQ      $0xd083dd594d96319d, AX
	MOVQ      AX, X11
	PCLMULQDQ $0x00, X0, X11
	MOVQ      $0x946588403d4adcbc, AX
	PINSRQ    $0x1, AX, X12
	PCLMULQDQ $0x11, X12, X0
	PXOR      X11, X7
	PXOR      X0, X7
	MOVQ      $0x3c255f5ebc414423, AX
	MOVQ      AX, X11
	PCLMULQDQ $0x00, X1, X11
	MOVQ      $0x34f5a24e22d66e90, AX
	PINSRQ    $0x1, AX, X12
	PCLMULQDQ $0x11, X12, X1
	PXOR      X11, X1
	PXOR      X7, X1
	MOVQ      $0x7b0ab10dd0f809fe, AX
	MOVQ      AX, X11
	PCLMULQDQ $0x00, X2, X11
	MOVQ      $0x03363823e6e791e5, AX
	PINSRQ    $0x1, AX, X12
	PCLMULQDQ $0x11, X12, X2
	PXOR      X11, X2
	PXOR      X1, X2
	MOVQ      $0x0c32cdb31e18a84a, AX
	MOVQ      AX, X11
	PCLMULQDQ $0x00, X3, X11
	MOVQ      $0x62242240ace5045a, AX
	PINSRQ    $0x1, AX, X12
	PCLMULQDQ $0x11, X12, X3
	PXOR      X11, X3
	PXOR      X2, X3
	MOVQ      $0xbdd7ac0ee1a4a0f0, AX
	MOVQ      AX, X11
	PCLMULQDQ $0x00, X4, X11
	MOVQ      $0xa3ffdc1fe8e82a8b, AX
	PINSRQ    $0x1, AX, X12
	PCLMULQDQ $0x11, X12, X4
	PXOR      X11, X4
	PXOR      X3, X4
	MOVQ      $0xb0bc2e589204f500, AX
	MOVQ      AX, X11
	PCLMULQDQ $0x00, X5, X11
	MOVQ      $0xe1e0bb9d45d7a44c, AX
	PINSRQ    $0x1, AX, X12
	PCLMULQDQ $0x11, X12, X5
	PXOR      X11, X5
	PXOR      X4, X5
	MOVQ      $0xeadc41fd2ba3d420, AX
	MOVQ      AX, X11
	PCLMULQDQ $0x00, X6, X11
	MOVQ      $0x21e9761e252621ac, AX
	PINSRQ    $0x1, AX, X12
	PCLMULQDQ $0x11, X12, X6
	PXOR      X11, X6
	PXOR      X5, X6
	MOVQ      AX, X5
	PCLMULQDQ $0x00, X6, X5
	PSHUFD    $0xee, X6, X6
	PXOR      X5, X6
	MOVQ      $0x27ecfa329aef9f77, AX
	MOVQ      AX, X4
	PCLMULQDQ $0x00, X4, X6
	PEXTRQ    $0, X6, BX
	MOVQ      $0x34d926535897936b, AX
	MOVQ      AX, X4
	PCLMULQDQ $0x00, X4, X6
	PXOR      X5, X6
	PEXTRQ    $1, X6, AX
	XORQ      BX, AX

skip128:
	NOTQ AX
	MOVQ AX, checksum+32(FP)
	RET