summary | refs | log | tree | commit | diff
path: root/vendor/golang.org/x/crypto/chacha20
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/crypto/chacha20')
-rw-r--r-- vendor/golang.org/x/crypto/chacha20/chacha_ppc64le.s | 110
1 file changed, 52 insertions, 58 deletions
diff --git a/vendor/golang.org/x/crypto/chacha20/chacha_ppc64le.s b/vendor/golang.org/x/crypto/chacha20/chacha_ppc64le.s
index 66aebae25..c672ccf69 100644
--- a/vendor/golang.org/x/crypto/chacha20/chacha_ppc64le.s
+++ b/vendor/golang.org/x/crypto/chacha20/chacha_ppc64le.s
@@ -33,6 +33,9 @@
#define CONSTBASE R16
#define BLOCKS R17
+// for VPERMXOR
+#define MASK R18
+
DATA consts<>+0x00(SB)/8, $0x3320646e61707865
DATA consts<>+0x08(SB)/8, $0x6b20657479622d32
DATA consts<>+0x10(SB)/8, $0x0000000000000001
@@ -53,7 +56,11 @@ DATA consts<>+0x80(SB)/8, $0x6b2065746b206574
DATA consts<>+0x88(SB)/8, $0x6b2065746b206574
DATA consts<>+0x90(SB)/8, $0x0000000100000000
DATA consts<>+0x98(SB)/8, $0x0000000300000002
-GLOBL consts<>(SB), RODATA, $0xa0
+DATA consts<>+0xa0(SB)/8, $0x5566774411223300
+DATA consts<>+0xa8(SB)/8, $0xddeeffcc99aabb88
+DATA consts<>+0xb0(SB)/8, $0x6677445522330011
+DATA consts<>+0xb8(SB)/8, $0xeeffccddaabb8899
+GLOBL consts<>(SB), RODATA, $0xc0
//func chaCha20_ctr32_vsx(out, inp *byte, len int, key *[8]uint32, counter *uint32)
TEXT ·chaCha20_ctr32_vsx(SB),NOSPLIT,$64-40
@@ -70,6 +77,9 @@ TEXT ·chaCha20_ctr32_vsx(SB),NOSPLIT,$64-40
MOVD $48, R10
MOVD $64, R11
SRD $6, LEN, BLOCKS
+ // for VPERMXOR
+ MOVD $consts<>+0xa0(SB), MASK
+ MOVD $16, R20
// V16
LXVW4X (CONSTBASE)(R0), VS48
ADD $80,CONSTBASE
@@ -87,6 +97,10 @@ TEXT ·chaCha20_ctr32_vsx(SB),NOSPLIT,$64-40
// V28
LXVW4X (CONSTBASE)(R11), VS60
+ // Load mask constants for VPERMXOR
+ LXVW4X (MASK)(R0), V20
+ LXVW4X (MASK)(R20), V21
+
// splat slot from V19 -> V26
VSPLTW $0, V19, V26
@@ -97,7 +111,7 @@ TEXT ·chaCha20_ctr32_vsx(SB),NOSPLIT,$64-40
MOVD $10, R14
MOVD R14, CTR
-
+ PCALIGN $16
loop_outer_vsx:
// V0, V1, V2, V3
LXVW4X (R0)(CONSTBASE), VS32
@@ -128,22 +142,17 @@ loop_outer_vsx:
VSPLTISW $12, V28
VSPLTISW $8, V29
VSPLTISW $7, V30
-
+ PCALIGN $16
loop_vsx:
VADDUWM V0, V4, V0
VADDUWM V1, V5, V1
VADDUWM V2, V6, V2
VADDUWM V3, V7, V3
- VXOR V12, V0, V12
- VXOR V13, V1, V13
- VXOR V14, V2, V14
- VXOR V15, V3, V15
-
- VRLW V12, V27, V12
- VRLW V13, V27, V13
- VRLW V14, V27, V14
- VRLW V15, V27, V15
+ VPERMXOR V12, V0, V21, V12
+ VPERMXOR V13, V1, V21, V13
+ VPERMXOR V14, V2, V21, V14
+ VPERMXOR V15, V3, V21, V15
VADDUWM V8, V12, V8
VADDUWM V9, V13, V9
@@ -165,15 +174,10 @@ loop_vsx:
VADDUWM V2, V6, V2
VADDUWM V3, V7, V3
- VXOR V12, V0, V12
- VXOR V13, V1, V13
- VXOR V14, V2, V14
- VXOR V15, V3, V15
-
- VRLW V12, V29, V12
- VRLW V13, V29, V13
- VRLW V14, V29, V14
- VRLW V15, V29, V15
+ VPERMXOR V12, V0, V20, V12
+ VPERMXOR V13, V1, V20, V13
+ VPERMXOR V14, V2, V20, V14
+ VPERMXOR V15, V3, V20, V15
VADDUWM V8, V12, V8
VADDUWM V9, V13, V9
@@ -195,15 +199,10 @@ loop_vsx:
VADDUWM V2, V7, V2
VADDUWM V3, V4, V3
- VXOR V15, V0, V15
- VXOR V12, V1, V12
- VXOR V13, V2, V13
- VXOR V14, V3, V14
-
- VRLW V15, V27, V15
- VRLW V12, V27, V12
- VRLW V13, V27, V13
- VRLW V14, V27, V14
+ VPERMXOR V15, V0, V21, V15
+ VPERMXOR V12, V1, V21, V12
+ VPERMXOR V13, V2, V21, V13
+ VPERMXOR V14, V3, V21, V14
VADDUWM V10, V15, V10
VADDUWM V11, V12, V11
@@ -225,15 +224,10 @@ loop_vsx:
VADDUWM V2, V7, V2
VADDUWM V3, V4, V3
- VXOR V15, V0, V15
- VXOR V12, V1, V12
- VXOR V13, V2, V13
- VXOR V14, V3, V14
-
- VRLW V15, V29, V15
- VRLW V12, V29, V12
- VRLW V13, V29, V13
- VRLW V14, V29, V14
+ VPERMXOR V15, V0, V20, V15
+ VPERMXOR V12, V1, V20, V12
+ VPERMXOR V13, V2, V20, V13
+ VPERMXOR V14, V3, V20, V14
VADDUWM V10, V15, V10
VADDUWM V11, V12, V11
@@ -249,48 +243,48 @@ loop_vsx:
VRLW V6, V30, V6
VRLW V7, V30, V7
VRLW V4, V30, V4
- BC 16, LT, loop_vsx
+ BDNZ loop_vsx
VADDUWM V12, V26, V12
- WORD $0x13600F8C // VMRGEW V0, V1, V27
- WORD $0x13821F8C // VMRGEW V2, V3, V28
+ VMRGEW V0, V1, V27
+ VMRGEW V2, V3, V28
- WORD $0x10000E8C // VMRGOW V0, V1, V0
- WORD $0x10421E8C // VMRGOW V2, V3, V2
+ VMRGOW V0, V1, V0
+ VMRGOW V2, V3, V2
- WORD $0x13A42F8C // VMRGEW V4, V5, V29
- WORD $0x13C63F8C // VMRGEW V6, V7, V30
+ VMRGEW V4, V5, V29
+ VMRGEW V6, V7, V30
XXPERMDI VS32, VS34, $0, VS33
XXPERMDI VS32, VS34, $3, VS35
XXPERMDI VS59, VS60, $0, VS32
XXPERMDI VS59, VS60, $3, VS34
- WORD $0x10842E8C // VMRGOW V4, V5, V4
- WORD $0x10C63E8C // VMRGOW V6, V7, V6
+ VMRGOW V4, V5, V4
+ VMRGOW V6, V7, V6
- WORD $0x13684F8C // VMRGEW V8, V9, V27
- WORD $0x138A5F8C // VMRGEW V10, V11, V28
+ VMRGEW V8, V9, V27
+ VMRGEW V10, V11, V28
XXPERMDI VS36, VS38, $0, VS37
XXPERMDI VS36, VS38, $3, VS39
XXPERMDI VS61, VS62, $0, VS36
XXPERMDI VS61, VS62, $3, VS38
- WORD $0x11084E8C // VMRGOW V8, V9, V8
- WORD $0x114A5E8C // VMRGOW V10, V11, V10
+ VMRGOW V8, V9, V8
+ VMRGOW V10, V11, V10
- WORD $0x13AC6F8C // VMRGEW V12, V13, V29
- WORD $0x13CE7F8C // VMRGEW V14, V15, V30
+ VMRGEW V12, V13, V29
+ VMRGEW V14, V15, V30
XXPERMDI VS40, VS42, $0, VS41
XXPERMDI VS40, VS42, $3, VS43
XXPERMDI VS59, VS60, $0, VS40
XXPERMDI VS59, VS60, $3, VS42
- WORD $0x118C6E8C // VMRGOW V12, V13, V12
- WORD $0x11CE7E8C // VMRGOW V14, V15, V14
+ VMRGOW V12, V13, V12
+ VMRGOW V14, V15, V14
VSPLTISW $4, V27
VADDUWM V26, V27, V26
@@ -431,7 +425,7 @@ tail_vsx:
ADD $-1, R11, R12
ADD $-1, INP
ADD $-1, OUT
-
+ PCALIGN $16
looptail_vsx:
// Copying the result to OUT
// in bytes.
@@ -439,7 +433,7 @@ looptail_vsx:
MOVBZU 1(INP), TMP
XOR KEY, TMP, KEY
MOVBU KEY, 1(OUT)
- BC 16, LT, looptail_vsx
+ BDNZ looptail_vsx
// Clear the stack values
STXVW4X VS48, (R11)(R0)