summaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/crypto/chacha20/chacha_ppc64x.s
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/crypto/chacha20/chacha_ppc64x.s')
-rw-r--r-- vendor/golang.org/x/crypto/chacha20/chacha_ppc64x.s | 501
1 files changed, 0 insertions, 501 deletions
diff --git a/vendor/golang.org/x/crypto/chacha20/chacha_ppc64x.s b/vendor/golang.org/x/crypto/chacha20/chacha_ppc64x.s
deleted file mode 100644
index a660b4112..000000000
--- a/vendor/golang.org/x/crypto/chacha20/chacha_ppc64x.s
+++ /dev/null
@@ -1,501 +0,0 @@
-// Copyright 2019 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Based on CRYPTOGAMS code with the following comment:
-// # ====================================================================
-// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-// # project. The module is, however, dual licensed under OpenSSL and
-// # CRYPTOGAMS licenses depending on where you obtain it. For further
-// # details see http://www.openssl.org/~appro/cryptogams/.
-// # ====================================================================
-
-// Code for the perl script that generates the ppc64 assembler
-// can be found in the cryptogams repository at the link below. It is based on
-// the original from openssl.
-
-// https://github.com/dot-asm/cryptogams/commit/a60f5b50ed908e91
-
-// The differences in this and the original implementation are
-// due to the calling conventions and initialization of constants.
-
-//go:build gc && !purego && (ppc64 || ppc64le)
-
-#include "textflag.h"
-
-#define OUT R3
-#define INP R4
-#define LEN R5
-#define KEY R6
-#define CNT R7
-#define TMP R15
-
-#define CONSTBASE R16
-#define BLOCKS R17
-
-// for VPERMXOR
-#define MASK R18
-
-DATA consts<>+0x00(SB)/4, $0x61707865
-DATA consts<>+0x04(SB)/4, $0x3320646e
-DATA consts<>+0x08(SB)/4, $0x79622d32
-DATA consts<>+0x0c(SB)/4, $0x6b206574
-DATA consts<>+0x10(SB)/4, $0x00000001
-DATA consts<>+0x14(SB)/4, $0x00000000
-DATA consts<>+0x18(SB)/4, $0x00000000
-DATA consts<>+0x1c(SB)/4, $0x00000000
-DATA consts<>+0x20(SB)/4, $0x00000004
-DATA consts<>+0x24(SB)/4, $0x00000000
-DATA consts<>+0x28(SB)/4, $0x00000000
-DATA consts<>+0x2c(SB)/4, $0x00000000
-DATA consts<>+0x30(SB)/4, $0x0e0f0c0d
-DATA consts<>+0x34(SB)/4, $0x0a0b0809
-DATA consts<>+0x38(SB)/4, $0x06070405
-DATA consts<>+0x3c(SB)/4, $0x02030001
-DATA consts<>+0x40(SB)/4, $0x0d0e0f0c
-DATA consts<>+0x44(SB)/4, $0x090a0b08
-DATA consts<>+0x48(SB)/4, $0x05060704
-DATA consts<>+0x4c(SB)/4, $0x01020300
-DATA consts<>+0x50(SB)/4, $0x61707865
-DATA consts<>+0x54(SB)/4, $0x61707865
-DATA consts<>+0x58(SB)/4, $0x61707865
-DATA consts<>+0x5c(SB)/4, $0x61707865
-DATA consts<>+0x60(SB)/4, $0x3320646e
-DATA consts<>+0x64(SB)/4, $0x3320646e
-DATA consts<>+0x68(SB)/4, $0x3320646e
-DATA consts<>+0x6c(SB)/4, $0x3320646e
-DATA consts<>+0x70(SB)/4, $0x79622d32
-DATA consts<>+0x74(SB)/4, $0x79622d32
-DATA consts<>+0x78(SB)/4, $0x79622d32
-DATA consts<>+0x7c(SB)/4, $0x79622d32
-DATA consts<>+0x80(SB)/4, $0x6b206574
-DATA consts<>+0x84(SB)/4, $0x6b206574
-DATA consts<>+0x88(SB)/4, $0x6b206574
-DATA consts<>+0x8c(SB)/4, $0x6b206574
-DATA consts<>+0x90(SB)/4, $0x00000000
-DATA consts<>+0x94(SB)/4, $0x00000001
-DATA consts<>+0x98(SB)/4, $0x00000002
-DATA consts<>+0x9c(SB)/4, $0x00000003
-DATA consts<>+0xa0(SB)/4, $0x11223300
-DATA consts<>+0xa4(SB)/4, $0x55667744
-DATA consts<>+0xa8(SB)/4, $0x99aabb88
-DATA consts<>+0xac(SB)/4, $0xddeeffcc
-DATA consts<>+0xb0(SB)/4, $0x22330011
-DATA consts<>+0xb4(SB)/4, $0x66774455
-DATA consts<>+0xb8(SB)/4, $0xaabb8899
-DATA consts<>+0xbc(SB)/4, $0xeeffccdd
-GLOBL consts<>(SB), RODATA, $0xc0
-
-#ifdef GOARCH_ppc64
-#define BE_XXBRW_INIT() \
- LVSL (R0)(R0), V24 \
- VSPLTISB $3, V25 \
- VXOR V24, V25, V24 \
-
-#define BE_XXBRW(vr) VPERM vr, vr, V24, vr
-#else
-#define BE_XXBRW_INIT()
-#define BE_XXBRW(vr)
-#endif
-
-//func chaCha20_ctr32_vsx(out, inp *byte, len int, key *[8]uint32, counter *uint32)
-TEXT ·chaCha20_ctr32_vsx(SB),NOSPLIT,$64-40
- MOVD out+0(FP), OUT
- MOVD inp+8(FP), INP
- MOVD len+16(FP), LEN
- MOVD key+24(FP), KEY
- MOVD counter+32(FP), CNT
-
- // Addressing for constants
- MOVD $consts<>+0x00(SB), CONSTBASE
- MOVD $16, R8
- MOVD $32, R9
- MOVD $48, R10
- MOVD $64, R11
- SRD $6, LEN, BLOCKS
- // for VPERMXOR
- MOVD $consts<>+0xa0(SB), MASK
- MOVD $16, R20
- // V16
- LXVW4X (CONSTBASE)(R0), VS48
- ADD $80,CONSTBASE
-
- // Load key into V17,V18
- LXVW4X (KEY)(R0), VS49
- LXVW4X (KEY)(R8), VS50
-
- // Load CNT, NONCE into V19
- LXVW4X (CNT)(R0), VS51
-
- // Clear V27
- VXOR V27, V27, V27
-
- BE_XXBRW_INIT()
-
- // V28
- LXVW4X (CONSTBASE)(R11), VS60
-
- // Load mask constants for VPERMXOR
- LXVW4X (MASK)(R0), V20
- LXVW4X (MASK)(R20), V21
-
- // splat slot from V19 -> V26
- VSPLTW $0, V19, V26
-
- VSLDOI $4, V19, V27, V19
- VSLDOI $12, V27, V19, V19
-
- VADDUWM V26, V28, V26
-
- MOVD $10, R14
- MOVD R14, CTR
- PCALIGN $16
-loop_outer_vsx:
- // V0, V1, V2, V3
- LXVW4X (R0)(CONSTBASE), VS32
- LXVW4X (R8)(CONSTBASE), VS33
- LXVW4X (R9)(CONSTBASE), VS34
- LXVW4X (R10)(CONSTBASE), VS35
-
- // splat values from V17, V18 into V4-V11
- VSPLTW $0, V17, V4
- VSPLTW $1, V17, V5
- VSPLTW $2, V17, V6
- VSPLTW $3, V17, V7
- VSPLTW $0, V18, V8
- VSPLTW $1, V18, V9
- VSPLTW $2, V18, V10
- VSPLTW $3, V18, V11
-
- // VOR
- VOR V26, V26, V12
-
- // splat values from V19 -> V13, V14, V15
- VSPLTW $1, V19, V13
- VSPLTW $2, V19, V14
- VSPLTW $3, V19, V15
-
- // splat const values
- VSPLTISW $-16, V27
- VSPLTISW $12, V28
- VSPLTISW $8, V29
- VSPLTISW $7, V30
- PCALIGN $16
-loop_vsx:
- VADDUWM V0, V4, V0
- VADDUWM V1, V5, V1
- VADDUWM V2, V6, V2
- VADDUWM V3, V7, V3
-
- VPERMXOR V12, V0, V21, V12
- VPERMXOR V13, V1, V21, V13
- VPERMXOR V14, V2, V21, V14
- VPERMXOR V15, V3, V21, V15
-
- VADDUWM V8, V12, V8
- VADDUWM V9, V13, V9
- VADDUWM V10, V14, V10
- VADDUWM V11, V15, V11
-
- VXOR V4, V8, V4
- VXOR V5, V9, V5
- VXOR V6, V10, V6
- VXOR V7, V11, V7
-
- VRLW V4, V28, V4
- VRLW V5, V28, V5
- VRLW V6, V28, V6
- VRLW V7, V28, V7
-
- VADDUWM V0, V4, V0
- VADDUWM V1, V5, V1
- VADDUWM V2, V6, V2
- VADDUWM V3, V7, V3
-
- VPERMXOR V12, V0, V20, V12
- VPERMXOR V13, V1, V20, V13
- VPERMXOR V14, V2, V20, V14
- VPERMXOR V15, V3, V20, V15
-
- VADDUWM V8, V12, V8
- VADDUWM V9, V13, V9
- VADDUWM V10, V14, V10
- VADDUWM V11, V15, V11
-
- VXOR V4, V8, V4
- VXOR V5, V9, V5
- VXOR V6, V10, V6
- VXOR V7, V11, V7
-
- VRLW V4, V30, V4
- VRLW V5, V30, V5
- VRLW V6, V30, V6
- VRLW V7, V30, V7
-
- VADDUWM V0, V5, V0
- VADDUWM V1, V6, V1
- VADDUWM V2, V7, V2
- VADDUWM V3, V4, V3
-
- VPERMXOR V15, V0, V21, V15
- VPERMXOR V12, V1, V21, V12
- VPERMXOR V13, V2, V21, V13
- VPERMXOR V14, V3, V21, V14
-
- VADDUWM V10, V15, V10
- VADDUWM V11, V12, V11
- VADDUWM V8, V13, V8
- VADDUWM V9, V14, V9
-
- VXOR V5, V10, V5
- VXOR V6, V11, V6
- VXOR V7, V8, V7
- VXOR V4, V9, V4
-
- VRLW V5, V28, V5
- VRLW V6, V28, V6
- VRLW V7, V28, V7
- VRLW V4, V28, V4
-
- VADDUWM V0, V5, V0
- VADDUWM V1, V6, V1
- VADDUWM V2, V7, V2
- VADDUWM V3, V4, V3
-
- VPERMXOR V15, V0, V20, V15
- VPERMXOR V12, V1, V20, V12
- VPERMXOR V13, V2, V20, V13
- VPERMXOR V14, V3, V20, V14
-
- VADDUWM V10, V15, V10
- VADDUWM V11, V12, V11
- VADDUWM V8, V13, V8
- VADDUWM V9, V14, V9
-
- VXOR V5, V10, V5
- VXOR V6, V11, V6
- VXOR V7, V8, V7
- VXOR V4, V9, V4
-
- VRLW V5, V30, V5
- VRLW V6, V30, V6
- VRLW V7, V30, V7
- VRLW V4, V30, V4
- BDNZ loop_vsx
-
- VADDUWM V12, V26, V12
-
- VMRGEW V0, V1, V27
- VMRGEW V2, V3, V28
-
- VMRGOW V0, V1, V0
- VMRGOW V2, V3, V2
-
- VMRGEW V4, V5, V29
- VMRGEW V6, V7, V30
-
- XXPERMDI VS32, VS34, $0, VS33
- XXPERMDI VS32, VS34, $3, VS35
- XXPERMDI VS59, VS60, $0, VS32
- XXPERMDI VS59, VS60, $3, VS34
-
- VMRGOW V4, V5, V4
- VMRGOW V6, V7, V6
-
- VMRGEW V8, V9, V27
- VMRGEW V10, V11, V28
-
- XXPERMDI VS36, VS38, $0, VS37
- XXPERMDI VS36, VS38, $3, VS39
- XXPERMDI VS61, VS62, $0, VS36
- XXPERMDI VS61, VS62, $3, VS38
-
- VMRGOW V8, V9, V8
- VMRGOW V10, V11, V10
-
- VMRGEW V12, V13, V29
- VMRGEW V14, V15, V30
-
- XXPERMDI VS40, VS42, $0, VS41
- XXPERMDI VS40, VS42, $3, VS43
- XXPERMDI VS59, VS60, $0, VS40
- XXPERMDI VS59, VS60, $3, VS42
-
- VMRGOW V12, V13, V12
- VMRGOW V14, V15, V14
-
- VSPLTISW $4, V27
- VADDUWM V26, V27, V26
-
- XXPERMDI VS44, VS46, $0, VS45
- XXPERMDI VS44, VS46, $3, VS47
- XXPERMDI VS61, VS62, $0, VS44
- XXPERMDI VS61, VS62, $3, VS46
-
- VADDUWM V0, V16, V0
- VADDUWM V4, V17, V4
- VADDUWM V8, V18, V8
- VADDUWM V12, V19, V12
-
- BE_XXBRW(V0)
- BE_XXBRW(V4)
- BE_XXBRW(V8)
- BE_XXBRW(V12)
-
- CMPU LEN, $64
- BLT tail_vsx
-
- // Bottom of loop
- LXVW4X (INP)(R0), VS59
- LXVW4X (INP)(R8), VS60
- LXVW4X (INP)(R9), VS61
- LXVW4X (INP)(R10), VS62
-
- VXOR V27, V0, V27
- VXOR V28, V4, V28
- VXOR V29, V8, V29
- VXOR V30, V12, V30
-
- STXVW4X VS59, (OUT)(R0)
- STXVW4X VS60, (OUT)(R8)
- ADD $64, INP
- STXVW4X VS61, (OUT)(R9)
- ADD $-64, LEN
- STXVW4X VS62, (OUT)(R10)
- ADD $64, OUT
- BEQ done_vsx
-
- VADDUWM V1, V16, V0
- VADDUWM V5, V17, V4
- VADDUWM V9, V18, V8
- VADDUWM V13, V19, V12
-
- BE_XXBRW(V0)
- BE_XXBRW(V4)
- BE_XXBRW(V8)
- BE_XXBRW(V12)
-
- CMPU LEN, $64
- BLT tail_vsx
-
- LXVW4X (INP)(R0), VS59
- LXVW4X (INP)(R8), VS60
- LXVW4X (INP)(R9), VS61
- LXVW4X (INP)(R10), VS62
-
- VXOR V27, V0, V27
- VXOR V28, V4, V28
- VXOR V29, V8, V29
- VXOR V30, V12, V30
-
- STXVW4X VS59, (OUT)(R0)
- STXVW4X VS60, (OUT)(R8)
- ADD $64, INP
- STXVW4X VS61, (OUT)(R9)
- ADD $-64, LEN
-	STXVW4X VS62, (OUT)(R10)
- ADD $64, OUT
- BEQ done_vsx
-
- VADDUWM V2, V16, V0
- VADDUWM V6, V17, V4
- VADDUWM V10, V18, V8
- VADDUWM V14, V19, V12
-
- BE_XXBRW(V0)
- BE_XXBRW(V4)
- BE_XXBRW(V8)
- BE_XXBRW(V12)
-
- CMPU LEN, $64
- BLT tail_vsx
-
- LXVW4X (INP)(R0), VS59
- LXVW4X (INP)(R8), VS60
- LXVW4X (INP)(R9), VS61
- LXVW4X (INP)(R10), VS62
-
- VXOR V27, V0, V27
- VXOR V28, V4, V28
- VXOR V29, V8, V29
- VXOR V30, V12, V30
-
- STXVW4X VS59, (OUT)(R0)
- STXVW4X VS60, (OUT)(R8)
- ADD $64, INP
- STXVW4X VS61, (OUT)(R9)
- ADD $-64, LEN
- STXVW4X VS62, (OUT)(R10)
- ADD $64, OUT
- BEQ done_vsx
-
- VADDUWM V3, V16, V0
- VADDUWM V7, V17, V4
- VADDUWM V11, V18, V8
- VADDUWM V15, V19, V12
-
- BE_XXBRW(V0)
- BE_XXBRW(V4)
- BE_XXBRW(V8)
- BE_XXBRW(V12)
-
- CMPU LEN, $64
- BLT tail_vsx
-
- LXVW4X (INP)(R0), VS59
- LXVW4X (INP)(R8), VS60
- LXVW4X (INP)(R9), VS61
- LXVW4X (INP)(R10), VS62
-
- VXOR V27, V0, V27
- VXOR V28, V4, V28
- VXOR V29, V8, V29
- VXOR V30, V12, V30
-
- STXVW4X VS59, (OUT)(R0)
- STXVW4X VS60, (OUT)(R8)
- ADD $64, INP
- STXVW4X VS61, (OUT)(R9)
- ADD $-64, LEN
- STXVW4X VS62, (OUT)(R10)
- ADD $64, OUT
-
- MOVD $10, R14
- MOVD R14, CTR
- BNE loop_outer_vsx
-
-done_vsx:
- // Increment counter by number of 64 byte blocks
- MOVWZ (CNT), R14
- ADD BLOCKS, R14
- MOVWZ R14, (CNT)
- RET
-
-tail_vsx:
- ADD $32, R1, R11
- MOVD LEN, CTR
-
- // Save values on stack to copy from
- STXVW4X VS32, (R11)(R0)
- STXVW4X VS36, (R11)(R8)
- STXVW4X VS40, (R11)(R9)
- STXVW4X VS44, (R11)(R10)
- ADD $-1, R11, R12
- ADD $-1, INP
- ADD $-1, OUT
- PCALIGN $16
-looptail_vsx:
- // Copying the result to OUT
- // in bytes.
- MOVBZU 1(R12), KEY
- MOVBZU 1(INP), TMP
- XOR KEY, TMP, KEY
- MOVBU KEY, 1(OUT)
- BDNZ looptail_vsx
-
- // Clear the stack values
- STXVW4X VS48, (R11)(R0)
- STXVW4X VS48, (R11)(R8)
- STXVW4X VS48, (R11)(R9)
- STXVW4X VS48, (R11)(R10)
- BR done_vsx