diff options
Diffstat (limited to 'vendor/github.com/tmthrgd/go-hex/hex_encode_amd64.s')
-rw-r--r-- | vendor/github.com/tmthrgd/go-hex/hex_encode_amd64.s | 227 |
1 files changed, 227 insertions, 0 deletions
diff --git a/vendor/github.com/tmthrgd/go-hex/hex_encode_amd64.s b/vendor/github.com/tmthrgd/go-hex/hex_encode_amd64.s
new file mode 100644
index 000000000..96e6e4caa
--- /dev/null
+++ b/vendor/github.com/tmthrgd/go-hex/hex_encode_amd64.s
@@ -0,0 +1,227 @@
+// Copyright 2016 Tom Thorogood. All rights reserved.
+// Use of this source code is governed by a
+// Modified BSD License license that can be found in
+// the LICENSE file.
+//
+// Copyright 2005-2016, Wojciech Muła. All rights reserved.
+// Use of this source code is governed by a
+// Simplified BSD License license that can be found in
+// the LICENSE file.
+//
+// This file is auto-generated - do not modify
+
+// +build amd64,!gccgo,!appengine
+
+#include "textflag.h"
+
+DATA encodeMask<>+0x00(SB)/8, $0x0f0f0f0f0f0f0f0f // 16-byte constant: 0x0f in every byte, used to isolate one nibble per byte
+DATA encodeMask<>+0x08(SB)/8, $0x0f0f0f0f0f0f0f0f // upper 8 bytes of the same mask
+GLOBL encodeMask<>(SB),RODATA,$16 // file-local, read-only, 16 bytes
+
+TEXT ·encodeAVX(SB),NOSPLIT,$0 // encodeAVX(dst, src, len, alpha): for each of len input bytes writes alpha[high nibble], alpha[low nibble] to dst (2 output bytes per input byte); this variant uses VEX-encoded three-operand forms
+	MOVQ dst+0(FP), DI // DI = output pointer
+	MOVQ src+8(FP), SI // SI = input pointer
+	MOVQ len+16(FP), BX // BX = input bytes still to encode; NOTE(review): len == 0 is not special-cased - the tail path would then read 1 byte from src and write 2 bytes to dst; presumably callers guarantee len > 0, confirm
+	MOVQ alpha+24(FP), DX // DX = pointer to the digit table
+	MOVOU (DX), X15 // X15 = 16-byte digit table, kept for the whole routine
+	CMPQ BX, $16
+	JB tail // fewer than 16 bytes: skip the 16-byte loop
+bigloop: // encodes 16 input bytes per iteration, working from the END of the buffer toward the start
+	MOVOU -16(SI)(BX*1), X0 // unaligned load of src[BX-16 .. BX)
+	VPAND encodeMask<>(SB), X0, X1 // X1 = low nibble of each input byte
+	PSRLW $4, X0 // shift 16-bit lanes right by 4; bits of the neighbouring byte leak in
+	PAND encodeMask<>(SB), X0 // X0 = high nibble of each input byte (leaked bits cleared)
+	VPUNPCKHBW X1, X0, X3 // X3 = hi,lo nibble pairs interleaved for input bytes 8..15
+	PUNPCKLBW X1, X0 // X0 = hi,lo nibble pairs interleaved for input bytes 0..7
+	VPSHUFB X0, X15, X1 // X1 = table lookup of each nibble in X0 (output for input bytes 0..7)
+	VPSHUFB X3, X15, X2 // X2 = table lookup of each nibble in X3 (output for input bytes 8..15)
+	MOVOU X2, -16(DI)(BX*2) // store dst[2*BX-16 .. 2*BX)
+	MOVOU X1, -32(DI)(BX*2) // store dst[2*BX-32 .. 2*BX-16)
+	SUBQ $16, BX // 16 fewer bytes remain
+	JZ ret // nothing left
+	CMPQ BX, $16
+	JAE bigloop // at least one more full 16-byte chunk
+tail: // 0..15 bytes remain at the start of the buffer (SI/DI); handled at most 8 at a time
+	CMPQ BX, $2 // compare/branch ladder selecting the load path for min(BX, 8) bytes
+	JB tail_in_1 // BX < 2
+	JE tail_in_2 // BX == 2
+	CMPQ BX, $4
+	JB tail_in_3 // BX == 3
+	JE tail_in_4 // BX == 4
+	CMPQ BX, $6
+	JB tail_in_5 // BX == 5
+	JE tail_in_6 // BX == 6
+	CMPQ BX, $8
+	JB tail_in_7 // BX == 7; otherwise fall through with BX >= 8
+tail_in_8:
+	MOVQ (SI), X0 // load 8 input bytes at once
+	JMP tail_conv
+tail_in_7: // the tail_in_7..tail_in_4 labels fall through, so only bytes inside the input are read
+	PINSRB $6, 6(SI), X0 // insert input byte 6
+tail_in_6:
+	PINSRB $5, 5(SI), X0 // insert input byte 5
+tail_in_5:
+	PINSRB $4, 4(SI), X0 // insert input byte 4
+tail_in_4:
+	PINSRD $0, (SI), X0 // insert input bytes 0..3 as one dword
+	JMP tail_conv
+tail_in_3: // tail_in_3..tail_in_1 fall through into tail_conv
+	PINSRB $2, 2(SI), X0 // insert input byte 2
+tail_in_2:
+	PINSRB $1, 1(SI), X0 // insert input byte 1
+tail_in_1:
+	PINSRB $0, (SI), X0 // insert input byte 0; lanes not inserted above keep whatever X0 held before
+tail_conv: // same nibble split + table lookup as bigloop, low 8 input bytes only
+	VPAND encodeMask<>(SB), X0, X1 // X1 = low nibbles
+	PSRLW $4, X0 // shift 16-bit lanes right by 4
+	PAND encodeMask<>(SB), X0 // X0 = high nibbles
+	PUNPCKLBW X1, X0 // X0 = hi,lo nibble pairs for input bytes 0..7
+	VPSHUFB X0, X15, X1 // X1 = up to 16 output bytes
+	CMPQ BX, $2 // second ladder on the same BX, selecting the store path
+	JB tail_out_1 // BX < 2
+	JE tail_out_2 // BX == 2
+	CMPQ BX, $4
+	JB tail_out_3 // BX == 3
+	JE tail_out_4 // BX == 4
+	CMPQ BX, $6
+	JB tail_out_5 // BX == 5
+	JE tail_out_6 // BX == 6
+	CMPQ BX, $8
+	JB tail_out_7 // BX == 7; otherwise fall through with BX >= 8
+tail_out_8:
+	MOVOU X1, (DI) // store all 16 output bytes
+	SUBQ $8, BX // 8 input bytes consumed
+	JZ ret // done if exactly 8 were left
+	ADDQ $8, SI // advance input past the consumed bytes
+	ADDQ $16, DI // advance output past the written bytes
+	JMP tail // 1..7 bytes remain; go round once more
+tail_out_7: // tail_out_7..tail_out_4 fall through: extra byte pairs first, then the low 8 bytes
+	PEXTRB $13, X1, 13(DI) // output bytes 12..13 belong to input byte 6
+	PEXTRB $12, X1, 12(DI)
+tail_out_6:
+	PEXTRB $11, X1, 11(DI) // output bytes 10..11 belong to input byte 5
+	PEXTRB $10, X1, 10(DI)
+tail_out_5:
+	PEXTRB $9, X1, 9(DI) // output bytes 8..9 belong to input byte 4
+	PEXTRB $8, X1, 8(DI)
+tail_out_4:
+	MOVQ X1, (DI) // output bytes 0..7 (input bytes 0..3) in one store
+	RET
+tail_out_3: // tail_out_3..tail_out_1 fall through, storing one byte pair per input byte
+	PEXTRB $5, X1, 5(DI) // output bytes 4..5 belong to input byte 2
+	PEXTRB $4, X1, 4(DI)
+tail_out_2:
+	PEXTRB $3, X1, 3(DI) // output bytes 2..3 belong to input byte 1
+	PEXTRB $2, X1, 2(DI)
+tail_out_1:
+	PEXTRB $1, X1, 1(DI) // output bytes 0..1 belong to input byte 0
+	PEXTRB $0, X1, (DI)
+ret:
+	RET
+
+TEXT ·encodeSSE(SB),NOSPLIT,$0 // encodeSSE(dst, src, len, alpha): same algorithm and control flow as encodeAVX, written with two-operand SSE forms only (extra MOVOU register copies replace the VEX three-operand instructions)
+	MOVQ dst+0(FP), DI // DI = output pointer
+	MOVQ src+8(FP), SI // SI = input pointer
+	MOVQ len+16(FP), BX // BX = input bytes still to encode; NOTE(review): len == 0 is not special-cased - the tail path would then read 1 byte from src and write 2 bytes to dst; presumably callers guarantee len > 0, confirm
+	MOVQ alpha+24(FP), DX // DX = pointer to the digit table
+	MOVOU (DX), X15 // X15 = 16-byte digit table, never overwritten below
+	CMPQ BX, $16
+	JB tail // fewer than 16 bytes: skip the 16-byte loop
+bigloop: // encodes 16 input bytes per iteration, working from the END of the buffer toward the start
+	MOVOU -16(SI)(BX*1), X0 // unaligned load of src[BX-16 .. BX)
+	MOVOU X0, X1 // copy, because PAND overwrites its destination
+	PAND encodeMask<>(SB), X1 // X1 = low nibble of each input byte
+	PSRLW $4, X0 // shift 16-bit lanes right by 4; bits of the neighbouring byte leak in
+	PAND encodeMask<>(SB), X0 // X0 = high nibble of each input byte (leaked bits cleared)
+	MOVOU X0, X3 // copy high nibbles before the low-half unpack overwrites X0
+	PUNPCKHBW X1, X3 // X3 = hi,lo nibble pairs interleaved for input bytes 8..15
+	PUNPCKLBW X1, X0 // X0 = hi,lo nibble pairs interleaved for input bytes 0..7
+	MOVOU X15, X1 // fresh copy of the table, because PSHUFB shuffles its destination in place
+	PSHUFB X0, X1 // X1 = table lookup of each nibble in X0 (output for input bytes 0..7)
+	MOVOU X15, X2 // second copy of the table
+	PSHUFB X3, X2 // X2 = table lookup of each nibble in X3 (output for input bytes 8..15)
+	MOVOU X2, -16(DI)(BX*2) // store dst[2*BX-16 .. 2*BX)
+	MOVOU X1, -32(DI)(BX*2) // store dst[2*BX-32 .. 2*BX-16)
+	SUBQ $16, BX // 16 fewer bytes remain
+	JZ ret // nothing left
+	CMPQ BX, $16
+	JAE bigloop // at least one more full 16-byte chunk
+tail: // 0..15 bytes remain at the start of the buffer (SI/DI); handled at most 8 at a time
+	CMPQ BX, $2 // compare/branch ladder selecting the load path for min(BX, 8) bytes
+	JB tail_in_1 // BX < 2
+	JE tail_in_2 // BX == 2
+	CMPQ BX, $4
+	JB tail_in_3 // BX == 3
+	JE tail_in_4 // BX == 4
+	CMPQ BX, $6
+	JB tail_in_5 // BX == 5
+	JE tail_in_6 // BX == 6
+	CMPQ BX, $8
+	JB tail_in_7 // BX == 7; otherwise fall through with BX >= 8
+tail_in_8:
+	MOVQ (SI), X0 // load 8 input bytes at once
+	JMP tail_conv
+tail_in_7: // the tail_in_7..tail_in_4 labels fall through, so only bytes inside the input are read
+	PINSRB $6, 6(SI), X0 // insert input byte 6
+tail_in_6:
+	PINSRB $5, 5(SI), X0 // insert input byte 5
+tail_in_5:
+	PINSRB $4, 4(SI), X0 // insert input byte 4
+tail_in_4:
+	PINSRD $0, (SI), X0 // insert input bytes 0..3 as one dword
+	JMP tail_conv
+tail_in_3: // tail_in_3..tail_in_1 fall through into tail_conv
+	PINSRB $2, 2(SI), X0 // insert input byte 2
+tail_in_2:
+	PINSRB $1, 1(SI), X0 // insert input byte 1
+tail_in_1:
+	PINSRB $0, (SI), X0 // insert input byte 0; lanes not inserted above keep whatever X0 held before
+tail_conv: // same nibble split + table lookup as bigloop, low 8 input bytes only
+	MOVOU X0, X1 // copy, because PAND overwrites its destination
+	PAND encodeMask<>(SB), X1 // X1 = low nibbles
+	PSRLW $4, X0 // shift 16-bit lanes right by 4
+	PAND encodeMask<>(SB), X0 // X0 = high nibbles
+	PUNPCKLBW X1, X0 // X0 = hi,lo nibble pairs for input bytes 0..7
+	MOVOU X15, X1 // fresh copy of the table for the in-place shuffle
+	PSHUFB X0, X1 // X1 = up to 16 output bytes
+	CMPQ BX, $2 // second ladder on the same BX, selecting the store path
+	JB tail_out_1 // BX < 2
+	JE tail_out_2 // BX == 2
+	CMPQ BX, $4
+	JB tail_out_3 // BX == 3
+	JE tail_out_4 // BX == 4
+	CMPQ BX, $6
+	JB tail_out_5 // BX == 5
+	JE tail_out_6 // BX == 6
+	CMPQ BX, $8
+	JB tail_out_7 // BX == 7; otherwise fall through with BX >= 8
+tail_out_8:
+	MOVOU X1, (DI) // store all 16 output bytes
+	SUBQ $8, BX // 8 input bytes consumed
+	JZ ret // done if exactly 8 were left
+	ADDQ $8, SI // advance input past the consumed bytes
+	ADDQ $16, DI // advance output past the written bytes
+	JMP tail // 1..7 bytes remain; go round once more
+tail_out_7: // tail_out_7..tail_out_4 fall through: extra byte pairs first, then the low 8 bytes
+	PEXTRB $13, X1, 13(DI) // output bytes 12..13 belong to input byte 6
+	PEXTRB $12, X1, 12(DI)
+tail_out_6:
+	PEXTRB $11, X1, 11(DI) // output bytes 10..11 belong to input byte 5
+	PEXTRB $10, X1, 10(DI)
+tail_out_5:
+	PEXTRB $9, X1, 9(DI) // output bytes 8..9 belong to input byte 4
+	PEXTRB $8, X1, 8(DI)
+tail_out_4:
+	MOVQ X1, (DI) // output bytes 0..7 (input bytes 0..3) in one store
+	RET
+tail_out_3: // tail_out_3..tail_out_1 fall through, storing one byte pair per input byte
+	PEXTRB $5, X1, 5(DI) // output bytes 4..5 belong to input byte 2
+	PEXTRB $4, X1, 4(DI)
+tail_out_2:
+	PEXTRB $3, X1, 3(DI) // output bytes 2..3 belong to input byte 1
+	PEXTRB $2, X1, 2(DI)
+tail_out_1:
+	PEXTRB $1, X1, 1(DI) // output bytes 0..1 belong to input byte 0
+	PEXTRB $0, X1, (DI)
+ret:
+	RET |