diff options
| author | 2021-08-12 21:03:24 +0200 | |
|---|---|---|
| committer | 2021-08-12 21:03:24 +0200 | |
| commit | 98263a7de64269898a2f81207e38943b5c8e8653 (patch) | |
| tree | 743c90f109a6c5d27832d1dcef2388d939f0f77a /vendor/github.com/tmthrgd/go-hex/hex_decode_amd64.s | |
| parent | Text duplication fix (#137) (diff) | |
| download | gotosocial-98263a7de64269898a2f81207e38943b5c8e8653.tar.xz | |
Grand test fixup (#138)
* start fixing up tests
* fix up tests + automate with drone
* fiddle with linting
* messing about with drone.yml
* some more fiddling
* hmmm
* add cache
* add vendor directory
* verbose
* ci updates
* update some little things
* update sig
Diffstat (limited to 'vendor/github.com/tmthrgd/go-hex/hex_decode_amd64.s')
| -rw-r--r-- | vendor/github.com/tmthrgd/go-hex/hex_decode_amd64.s | 303 |
1 files changed, 303 insertions, 0 deletions
// Copyright 2016 Tom Thorogood. All rights reserved.
// Use of this source code is governed by a
// Modified BSD License license that can be found in
// the LICENSE file.
//
// Copyright 2005-2016, Wojciech Muła. All rights reserved.
// Use of this source code is governed by a
// Simplified BSD License license that can be found in
// the LICENSE file.
//
// This file is auto-generated - do not modify

// +build amd64,!gccgo,!appengine

#include "textflag.h"

// SIMD hex decoding: every 16 ASCII hex characters become 8 output bytes.
//
// Both routines below follow the same plan for each group of characters:
//   1. flag every byte outside [0-9A-Fa-f] and bail out on the first one,
//   2. turn each character into its 4-bit value,
//   3. merge even/odd nibbles into bytes and store them.
//
// PCMPGTB compares signed bytes, so inputs and thresholds are XORed with 0x80
// first; that maps unsigned byte order onto signed byte order.

// decodeBase: row 0 is '0' (0x30), subtracted from every character.
// Row 1 is 0x27 ('a' - '0' - 10), additionally subtracted from letters.
DATA decodeBase<>+0x00(SB)/8, $0x3030303030303030
DATA decodeBase<>+0x08(SB)/8, $0x3030303030303030
DATA decodeBase<>+0x10(SB)/8, $0x2727272727272727
DATA decodeBase<>+0x18(SB)/8, $0x2727272727272727
GLOBL decodeBase<>(SB),RODATA,$32

// decodeToLower: ORing with 0x20 folds 'A'-'F' onto 'a'-'f'; digits already
// have that bit set and are unchanged.
DATA decodeToLower<>+0x00(SB)/8, $0x2020202020202020
DATA decodeToLower<>+0x08(SB)/8, $0x2020202020202020
GLOBL decodeToLower<>(SB),RODATA,$16

// decodeHigh: PSHUFB control selecting source bytes 0,2,4,...,14 (the high
// nibble of each output byte). The 0xff entries make PSHUFB write zero.
DATA decodeHigh<>+0x00(SB)/8, $0x0e0c0a0806040200
DATA decodeHigh<>+0x08(SB)/8, $0xffffffffffffffff
GLOBL decodeHigh<>(SB),RODATA,$16

// decodeLow: PSHUFB control selecting source bytes 1,3,5,...,15 (the low
// nibble of each output byte). The 0xff entries make PSHUFB write zero.
DATA decodeLow<>+0x00(SB)/8, $0x0f0d0b0907050301
DATA decodeLow<>+0x08(SB)/8, $0xffffffffffffffff
GLOBL decodeLow<>(SB),RODATA,$16

// decodeValid: range limits, each already XORed with 0x80.
//   +0x00: 0xb0 = '0' ^ 0x80      +0x10: 0xb9 = '9' ^ 0x80
//   +0x20: 0xe1 = 'a' ^ 0x80      +0x30: 0xe6 = 'f' ^ 0x80
DATA decodeValid<>+0x00(SB)/8, $0xb0b0b0b0b0b0b0b0
DATA decodeValid<>+0x08(SB)/8, $0xb0b0b0b0b0b0b0b0
DATA decodeValid<>+0x10(SB)/8, $0xb9b9b9b9b9b9b9b9
DATA decodeValid<>+0x18(SB)/8, $0xb9b9b9b9b9b9b9b9
DATA decodeValid<>+0x20(SB)/8, $0xe1e1e1e1e1e1e1e1
DATA decodeValid<>+0x28(SB)/8, $0xe1e1e1e1e1e1e1e1
DATA decodeValid<>+0x30(SB)/8, $0xe6e6e6e6e6e6e6e6
DATA decodeValid<>+0x38(SB)/8, $0xe6e6e6e6e6e6e6e6
GLOBL decodeValid<>(SB),RODATA,$64

// decodeToSigned: the 0x80 bias applied before signed byte comparisons.
DATA decodeToSigned<>+0x00(SB)/8, $0x8080808080808080
DATA decodeToSigned<>+0x08(SB)/8, $0x8080808080808080
GLOBL decodeToSigned<>(SB),RODATA,$16

// ·decodeAVX decodes len bytes of ASCII hex at src into len/2 bytes at dst,
// using three-operand VEX forms (VPXOR, VPCMPGTB, VPSHUFB) where a copy would
// otherwise be needed.
//
// Frame slots, as accessed below:
//   dst+0(FP)   destination pointer
//   src+8(FP)   source pointer
//   len+16(FP)  number of source bytes
//   n+24(FP)    written ONLY on failure: offset of the first invalid byte
//   ok+32(FP)   1 on success, 0 on failure
// Presumably declared in Go as
//   func decodeAVX(dst, src *byte, len uint64) (n uint64, ok bool)
// -- confirm against the Go stub.
//
// Register roles: DI = dst cursor, SI = src cursor, BX = source bytes left,
// R15 = original src, X14/X15 = biased '0' / 'a' thresholds, DX = mask of
// byte lanes that count during validation.
// Clobbers AX, BX, CX, DX, SI, DI, R15, X0-X4, X14, X15.
//
// NOTE(review): the remainder path only distinguishes even counts 2..14, and
// a len of 0 would still load 2 source bytes and store 1 byte. The caller is
// assumed to pass an even, non-zero len -- confirm.
TEXT ·decodeAVX(SB),NOSPLIT,$0
	MOVQ	dst+0(FP), DI
	MOVQ	src+8(FP), SI
	MOVQ	len+16(FP), BX
	MOVQ	SI, R15				// remember src start for the error offset
	MOVOU	decodeValid<>(SB), X14
	MOVOU	decodeValid<>+0x20(SB), X15
	MOVW	$0xffff, DX			// all 16 lanes are significant
	CMPQ	BX, $16
	JB	remainder

loop16:
	// Invariant: BX >= 16 here.
	MOVOU	(SI), X0

	// X3 gets 0xff in every lane holding a non-hex character.
	VPXOR	decodeToSigned<>(SB), X0, X1	// X1 = c ^ 0x80
	POR	decodeToLower<>(SB), X0		// X0 = c | 0x20
	VPXOR	decodeToSigned<>(SB), X0, X2	// X2 = (c | 0x20) ^ 0x80
	VPCMPGTB	X1, X14, X3		// X3 = c < '0'
	PCMPGTB	decodeValid<>+0x10(SB), X1	// X1 = c > '9'
	VPCMPGTB	X2, X15, X4		// X4 = (c | 0x20) < 'a'
	PCMPGTB	decodeValid<>+0x30(SB), X2	// X2 = (c | 0x20) > 'f'
	PAND	X4, X1				// between '9' and 'a'
	POR	X2, X3
	POR	X1, X3
	PMOVMSKB	X3, AX			// one bit per lane
	TESTW	AX, DX
	JNZ	bad_byte

	// Character -> nibble: subtract '0', and 0x27 more where X4 is clear.
	PSUBB	decodeBase<>(SB), X0
	PANDN	decodeBase<>+0x10(SB), X4	// X4 = ^X4 & 0x27
	PSUBB	X4, X0

	// Pair nibbles: (even lanes << 4) | odd lanes, result in low 8 bytes.
	VPSHUFB	decodeLow<>(SB), X0, X3
	PSHUFB	decodeHigh<>(SB), X0
	PSLLW	$4, X0
	POR	X3, X0
	MOVQ	X0, (DI)

	SUBQ	$16, BX
	JZ	done
	ADDQ	$16, SI
	ADDQ	$8, DI
	CMPQ	BX, $16
	JAE	loop16

remainder:
	// Fewer than 16 source bytes left. Shrink the lane mask to
	// DX = 0xffff >> (16 - BX) so stale upper lanes of X0 are ignored.
	MOVQ	$16, CX
	SUBQ	BX, CX
	SHRW	CX, DX

	// Dispatch on BX; the number in each label is the source byte count.
	// Labels fall through to gather progressively lower words.
	CMPQ	BX, $4
	JB	load_2
	JE	load_4
	CMPQ	BX, $8
	JB	load_6
	JE	load_8
	CMPQ	BX, $12
	JB	load_10
	JE	load_12
load_14:
	PINSRW	$6, 12(SI), X0
load_12:
	PINSRW	$5, 10(SI), X0
load_10:
	PINSRW	$4, 8(SI), X0
load_8:
	PINSRQ	$0, (SI), X0			// low 8 bytes in one go
	JMP	remainder_check
load_6:
	PINSRW	$2, 4(SI), X0
load_4:
	PINSRW	$1, 2(SI), X0
load_2:
	PINSRW	$0, (SI), X0

remainder_check:
	// Same validation and conversion as in loop16, now under the
	// narrowed mask in DX.
	VPXOR	decodeToSigned<>(SB), X0, X1
	POR	decodeToLower<>(SB), X0
	VPXOR	decodeToSigned<>(SB), X0, X2
	VPCMPGTB	X1, X14, X3
	PCMPGTB	decodeValid<>+0x10(SB), X1
	VPCMPGTB	X2, X15, X4
	PCMPGTB	decodeValid<>+0x30(SB), X2
	PAND	X4, X1
	POR	X2, X3
	POR	X1, X3
	PMOVMSKB	X3, AX
	TESTW	AX, DX
	JNZ	bad_byte
	PSUBB	decodeBase<>(SB), X0
	PANDN	decodeBase<>+0x10(SB), X4
	PSUBB	X4, X0
	VPSHUFB	decodeLow<>(SB), X0, X3
	PSHUFB	decodeHigh<>(SB), X0
	PSLLW	$4, X0
	POR	X3, X0

	// Store exactly BX/2 bytes; label numbers are again source byte counts.
	CMPQ	BX, $4
	JB	store_2
	JE	store_4
	CMPQ	BX, $8
	JB	store_6
	JE	store_8
	CMPQ	BX, $12
	JB	store_10
	JE	store_12
store_14:
	PEXTRB	$6, X0, 6(DI)
store_12:
	PEXTRB	$5, X0, 5(DI)
store_10:
	PEXTRB	$4, X0, 4(DI)
store_8:
	MOVL	X0, (DI)			// output bytes 0..3 at once
	JMP	done
store_6:
	PEXTRB	$2, X0, 2(DI)
store_4:
	PEXTRB	$1, X0, 1(DI)
store_2:
	PEXTRB	$0, X0, (DI)

done:
	MOVB	$1, ok+32(FP)
	RET

bad_byte:
	// n = (SI - original src) + index of the lowest flagged lane.
	BSFW	AX, AX
	SUBQ	R15, SI
	ADDQ	SI, AX
	MOVQ	AX, n+24(FP)
	MOVB	$0, ok+32(FP)
	RET

// ·decodeSSE is the same algorithm as ·decodeAVX with every three-operand
// VEX instruction replaced by a MOVOU copy plus the two-operand form.
//
// Frame slots, as accessed below:
//   dst+0(FP)   destination pointer
//   src+8(FP)   source pointer
//   len+16(FP)  number of source bytes
//   n+24(FP)    written ONLY on failure: offset of the first invalid byte
//   ok+32(FP)   1 on success, 0 on failure
// Presumably declared in Go as
//   func decodeSSE(dst, src *byte, len uint64) (n uint64, ok bool)
// -- confirm against the Go stub.
//
// Register roles: DI = dst cursor, SI = src cursor, BX = source bytes left,
// R15 = original src, X14/X15 = biased '0' / 'a' thresholds, DX = mask of
// byte lanes that count during validation.
// Clobbers AX, BX, CX, DX, SI, DI, R15, X0-X4, X14, X15.
//
// NOTE(review): despite the name this uses PSHUFB (SSSE3) and PINSRQ/PEXTRB
// (SSE4.1); the caller presumably checks CPU support -- confirm. As with the
// AVX variant, len is assumed to be even and non-zero.
TEXT ·decodeSSE(SB),NOSPLIT,$0
	MOVQ	dst+0(FP), DI
	MOVQ	src+8(FP), SI
	MOVQ	len+16(FP), BX
	MOVQ	SI, R15				// remember src start for the error offset
	MOVOU	decodeValid<>(SB), X14
	MOVOU	decodeValid<>+0x20(SB), X15
	MOVW	$0xffff, DX			// all 16 lanes are significant
	CMPQ	BX, $16
	JB	remainder

loop16:
	// Invariant: BX >= 16 here.
	MOVOU	(SI), X0

	// X3 gets 0xff in every lane holding a non-hex character.
	MOVOU	X0, X1
	PXOR	decodeToSigned<>(SB), X1	// X1 = c ^ 0x80
	POR	decodeToLower<>(SB), X0		// X0 = c | 0x20
	MOVOU	X0, X2
	PXOR	decodeToSigned<>(SB), X2	// X2 = (c | 0x20) ^ 0x80
	MOVOU	X14, X3
	PCMPGTB	X1, X3				// X3 = c < '0'
	PCMPGTB	decodeValid<>+0x10(SB), X1	// X1 = c > '9'
	MOVOU	X15, X4
	PCMPGTB	X2, X4				// X4 = (c | 0x20) < 'a'
	PCMPGTB	decodeValid<>+0x30(SB), X2	// X2 = (c | 0x20) > 'f'
	PAND	X4, X1				// between '9' and 'a'
	POR	X2, X3
	POR	X1, X3
	PMOVMSKB	X3, AX			// one bit per lane
	TESTW	AX, DX
	JNZ	bad_byte

	// Character -> nibble: subtract '0', and 0x27 more where X4 is clear.
	PSUBB	decodeBase<>(SB), X0
	PANDN	decodeBase<>+0x10(SB), X4	// X4 = ^X4 & 0x27
	PSUBB	X4, X0

	// Pair nibbles: (even lanes << 4) | odd lanes, result in low 8 bytes.
	MOVOU	X0, X3
	PSHUFB	decodeLow<>(SB), X3
	PSHUFB	decodeHigh<>(SB), X0
	PSLLW	$4, X0
	POR	X3, X0
	MOVQ	X0, (DI)

	SUBQ	$16, BX
	JZ	done
	ADDQ	$16, SI
	ADDQ	$8, DI
	CMPQ	BX, $16
	JAE	loop16

remainder:
	// Fewer than 16 source bytes left. Shrink the lane mask to
	// DX = 0xffff >> (16 - BX) so stale upper lanes of X0 are ignored.
	MOVQ	$16, CX
	SUBQ	BX, CX
	SHRW	CX, DX

	// Dispatch on BX; the number in each label is the source byte count.
	// Labels fall through to gather progressively lower words.
	CMPQ	BX, $4
	JB	load_2
	JE	load_4
	CMPQ	BX, $8
	JB	load_6
	JE	load_8
	CMPQ	BX, $12
	JB	load_10
	JE	load_12
load_14:
	PINSRW	$6, 12(SI), X0
load_12:
	PINSRW	$5, 10(SI), X0
load_10:
	PINSRW	$4, 8(SI), X0
load_8:
	PINSRQ	$0, (SI), X0			// low 8 bytes in one go
	JMP	remainder_check
load_6:
	PINSRW	$2, 4(SI), X0
load_4:
	PINSRW	$1, 2(SI), X0
load_2:
	PINSRW	$0, (SI), X0

remainder_check:
	// Same validation and conversion as in loop16, now under the
	// narrowed mask in DX.
	MOVOU	X0, X1
	PXOR	decodeToSigned<>(SB), X1
	POR	decodeToLower<>(SB), X0
	MOVOU	X0, X2
	PXOR	decodeToSigned<>(SB), X2
	MOVOU	X14, X3
	PCMPGTB	X1, X3
	PCMPGTB	decodeValid<>+0x10(SB), X1
	MOVOU	X15, X4
	PCMPGTB	X2, X4
	PCMPGTB	decodeValid<>+0x30(SB), X2
	PAND	X4, X1
	POR	X2, X3
	POR	X1, X3
	PMOVMSKB	X3, AX
	TESTW	AX, DX
	JNZ	bad_byte
	PSUBB	decodeBase<>(SB), X0
	PANDN	decodeBase<>+0x10(SB), X4
	PSUBB	X4, X0
	MOVOU	X0, X3
	PSHUFB	decodeLow<>(SB), X3
	PSHUFB	decodeHigh<>(SB), X0
	PSLLW	$4, X0
	POR	X3, X0

	// Store exactly BX/2 bytes; label numbers are again source byte counts.
	CMPQ	BX, $4
	JB	store_2
	JE	store_4
	CMPQ	BX, $8
	JB	store_6
	JE	store_8
	CMPQ	BX, $12
	JB	store_10
	JE	store_12
store_14:
	PEXTRB	$6, X0, 6(DI)
store_12:
	PEXTRB	$5, X0, 5(DI)
store_10:
	PEXTRB	$4, X0, 4(DI)
store_8:
	MOVL	X0, (DI)			// output bytes 0..3 at once
	JMP	done
store_6:
	PEXTRB	$2, X0, 2(DI)
store_4:
	PEXTRB	$1, X0, 1(DI)
store_2:
	PEXTRB	$0, X0, (DI)

done:
	MOVB	$1, ok+32(FP)
	RET

bad_byte:
	// n = (SI - original src) + index of the lowest flagged lane.
	BSFW	AX, AX
	SUBQ	R15, SI
	ADDQ	SI, AX
	MOVQ	AX, n+24(FP)
	MOVB	$0, ok+32(FP)
	RET
