diff options
Diffstat (limited to 'vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s')
-rw-r--r-- | vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s | 919 |
1 files changed, 494 insertions, 425 deletions
diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s index 36915d949..81a487d6d 100644 --- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s +++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s @@ -1,7 +1,6 @@ // Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT. //go:build !appengine && !noasm && gc && !noasm -// +build !appengine,!noasm,gc,!noasm #include "textflag.h" @@ -5743,9 +5742,9 @@ emit_literal_done_emit_remainder_encodeBlockAsm8B: // func encodeBetterBlockAsm(dst []byte, src []byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm(SB), $327704-56 +TEXT ·encodeBetterBlockAsm(SB), $589848-56 MOVQ dst_base+0(FP), AX - MOVQ $0x00000a00, CX + MOVQ $0x00001200, CX LEAQ 24(SP), DX PXOR X0, X0 @@ -5797,27 +5796,37 @@ check_maxskip_cont_encodeBetterBlockAsm: MOVQ DI, R11 SHLQ $0x08, R10 IMULQ R9, R10 - SHRQ $0x30, R10 + SHRQ $0x2f, R10 SHLQ $0x20, R11 IMULQ SI, R11 SHRQ $0x32, R11 MOVL 24(SP)(R10*4), SI - MOVL 262168(SP)(R11*4), R8 + MOVL 524312(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) - MOVL CX, 262168(SP)(R11*4) - CMPL (DX)(SI*1), DI + MOVL CX, 524312(SP)(R11*4) + MOVQ (DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm - CMPL (DX)(R8*1), DI - JEQ candidateS_match_encodeBetterBlockAsm - MOVL 20(SP), CX - JMP search_loop_encodeBetterBlockAsm + CMPQ R11, DI + JNE no_short_found_encodeBetterBlockAsm + MOVL R8, SI + JMP candidate_match_encodeBetterBlockAsm + +no_short_found_encodeBetterBlockAsm: + CMPL R10, DI + JEQ candidate_match_encodeBetterBlockAsm + CMPL R11, DI + JEQ candidateS_match_encodeBetterBlockAsm + MOVL 20(SP), CX + JMP search_loop_encodeBetterBlockAsm candidateS_match_encodeBetterBlockAsm: SHRQ $0x08, DI MOVQ DI, R10 SHLQ $0x08, R10 IMULQ R9, R10 - SHRQ $0x30, R10 + SHRQ $0x2f, R10 MOVL 24(SP)(R10*4), SI INCL CX MOVL CX, 24(SP)(R10*4) @@ -6590,52 +6599,49 @@ match_nolit_emitcopy_end_encodeBetterBlockAsm: match_nolit_dst_ok_encodeBetterBlockAsm: MOVQ $0x00cf1bbcdcbfa563, SI MOVQ $0x9e3779b1, R8 - INCL DI - MOVQ (DX)(DI*1), R9 - MOVQ R9, R10 - MOVQ R9, R11 - MOVQ R9, R12 - SHRQ $0x08, R11 - MOVQ R11, R13 - SHRQ $0x10, R12 - LEAL 1(DI), R14 - LEAL 2(DI), R15 - MOVQ -2(DX)(CX*1), R9 + LEAQ 1(DI), DI + LEAQ -2(CX), R9 + MOVQ (DX)(DI*1), R10 + MOVQ 1(DX)(DI*1), R11 + MOVQ (DX)(R9*1), R12 + MOVQ 1(DX)(R9*1), R13 SHLQ $0x08, R10 IMULQ SI, R10 - SHRQ $0x30, R10 - SHLQ $0x08, R13 - IMULQ SI, R13 - SHRQ $0x30, R13 + SHRQ $0x2f, R10 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x32, R11 - SHLQ $0x20, R12 - IMULQ R8, R12 - SHRQ $0x32, R12 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x2f, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 MOVL DI, 24(SP)(R10*4) - MOVL R14, 24(SP)(R13*4) - MOVL R14, 262168(SP)(R11*4) - MOVL R15, 262168(SP)(R12*4) - MOVQ R9, R10 - MOVQ R9, R11 - SHRQ $0x08, R11 - MOVQ R11, R13 - LEAL -2(CX), R9 - LEAL -1(CX), DI + MOVL R9, 24(SP)(R12*4) + MOVL R8, 524312(SP)(R11*4) + MOVL R14, 524312(SP)(R13*4) + ADDQ $0x01, DI + SUBQ $0x01, R9 + +index_loop_encodeBetterBlockAsm: + CMPQ DI, R9 + JAE search_loop_encodeBetterBlockAsm + MOVQ (DX)(DI*1), R8 + MOVQ (DX)(R9*1), R10 + SHLQ $0x08, R8 + IMULQ SI, R8 + SHRQ $0x2f, R8 SHLQ $0x08, R10 IMULQ SI, R10 - SHRQ $0x30, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x32, R11 - SHLQ $0x08, R13 - IMULQ SI, R13 - SHRQ $0x30, R13 + SHRQ $0x2f, R10 + MOVL DI, 24(SP)(R8*4) MOVL R9, 24(SP)(R10*4) - MOVL DI, 262168(SP)(R11*4) - MOVL DI, 24(SP)(R13*4) - JMP search_loop_encodeBetterBlockAsm + ADDQ $0x02, DI + SUBQ $0x02, R9 + JMP index_loop_encodeBetterBlockAsm emit_remainder_encodeBetterBlockAsm: MOVQ src_len+32(FP), CX @@ -6815,9 +6821,9 @@ emit_literal_done_emit_remainder_encodeBetterBlockAsm: // func encodeBetterBlockAsm4MB(dst []byte, src []byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm4MB(SB), $327704-56 +TEXT ·encodeBetterBlockAsm4MB(SB), $589848-56 MOVQ dst_base+0(FP), AX - MOVQ $0x00000a00, CX + MOVQ $0x00001200, CX LEAQ 24(SP), DX PXOR X0, X0 @@ -6869,27 +6875,37 @@ check_maxskip_cont_encodeBetterBlockAsm4MB: MOVQ DI, R11 SHLQ $0x08, R10 IMULQ R9, R10 - SHRQ $0x30, R10 + SHRQ $0x2f, R10 SHLQ $0x20, R11 IMULQ SI, R11 SHRQ $0x32, R11 MOVL 24(SP)(R10*4), SI - MOVL 262168(SP)(R11*4), R8 + MOVL 524312(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) - MOVL CX, 262168(SP)(R11*4) - CMPL (DX)(SI*1), DI + MOVL CX, 524312(SP)(R11*4) + MOVQ (DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm4MB - CMPL (DX)(R8*1), DI - JEQ candidateS_match_encodeBetterBlockAsm4MB - MOVL 20(SP), CX - JMP search_loop_encodeBetterBlockAsm4MB + CMPQ R11, DI + JNE no_short_found_encodeBetterBlockAsm4MB + MOVL R8, SI + JMP candidate_match_encodeBetterBlockAsm4MB + +no_short_found_encodeBetterBlockAsm4MB: + CMPL R10, DI + JEQ candidate_match_encodeBetterBlockAsm4MB + CMPL R11, DI + JEQ candidateS_match_encodeBetterBlockAsm4MB + MOVL 20(SP), CX + JMP search_loop_encodeBetterBlockAsm4MB candidateS_match_encodeBetterBlockAsm4MB: SHRQ $0x08, DI MOVQ DI, R10 SHLQ $0x08, R10 IMULQ R9, R10 - SHRQ $0x30, R10 + SHRQ $0x2f, R10 MOVL 24(SP)(R10*4), SI INCL CX MOVL CX, 24(SP)(R10*4) @@ -7600,52 +7616,49 @@ match_nolit_emitcopy_end_encodeBetterBlockAsm4MB: match_nolit_dst_ok_encodeBetterBlockAsm4MB: MOVQ $0x00cf1bbcdcbfa563, SI MOVQ $0x9e3779b1, R8 - INCL DI - MOVQ (DX)(DI*1), R9 - MOVQ R9, R10 - MOVQ R9, R11 - MOVQ R9, R12 - SHRQ $0x08, R11 - MOVQ R11, R13 - SHRQ $0x10, R12 - LEAL 1(DI), R14 - LEAL 2(DI), R15 - MOVQ -2(DX)(CX*1), R9 + LEAQ 1(DI), DI + LEAQ -2(CX), R9 + MOVQ (DX)(DI*1), R10 + MOVQ 1(DX)(DI*1), R11 + MOVQ (DX)(R9*1), R12 + MOVQ 1(DX)(R9*1), R13 SHLQ $0x08, R10 IMULQ SI, R10 - SHRQ $0x30, R10 - SHLQ $0x08, R13 - IMULQ SI, R13 - SHRQ $0x30, R13 + SHRQ $0x2f, R10 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x32, R11 - SHLQ $0x20, R12 - IMULQ R8, R12 - SHRQ $0x32, R12 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x2f, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 MOVL DI, 24(SP)(R10*4) - MOVL R14, 24(SP)(R13*4) - MOVL R14, 262168(SP)(R11*4) - MOVL R15, 262168(SP)(R12*4) - MOVQ R9, R10 - MOVQ R9, R11 - SHRQ $0x08, R11 - MOVQ R11, R13 - LEAL -2(CX), R9 - LEAL -1(CX), DI + MOVL R9, 24(SP)(R12*4) + MOVL R8, 524312(SP)(R11*4) + MOVL R14, 524312(SP)(R13*4) + ADDQ $0x01, DI + SUBQ $0x01, R9 + +index_loop_encodeBetterBlockAsm4MB: + CMPQ DI, R9 + JAE search_loop_encodeBetterBlockAsm4MB + MOVQ (DX)(DI*1), R8 + MOVQ (DX)(R9*1), R10 + SHLQ $0x08, R8 + IMULQ SI, R8 + SHRQ $0x2f, R8 SHLQ $0x08, R10 IMULQ SI, R10 - SHRQ $0x30, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x32, R11 - SHLQ $0x08, R13 - IMULQ SI, R13 - SHRQ $0x30, R13 + SHRQ $0x2f, R10 + MOVL DI, 24(SP)(R8*4) MOVL R9, 24(SP)(R10*4) - MOVL DI, 262168(SP)(R11*4) - MOVL DI, 24(SP)(R13*4) - JMP search_loop_encodeBetterBlockAsm4MB + ADDQ $0x02, DI + SUBQ $0x02, R9 + JMP index_loop_encodeBetterBlockAsm4MB emit_remainder_encodeBetterBlockAsm4MB: MOVQ src_len+32(FP), CX @@ -7871,12 +7884,22 @@ search_loop_encodeBetterBlockAsm12B: MOVL 65560(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) MOVL CX, 65560(SP)(R11*4) - CMPL (DX)(SI*1), DI + MOVQ (DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm12B - CMPL (DX)(R8*1), DI - JEQ candidateS_match_encodeBetterBlockAsm12B - MOVL 20(SP), CX - JMP search_loop_encodeBetterBlockAsm12B + CMPQ R11, DI + JNE no_short_found_encodeBetterBlockAsm12B + MOVL R8, SI + JMP candidate_match_encodeBetterBlockAsm12B + +no_short_found_encodeBetterBlockAsm12B: + CMPL R10, DI + JEQ candidate_match_encodeBetterBlockAsm12B + CMPL R11, DI + JEQ candidateS_match_encodeBetterBlockAsm12B + MOVL 20(SP), CX + JMP search_loop_encodeBetterBlockAsm12B candidateS_match_encodeBetterBlockAsm12B: SHRQ $0x08, DI @@ -8447,52 +8470,49 @@ match_nolit_emitcopy_end_encodeBetterBlockAsm12B: match_nolit_dst_ok_encodeBetterBlockAsm12B: MOVQ $0x0000cf1bbcdcbf9b, SI MOVQ $0x9e3779b1, R8 - INCL DI - MOVQ (DX)(DI*1), R9 - MOVQ R9, R10 - MOVQ R9, R11 - MOVQ R9, R12 - SHRQ $0x08, R11 - MOVQ R11, R13 - SHRQ $0x10, R12 - LEAL 1(DI), R14 - LEAL 2(DI), R15 - MOVQ -2(DX)(CX*1), R9 + LEAQ 1(DI), DI + LEAQ -2(CX), R9 + MOVQ (DX)(DI*1), R10 + MOVQ 1(DX)(DI*1), R11 + MOVQ (DX)(R9*1), R12 + MOVQ 1(DX)(R9*1), R13 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x32, R10 - SHLQ $0x10, R13 - IMULQ SI, R13 - SHRQ $0x32, R13 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x34, R11 - SHLQ $0x20, R12 - IMULQ R8, R12 - SHRQ $0x34, R12 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x32, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x34, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 MOVL DI, 24(SP)(R10*4) - MOVL R14, 24(SP)(R13*4) - MOVL R14, 65560(SP)(R11*4) - MOVL R15, 65560(SP)(R12*4) - MOVQ R9, R10 - MOVQ R9, R11 - SHRQ $0x08, R11 - MOVQ R11, R13 - LEAL -2(CX), R9 - LEAL -1(CX), DI + MOVL R9, 24(SP)(R12*4) + MOVL R8, 65560(SP)(R11*4) + MOVL R14, 65560(SP)(R13*4) + ADDQ $0x01, DI + SUBQ $0x01, R9 + +index_loop_encodeBetterBlockAsm12B: + CMPQ DI, R9 + JAE search_loop_encodeBetterBlockAsm12B + MOVQ (DX)(DI*1), R8 + MOVQ (DX)(R9*1), R10 + SHLQ $0x10, R8 + IMULQ SI, R8 + SHRQ $0x32, R8 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x32, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x34, R11 - SHLQ $0x10, R13 - IMULQ SI, R13 - SHRQ $0x32, R13 + MOVL DI, 24(SP)(R8*4) MOVL R9, 24(SP)(R10*4) - MOVL DI, 65560(SP)(R11*4) - MOVL DI, 24(SP)(R13*4) - JMP search_loop_encodeBetterBlockAsm12B + ADDQ $0x02, DI + SUBQ $0x02, R9 + JMP index_loop_encodeBetterBlockAsm12B emit_remainder_encodeBetterBlockAsm12B: MOVQ src_len+32(FP), CX @@ -8707,12 +8727,22 @@ search_loop_encodeBetterBlockAsm10B: MOVL 16408(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) MOVL CX, 16408(SP)(R11*4) - CMPL (DX)(SI*1), DI + MOVQ (DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm10B - CMPL (DX)(R8*1), DI - JEQ candidateS_match_encodeBetterBlockAsm10B - MOVL 20(SP), CX - JMP search_loop_encodeBetterBlockAsm10B + CMPQ R11, DI + JNE no_short_found_encodeBetterBlockAsm10B + MOVL R8, SI + JMP candidate_match_encodeBetterBlockAsm10B + +no_short_found_encodeBetterBlockAsm10B: + CMPL R10, DI + JEQ candidate_match_encodeBetterBlockAsm10B + CMPL R11, DI + JEQ candidateS_match_encodeBetterBlockAsm10B + MOVL 20(SP), CX + JMP search_loop_encodeBetterBlockAsm10B candidateS_match_encodeBetterBlockAsm10B: SHRQ $0x08, DI @@ -9283,52 +9313,49 @@ match_nolit_emitcopy_end_encodeBetterBlockAsm10B: match_nolit_dst_ok_encodeBetterBlockAsm10B: MOVQ $0x0000cf1bbcdcbf9b, SI MOVQ $0x9e3779b1, R8 - INCL DI - MOVQ (DX)(DI*1), R9 - MOVQ R9, R10 - MOVQ R9, R11 - MOVQ R9, R12 - SHRQ $0x08, R11 - MOVQ R11, R13 - SHRQ $0x10, R12 - LEAL 1(DI), R14 - LEAL 2(DI), R15 - MOVQ -2(DX)(CX*1), R9 + LEAQ 1(DI), DI + LEAQ -2(CX), R9 + MOVQ (DX)(DI*1), R10 + MOVQ 1(DX)(DI*1), R11 + MOVQ (DX)(R9*1), R12 + MOVQ 1(DX)(R9*1), R13 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x34, R10 - SHLQ $0x10, R13 - IMULQ SI, R13 - SHRQ $0x34, R13 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x36, R11 - SHLQ $0x20, R12 - IMULQ R8, R12 - SHRQ $0x36, R12 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x34, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x36, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 MOVL DI, 24(SP)(R10*4) - MOVL R14, 24(SP)(R13*4) - MOVL R14, 16408(SP)(R11*4) - MOVL R15, 16408(SP)(R12*4) - MOVQ R9, R10 - MOVQ R9, R11 - SHRQ $0x08, R11 - MOVQ R11, R13 - LEAL -2(CX), R9 - LEAL -1(CX), DI + MOVL R9, 24(SP)(R12*4) + MOVL R8, 16408(SP)(R11*4) + MOVL R14, 16408(SP)(R13*4) + ADDQ $0x01, DI + SUBQ $0x01, R9 + +index_loop_encodeBetterBlockAsm10B: + CMPQ DI, R9 + JAE search_loop_encodeBetterBlockAsm10B + MOVQ (DX)(DI*1), R8 + MOVQ (DX)(R9*1), R10 + SHLQ $0x10, R8 + IMULQ SI, R8 + SHRQ $0x34, R8 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x34, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x36, R11 - SHLQ $0x10, R13 - IMULQ SI, R13 - SHRQ $0x34, R13 + MOVL DI, 24(SP)(R8*4) MOVL R9, 24(SP)(R10*4) - MOVL DI, 16408(SP)(R11*4) - MOVL DI, 24(SP)(R13*4) - JMP search_loop_encodeBetterBlockAsm10B + ADDQ $0x02, DI + SUBQ $0x02, R9 + JMP index_loop_encodeBetterBlockAsm10B emit_remainder_encodeBetterBlockAsm10B: MOVQ src_len+32(FP), CX @@ -9543,12 +9570,22 @@ search_loop_encodeBetterBlockAsm8B: MOVL 4120(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) MOVL CX, 4120(SP)(R11*4) - CMPL (DX)(SI*1), DI + MOVQ (DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm8B - CMPL (DX)(R8*1), DI - JEQ candidateS_match_encodeBetterBlockAsm8B - MOVL 20(SP), CX - JMP search_loop_encodeBetterBlockAsm8B + CMPQ R11, DI + JNE no_short_found_encodeBetterBlockAsm8B + MOVL R8, SI + JMP candidate_match_encodeBetterBlockAsm8B + +no_short_found_encodeBetterBlockAsm8B: + CMPL R10, DI + JEQ candidate_match_encodeBetterBlockAsm8B + CMPL R11, DI + JEQ candidateS_match_encodeBetterBlockAsm8B + MOVL 20(SP), CX + JMP search_loop_encodeBetterBlockAsm8B candidateS_match_encodeBetterBlockAsm8B: SHRQ $0x08, DI @@ -10105,52 +10142,49 @@ match_nolit_emitcopy_end_encodeBetterBlockAsm8B: match_nolit_dst_ok_encodeBetterBlockAsm8B: MOVQ $0x0000cf1bbcdcbf9b, SI MOVQ $0x9e3779b1, R8 - INCL DI - MOVQ (DX)(DI*1), R9 - MOVQ R9, R10 - MOVQ R9, R11 - MOVQ R9, R12 - SHRQ $0x08, R11 - MOVQ R11, R13 - SHRQ $0x10, R12 - LEAL 1(DI), R14 - LEAL 2(DI), R15 - MOVQ -2(DX)(CX*1), R9 + LEAQ 1(DI), DI + LEAQ -2(CX), R9 + MOVQ (DX)(DI*1), R10 + MOVQ 1(DX)(DI*1), R11 + MOVQ (DX)(R9*1), R12 + MOVQ 1(DX)(R9*1), R13 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x36, R10 - SHLQ $0x10, R13 - IMULQ SI, R13 - SHRQ $0x36, R13 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x38, R11 - SHLQ $0x20, R12 - IMULQ R8, R12 - SHRQ $0x38, R12 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x36, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x38, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 MOVL DI, 24(SP)(R10*4) - MOVL R14, 24(SP)(R13*4) - MOVL R14, 4120(SP)(R11*4) - MOVL R15, 4120(SP)(R12*4) - MOVQ R9, R10 - MOVQ R9, R11 - SHRQ $0x08, R11 - MOVQ R11, R13 - LEAL -2(CX), R9 - LEAL -1(CX), DI + MOVL R9, 24(SP)(R12*4) + MOVL R8, 4120(SP)(R11*4) + MOVL R14, 4120(SP)(R13*4) + ADDQ $0x01, DI + SUBQ $0x01, R9 + +index_loop_encodeBetterBlockAsm8B: + CMPQ DI, R9 + JAE search_loop_encodeBetterBlockAsm8B + MOVQ (DX)(DI*1), R8 + MOVQ (DX)(R9*1), R10 + SHLQ $0x10, R8 + IMULQ SI, R8 + SHRQ $0x36, R8 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x36, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x38, R11 - SHLQ $0x10, R13 - IMULQ SI, R13 - SHRQ $0x36, R13 + MOVL DI, 24(SP)(R8*4) MOVL R9, 24(SP)(R10*4) - MOVL DI, 4120(SP)(R11*4) - MOVL DI, 24(SP)(R13*4) - JMP search_loop_encodeBetterBlockAsm8B + ADDQ $0x02, DI + SUBQ $0x02, R9 + JMP index_loop_encodeBetterBlockAsm8B emit_remainder_encodeBetterBlockAsm8B: MOVQ src_len+32(FP), CX @@ -14287,9 +14321,9 @@ emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B: // func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm(SB), $327704-56 +TEXT ·encodeSnappyBetterBlockAsm(SB), $589848-56 MOVQ dst_base+0(FP), AX - MOVQ $0x00000a00, CX + MOVQ $0x00001200, CX LEAQ 24(SP), DX PXOR X0, X0 @@ -14341,27 +14375,37 @@ check_maxskip_cont_encodeSnappyBetterBlockAsm: MOVQ DI, R11 SHLQ $0x08, R10 IMULQ R9, R10 - SHRQ $0x30, R10 + SHRQ $0x2f, R10 SHLQ $0x20, R11 IMULQ SI, R11 SHRQ $0x32, R11 MOVL 24(SP)(R10*4), SI - MOVL 262168(SP)(R11*4), R8 + MOVL 524312(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) - MOVL CX, 262168(SP)(R11*4) - CMPL (DX)(SI*1), DI + MOVL CX, 524312(SP)(R11*4) + MOVQ (DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm - CMPL (DX)(R8*1), DI - JEQ candidateS_match_encodeSnappyBetterBlockAsm - MOVL 20(SP), CX - JMP search_loop_encodeSnappyBetterBlockAsm + CMPQ R11, DI + JNE no_short_found_encodeSnappyBetterBlockAsm + MOVL R8, SI + JMP candidate_match_encodeSnappyBetterBlockAsm + +no_short_found_encodeSnappyBetterBlockAsm: + CMPL R10, DI + JEQ candidate_match_encodeSnappyBetterBlockAsm + CMPL R11, DI + JEQ candidateS_match_encodeSnappyBetterBlockAsm + MOVL 20(SP), CX + JMP search_loop_encodeSnappyBetterBlockAsm candidateS_match_encodeSnappyBetterBlockAsm: SHRQ $0x08, DI MOVQ DI, R10 SHLQ $0x08, R10 IMULQ R9, R10 - SHRQ $0x30, R10 + SHRQ $0x2f, R10 MOVL 24(SP)(R10*4), SI INCL CX MOVL CX, 24(SP)(R10*4) @@ -14685,52 +14729,49 @@ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm: match_nolit_dst_ok_encodeSnappyBetterBlockAsm: MOVQ $0x00cf1bbcdcbfa563, SI MOVQ $0x9e3779b1, R8 - INCL DI - MOVQ (DX)(DI*1), R9 - MOVQ R9, R10 - MOVQ R9, R11 - MOVQ R9, R12 - SHRQ $0x08, R11 - MOVQ R11, R13 - SHRQ $0x10, R12 - LEAL 1(DI), R14 - LEAL 2(DI), R15 - MOVQ -2(DX)(CX*1), R9 + LEAQ 1(DI), DI + LEAQ -2(CX), R9 + MOVQ (DX)(DI*1), R10 + MOVQ 1(DX)(DI*1), R11 + MOVQ (DX)(R9*1), R12 + MOVQ 1(DX)(R9*1), R13 SHLQ $0x08, R10 IMULQ SI, R10 - SHRQ $0x30, R10 - SHLQ $0x08, R13 - IMULQ SI, R13 - SHRQ $0x30, R13 + SHRQ $0x2f, R10 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x32, R11 - SHLQ $0x20, R12 - IMULQ R8, R12 - SHRQ $0x32, R12 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x2f, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 MOVL DI, 24(SP)(R10*4) - MOVL R14, 24(SP)(R13*4) - MOVL R14, 262168(SP)(R11*4) - MOVL R15, 262168(SP)(R12*4) - MOVQ R9, R10 - MOVQ R9, R11 - SHRQ $0x08, R11 - MOVQ R11, R13 - LEAL -2(CX), R9 - LEAL -1(CX), DI + MOVL R9, 24(SP)(R12*4) + MOVL R8, 524312(SP)(R11*4) + MOVL R14, 524312(SP)(R13*4) + ADDQ $0x01, DI + SUBQ $0x01, R9 + +index_loop_encodeSnappyBetterBlockAsm: + CMPQ DI, R9 + JAE search_loop_encodeSnappyBetterBlockAsm + MOVQ (DX)(DI*1), R8 + MOVQ (DX)(R9*1), R10 + SHLQ $0x08, R8 + IMULQ SI, R8 + SHRQ $0x2f, R8 SHLQ $0x08, R10 IMULQ SI, R10 - SHRQ $0x30, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x32, R11 - SHLQ $0x08, R13 - IMULQ SI, R13 - SHRQ $0x30, R13 + SHRQ $0x2f, R10 + MOVL DI, 24(SP)(R8*4) MOVL R9, 24(SP)(R10*4) - MOVL DI, 262168(SP)(R11*4) - MOVL DI, 24(SP)(R13*4) - JMP search_loop_encodeSnappyBetterBlockAsm + ADDQ $0x02, DI + SUBQ $0x02, R9 + JMP index_loop_encodeSnappyBetterBlockAsm emit_remainder_encodeSnappyBetterBlockAsm: MOVQ src_len+32(FP), CX @@ -14964,12 +15005,22 @@ search_loop_encodeSnappyBetterBlockAsm64K: MOVL 262168(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) MOVL CX, 262168(SP)(R11*4) - CMPL (DX)(SI*1), DI + MOVQ (DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm64K - CMPL (DX)(R8*1), DI - JEQ candidateS_match_encodeSnappyBetterBlockAsm64K - MOVL 20(SP), CX - JMP search_loop_encodeSnappyBetterBlockAsm64K + CMPQ R11, DI + JNE no_short_found_encodeSnappyBetterBlockAsm64K + MOVL R8, SI + JMP candidate_match_encodeSnappyBetterBlockAsm64K + +no_short_found_encodeSnappyBetterBlockAsm64K: + CMPL R10, DI + JEQ candidate_match_encodeSnappyBetterBlockAsm64K + CMPL R11, DI + JEQ candidateS_match_encodeSnappyBetterBlockAsm64K + MOVL 20(SP), CX + JMP search_loop_encodeSnappyBetterBlockAsm64K candidateS_match_encodeSnappyBetterBlockAsm64K: SHRQ $0x08, DI @@ -15248,52 +15299,49 @@ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K: match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K: MOVQ $0x00cf1bbcdcbfa563, SI MOVQ $0x9e3779b1, R8 - INCL DI - MOVQ (DX)(DI*1), R9 - MOVQ R9, R10 - MOVQ R9, R11 - MOVQ R9, R12 - SHRQ $0x08, R11 - MOVQ R11, R13 - SHRQ $0x10, R12 - LEAL 1(DI), R14 - LEAL 2(DI), R15 - MOVQ -2(DX)(CX*1), R9 + LEAQ 1(DI), DI + LEAQ -2(CX), R9 + MOVQ (DX)(DI*1), R10 + MOVQ 1(DX)(DI*1), R11 + MOVQ (DX)(R9*1), R12 + MOVQ 1(DX)(R9*1), R13 SHLQ $0x08, R10 IMULQ SI, R10 SHRQ $0x30, R10 - SHLQ $0x08, R13 - IMULQ SI, R13 - SHRQ $0x30, R13 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x32, R11 - SHLQ $0x20, R12 - IMULQ R8, R12 - SHRQ $0x32, R12 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x30, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 MOVL DI, 24(SP)(R10*4) - MOVL R14, 24(SP)(R13*4) - MOVL R14, 262168(SP)(R11*4) - MOVL R15, 262168(SP)(R12*4) - MOVQ R9, R10 - MOVQ R9, R11 - SHRQ $0x08, R11 - MOVQ R11, R13 - LEAL -2(CX), R9 - LEAL -1(CX), DI + MOVL R9, 24(SP)(R12*4) + MOVL R8, 262168(SP)(R11*4) + MOVL R14, 262168(SP)(R13*4) + ADDQ $0x01, DI + SUBQ $0x01, R9 + +index_loop_encodeSnappyBetterBlockAsm64K: + CMPQ DI, R9 + JAE search_loop_encodeSnappyBetterBlockAsm64K + MOVQ (DX)(DI*1), R8 + MOVQ (DX)(R9*1), R10 + SHLQ $0x08, R8 + IMULQ SI, R8 + SHRQ $0x30, R8 SHLQ $0x08, R10 IMULQ SI, R10 SHRQ $0x30, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x32, R11 - SHLQ $0x08, R13 - IMULQ SI, R13 - SHRQ $0x30, R13 + MOVL DI, 24(SP)(R8*4) MOVL R9, 24(SP)(R10*4) - MOVL DI, 262168(SP)(R11*4) - MOVL DI, 24(SP)(R13*4) - JMP search_loop_encodeSnappyBetterBlockAsm64K + ADDQ $0x02, DI + SUBQ $0x02, R9 + JMP index_loop_encodeSnappyBetterBlockAsm64K emit_remainder_encodeSnappyBetterBlockAsm64K: MOVQ src_len+32(FP), CX @@ -15508,12 +15556,22 @@ search_loop_encodeSnappyBetterBlockAsm12B: MOVL 65560(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) MOVL CX, 65560(SP)(R11*4) - CMPL (DX)(SI*1), DI + MOVQ (DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm12B - CMPL (DX)(R8*1), DI - JEQ candidateS_match_encodeSnappyBetterBlockAsm12B - MOVL 20(SP), CX - JMP search_loop_encodeSnappyBetterBlockAsm12B + CMPQ R11, DI + JNE no_short_found_encodeSnappyBetterBlockAsm12B + MOVL R8, SI + JMP candidate_match_encodeSnappyBetterBlockAsm12B + +no_short_found_encodeSnappyBetterBlockAsm12B: + CMPL R10, DI + JEQ candidate_match_encodeSnappyBetterBlockAsm12B + CMPL R11, DI + JEQ candidateS_match_encodeSnappyBetterBlockAsm12B + MOVL 20(SP), CX + JMP search_loop_encodeSnappyBetterBlockAsm12B candidateS_match_encodeSnappyBetterBlockAsm12B: SHRQ $0x08, DI @@ -15792,52 +15850,49 @@ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B: match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B: MOVQ $0x0000cf1bbcdcbf9b, SI MOVQ $0x9e3779b1, R8 - INCL DI - MOVQ (DX)(DI*1), R9 - MOVQ R9, R10 - MOVQ R9, R11 - MOVQ R9, R12 - SHRQ $0x08, R11 - MOVQ R11, R13 - SHRQ $0x10, R12 - LEAL 1(DI), R14 - LEAL 2(DI), R15 - MOVQ -2(DX)(CX*1), R9 + LEAQ 1(DI), DI + LEAQ -2(CX), R9 + MOVQ (DX)(DI*1), R10 + MOVQ 1(DX)(DI*1), R11 + MOVQ (DX)(R9*1), R12 + MOVQ 1(DX)(R9*1), R13 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x32, R10 - SHLQ $0x10, R13 - IMULQ SI, R13 - SHRQ $0x32, R13 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x34, R11 - SHLQ $0x20, R12 - IMULQ R8, R12 - SHRQ $0x34, R12 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x32, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x34, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 MOVL DI, 24(SP)(R10*4) - MOVL R14, 24(SP)(R13*4) - MOVL R14, 65560(SP)(R11*4) - MOVL R15, 65560(SP)(R12*4) - MOVQ R9, R10 - MOVQ R9, R11 - SHRQ $0x08, R11 - MOVQ R11, R13 - LEAL -2(CX), R9 - LEAL -1(CX), DI + MOVL R9, 24(SP)(R12*4) + MOVL R8, 65560(SP)(R11*4) + MOVL R14, 65560(SP)(R13*4) + ADDQ $0x01, DI + SUBQ $0x01, R9 + +index_loop_encodeSnappyBetterBlockAsm12B: + CMPQ DI, R9 + JAE search_loop_encodeSnappyBetterBlockAsm12B + MOVQ (DX)(DI*1), R8 + MOVQ (DX)(R9*1), R10 + SHLQ $0x10, R8 + IMULQ SI, R8 + SHRQ $0x32, R8 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x32, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x34, R11 - SHLQ $0x10, R13 - IMULQ SI, R13 - SHRQ $0x32, R13 + MOVL DI, 24(SP)(R8*4) MOVL R9, 24(SP)(R10*4) - MOVL DI, 65560(SP)(R11*4) - MOVL DI, 24(SP)(R13*4) - JMP search_loop_encodeSnappyBetterBlockAsm12B + ADDQ $0x02, DI + SUBQ $0x02, R9 + JMP index_loop_encodeSnappyBetterBlockAsm12B emit_remainder_encodeSnappyBetterBlockAsm12B: MOVQ src_len+32(FP), CX @@ -16052,12 +16107,22 @@ search_loop_encodeSnappyBetterBlockAsm10B: MOVL 16408(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) MOVL CX, 16408(SP)(R11*4) - CMPL (DX)(SI*1), DI + MOVQ (DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm10B - CMPL (DX)(R8*1), DI - JEQ candidateS_match_encodeSnappyBetterBlockAsm10B - MOVL 20(SP), CX - JMP search_loop_encodeSnappyBetterBlockAsm10B + CMPQ R11, DI + JNE no_short_found_encodeSnappyBetterBlockAsm10B + MOVL R8, SI + JMP candidate_match_encodeSnappyBetterBlockAsm10B + +no_short_found_encodeSnappyBetterBlockAsm10B: + CMPL R10, DI + JEQ candidate_match_encodeSnappyBetterBlockAsm10B + CMPL R11, DI + JEQ candidateS_match_encodeSnappyBetterBlockAsm10B + MOVL 20(SP), CX + JMP search_loop_encodeSnappyBetterBlockAsm10B candidateS_match_encodeSnappyBetterBlockAsm10B: SHRQ $0x08, DI @@ -16336,52 +16401,49 @@ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B: match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B: MOVQ $0x0000cf1bbcdcbf9b, SI MOVQ $0x9e3779b1, R8 - INCL DI - MOVQ (DX)(DI*1), R9 - MOVQ R9, R10 - MOVQ R9, R11 - MOVQ R9, R12 - SHRQ $0x08, R11 - MOVQ R11, R13 - SHRQ $0x10, R12 - LEAL 1(DI), R14 - LEAL 2(DI), R15 - MOVQ -2(DX)(CX*1), R9 + LEAQ 1(DI), DI + LEAQ -2(CX), R9 + MOVQ (DX)(DI*1), R10 + MOVQ 1(DX)(DI*1), R11 + MOVQ (DX)(R9*1), R12 + MOVQ 1(DX)(R9*1), R13 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x34, R10 - SHLQ $0x10, R13 - IMULQ SI, R13 - SHRQ $0x34, R13 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x36, R11 - SHLQ $0x20, R12 - IMULQ R8, R12 - SHRQ $0x36, R12 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x34, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x36, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 MOVL DI, 24(SP)(R10*4) - MOVL R14, 24(SP)(R13*4) - MOVL R14, 16408(SP)(R11*4) - MOVL R15, 16408(SP)(R12*4) - MOVQ R9, R10 - MOVQ R9, R11 - SHRQ $0x08, R11 - MOVQ R11, R13 - LEAL -2(CX), R9 - LEAL -1(CX), DI + MOVL R9, 24(SP)(R12*4) + MOVL R8, 16408(SP)(R11*4) + MOVL R14, 16408(SP)(R13*4) + ADDQ $0x01, DI + SUBQ $0x01, R9 + +index_loop_encodeSnappyBetterBlockAsm10B: + CMPQ DI, R9 + JAE search_loop_encodeSnappyBetterBlockAsm10B + MOVQ (DX)(DI*1), R8 + MOVQ (DX)(R9*1), R10 + SHLQ $0x10, R8 + IMULQ SI, R8 + SHRQ $0x34, R8 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x34, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x36, R11 - SHLQ $0x10, R13 - IMULQ SI, R13 - SHRQ $0x34, R13 + MOVL DI, 24(SP)(R8*4) MOVL R9, 24(SP)(R10*4) - MOVL DI, 16408(SP)(R11*4) - MOVL DI, 24(SP)(R13*4) - JMP search_loop_encodeSnappyBetterBlockAsm10B + ADDQ $0x02, DI + SUBQ $0x02, R9 + JMP index_loop_encodeSnappyBetterBlockAsm10B emit_remainder_encodeSnappyBetterBlockAsm10B: MOVQ src_len+32(FP), CX @@ -16596,12 +16658,22 @@ search_loop_encodeSnappyBetterBlockAsm8B: MOVL 4120(SP)(R11*4), R8 MOVL CX, 24(SP)(R10*4) MOVL CX, 4120(SP)(R11*4) - CMPL (DX)(SI*1), DI + MOVQ (DX)(SI*1), R10 + MOVQ (DX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm8B - CMPL (DX)(R8*1), DI - JEQ candidateS_match_encodeSnappyBetterBlockAsm8B - MOVL 20(SP), CX - JMP search_loop_encodeSnappyBetterBlockAsm8B + CMPQ R11, DI + JNE no_short_found_encodeSnappyBetterBlockAsm8B + MOVL R8, SI + JMP candidate_match_encodeSnappyBetterBlockAsm8B + +no_short_found_encodeSnappyBetterBlockAsm8B: + CMPL R10, DI + JEQ candidate_match_encodeSnappyBetterBlockAsm8B + CMPL R11, DI + JEQ candidateS_match_encodeSnappyBetterBlockAsm8B + MOVL 20(SP), CX + JMP search_loop_encodeSnappyBetterBlockAsm8B candidateS_match_encodeSnappyBetterBlockAsm8B: SHRQ $0x08, DI @@ -16878,52 +16950,49 @@ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B: match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B: MOVQ $0x0000cf1bbcdcbf9b, SI MOVQ $0x9e3779b1, R8 - INCL DI - MOVQ (DX)(DI*1), R9 - MOVQ R9, R10 - MOVQ R9, R11 - MOVQ R9, R12 - SHRQ $0x08, R11 - MOVQ R11, R13 - SHRQ $0x10, R12 - LEAL 1(DI), R14 - LEAL 2(DI), R15 - MOVQ -2(DX)(CX*1), R9 + LEAQ 1(DI), DI + LEAQ -2(CX), R9 + MOVQ (DX)(DI*1), R10 + MOVQ 1(DX)(DI*1), R11 + MOVQ (DX)(R9*1), R12 + MOVQ 1(DX)(R9*1), R13 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x36, R10 - SHLQ $0x10, R13 - IMULQ SI, R13 - SHRQ $0x36, R13 SHLQ $0x20, R11 IMULQ R8, R11 SHRQ $0x38, R11 - SHLQ $0x20, R12 - IMULQ R8, R12 - SHRQ $0x38, R12 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x36, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x38, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 MOVL DI, 24(SP)(R10*4) - MOVL R14, 24(SP)(R13*4) - MOVL R14, 4120(SP)(R11*4) - MOVL R15, 4120(SP)(R12*4) - MOVQ R9, R10 - MOVQ R9, R11 - SHRQ $0x08, R11 - MOVQ R11, R13 - LEAL -2(CX), R9 - LEAL -1(CX), DI + MOVL R9, 24(SP)(R12*4) + MOVL R8, 4120(SP)(R11*4) + MOVL R14, 4120(SP)(R13*4) + ADDQ $0x01, DI + SUBQ $0x01, R9 + +index_loop_encodeSnappyBetterBlockAsm8B: + CMPQ DI, R9 + JAE search_loop_encodeSnappyBetterBlockAsm8B + MOVQ (DX)(DI*1), R8 + MOVQ (DX)(R9*1), R10 + SHLQ $0x10, R8 + IMULQ SI, R8 + SHRQ $0x36, R8 SHLQ $0x10, R10 IMULQ SI, R10 SHRQ $0x36, R10 - SHLQ $0x20, R11 - IMULQ R8, R11 - SHRQ $0x38, R11 - SHLQ $0x10, R13 - IMULQ SI, R13 - SHRQ $0x36, R13 + MOVL DI, 24(SP)(R8*4) MOVL R9, 24(SP)(R10*4) - MOVL DI, 4120(SP)(R11*4) - MOVL DI, 24(SP)(R13*4) - JMP search_loop_encodeSnappyBetterBlockAsm8B + ADDQ $0x02, DI + SUBQ $0x02, R9 + JMP index_loop_encodeSnappyBetterBlockAsm8B emit_remainder_encodeSnappyBetterBlockAsm8B: MOVQ src_len+32(FP), CX |