summary | refs | log | tree | commit | diff
path: root/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s')
-rw-r--r-- vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s | 919
1 files changed, 494 insertions, 425 deletions
diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
index 36915d949..81a487d6d 100644
--- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
+++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
@@ -1,7 +1,6 @@
// Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT.
//go:build !appengine && !noasm && gc && !noasm
-// +build !appengine,!noasm,gc,!noasm
#include "textflag.h"
@@ -5743,9 +5742,9 @@ emit_literal_done_emit_remainder_encodeBlockAsm8B:
// func encodeBetterBlockAsm(dst []byte, src []byte) int
// Requires: BMI, SSE2
-TEXT ·encodeBetterBlockAsm(SB), $327704-56
+TEXT ·encodeBetterBlockAsm(SB), $589848-56
MOVQ dst_base+0(FP), AX
- MOVQ $0x00000a00, CX
+ MOVQ $0x00001200, CX
LEAQ 24(SP), DX
PXOR X0, X0
@@ -5797,27 +5796,37 @@ check_maxskip_cont_encodeBetterBlockAsm:
MOVQ DI, R11
SHLQ $0x08, R10
IMULQ R9, R10
- SHRQ $0x30, R10
+ SHRQ $0x2f, R10
SHLQ $0x20, R11
IMULQ SI, R11
SHRQ $0x32, R11
MOVL 24(SP)(R10*4), SI
- MOVL 262168(SP)(R11*4), R8
+ MOVL 524312(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
- MOVL CX, 262168(SP)(R11*4)
- CMPL (DX)(SI*1), DI
+ MOVL CX, 524312(SP)(R11*4)
+ MOVQ (DX)(SI*1), R10
+ MOVQ (DX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeBetterBlockAsm
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeBetterBlockAsm
- MOVL 20(SP), CX
- JMP search_loop_encodeBetterBlockAsm
+ CMPQ R11, DI
+ JNE no_short_found_encodeBetterBlockAsm
+ MOVL R8, SI
+ JMP candidate_match_encodeBetterBlockAsm
+
+no_short_found_encodeBetterBlockAsm:
+ CMPL R10, DI
+ JEQ candidate_match_encodeBetterBlockAsm
+ CMPL R11, DI
+ JEQ candidateS_match_encodeBetterBlockAsm
+ MOVL 20(SP), CX
+ JMP search_loop_encodeBetterBlockAsm
candidateS_match_encodeBetterBlockAsm:
SHRQ $0x08, DI
MOVQ DI, R10
SHLQ $0x08, R10
IMULQ R9, R10
- SHRQ $0x30, R10
+ SHRQ $0x2f, R10
MOVL 24(SP)(R10*4), SI
INCL CX
MOVL CX, 24(SP)(R10*4)
@@ -6590,52 +6599,49 @@ match_nolit_emitcopy_end_encodeBetterBlockAsm:
match_nolit_dst_ok_encodeBetterBlockAsm:
MOVQ $0x00cf1bbcdcbfa563, SI
MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
+ LEAQ 1(DI), DI
+ LEAQ -2(CX), R9
+ MOVQ (DX)(DI*1), R10
+ MOVQ 1(DX)(DI*1), R11
+ MOVQ (DX)(R9*1), R12
+ MOVQ 1(DX)(R9*1), R13
SHLQ $0x08, R10
IMULQ SI, R10
- SHRQ $0x30, R10
- SHLQ $0x08, R13
- IMULQ SI, R13
- SHRQ $0x30, R13
+ SHRQ $0x2f, R10
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x32, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x32, R12
+ SHLQ $0x08, R12
+ IMULQ SI, R12
+ SHRQ $0x2f, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x32, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 262168(SP)(R11*4)
- MOVL R15, 262168(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
+ MOVL R9, 24(SP)(R12*4)
+ MOVL R8, 524312(SP)(R11*4)
+ MOVL R14, 524312(SP)(R13*4)
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
+
+index_loop_encodeBetterBlockAsm:
+ CMPQ DI, R9
+ JAE search_loop_encodeBetterBlockAsm
+ MOVQ (DX)(DI*1), R8
+ MOVQ (DX)(R9*1), R10
+ SHLQ $0x08, R8
+ IMULQ SI, R8
+ SHRQ $0x2f, R8
SHLQ $0x08, R10
IMULQ SI, R10
- SHRQ $0x30, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x32, R11
- SHLQ $0x08, R13
- IMULQ SI, R13
- SHRQ $0x30, R13
+ SHRQ $0x2f, R10
+ MOVL DI, 24(SP)(R8*4)
MOVL R9, 24(SP)(R10*4)
- MOVL DI, 262168(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeBetterBlockAsm
+ ADDQ $0x02, DI
+ SUBQ $0x02, R9
+ JMP index_loop_encodeBetterBlockAsm
emit_remainder_encodeBetterBlockAsm:
MOVQ src_len+32(FP), CX
@@ -6815,9 +6821,9 @@ emit_literal_done_emit_remainder_encodeBetterBlockAsm:
// func encodeBetterBlockAsm4MB(dst []byte, src []byte) int
// Requires: BMI, SSE2
-TEXT ·encodeBetterBlockAsm4MB(SB), $327704-56
+TEXT ·encodeBetterBlockAsm4MB(SB), $589848-56
MOVQ dst_base+0(FP), AX
- MOVQ $0x00000a00, CX
+ MOVQ $0x00001200, CX
LEAQ 24(SP), DX
PXOR X0, X0
@@ -6869,27 +6875,37 @@ check_maxskip_cont_encodeBetterBlockAsm4MB:
MOVQ DI, R11
SHLQ $0x08, R10
IMULQ R9, R10
- SHRQ $0x30, R10
+ SHRQ $0x2f, R10
SHLQ $0x20, R11
IMULQ SI, R11
SHRQ $0x32, R11
MOVL 24(SP)(R10*4), SI
- MOVL 262168(SP)(R11*4), R8
+ MOVL 524312(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
- MOVL CX, 262168(SP)(R11*4)
- CMPL (DX)(SI*1), DI
+ MOVL CX, 524312(SP)(R11*4)
+ MOVQ (DX)(SI*1), R10
+ MOVQ (DX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeBetterBlockAsm4MB
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeBetterBlockAsm4MB
- MOVL 20(SP), CX
- JMP search_loop_encodeBetterBlockAsm4MB
+ CMPQ R11, DI
+ JNE no_short_found_encodeBetterBlockAsm4MB
+ MOVL R8, SI
+ JMP candidate_match_encodeBetterBlockAsm4MB
+
+no_short_found_encodeBetterBlockAsm4MB:
+ CMPL R10, DI
+ JEQ candidate_match_encodeBetterBlockAsm4MB
+ CMPL R11, DI
+ JEQ candidateS_match_encodeBetterBlockAsm4MB
+ MOVL 20(SP), CX
+ JMP search_loop_encodeBetterBlockAsm4MB
candidateS_match_encodeBetterBlockAsm4MB:
SHRQ $0x08, DI
MOVQ DI, R10
SHLQ $0x08, R10
IMULQ R9, R10
- SHRQ $0x30, R10
+ SHRQ $0x2f, R10
MOVL 24(SP)(R10*4), SI
INCL CX
MOVL CX, 24(SP)(R10*4)
@@ -7600,52 +7616,49 @@ match_nolit_emitcopy_end_encodeBetterBlockAsm4MB:
match_nolit_dst_ok_encodeBetterBlockAsm4MB:
MOVQ $0x00cf1bbcdcbfa563, SI
MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
+ LEAQ 1(DI), DI
+ LEAQ -2(CX), R9
+ MOVQ (DX)(DI*1), R10
+ MOVQ 1(DX)(DI*1), R11
+ MOVQ (DX)(R9*1), R12
+ MOVQ 1(DX)(R9*1), R13
SHLQ $0x08, R10
IMULQ SI, R10
- SHRQ $0x30, R10
- SHLQ $0x08, R13
- IMULQ SI, R13
- SHRQ $0x30, R13
+ SHRQ $0x2f, R10
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x32, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x32, R12
+ SHLQ $0x08, R12
+ IMULQ SI, R12
+ SHRQ $0x2f, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x32, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 262168(SP)(R11*4)
- MOVL R15, 262168(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
+ MOVL R9, 24(SP)(R12*4)
+ MOVL R8, 524312(SP)(R11*4)
+ MOVL R14, 524312(SP)(R13*4)
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
+
+index_loop_encodeBetterBlockAsm4MB:
+ CMPQ DI, R9
+ JAE search_loop_encodeBetterBlockAsm4MB
+ MOVQ (DX)(DI*1), R8
+ MOVQ (DX)(R9*1), R10
+ SHLQ $0x08, R8
+ IMULQ SI, R8
+ SHRQ $0x2f, R8
SHLQ $0x08, R10
IMULQ SI, R10
- SHRQ $0x30, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x32, R11
- SHLQ $0x08, R13
- IMULQ SI, R13
- SHRQ $0x30, R13
+ SHRQ $0x2f, R10
+ MOVL DI, 24(SP)(R8*4)
MOVL R9, 24(SP)(R10*4)
- MOVL DI, 262168(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeBetterBlockAsm4MB
+ ADDQ $0x02, DI
+ SUBQ $0x02, R9
+ JMP index_loop_encodeBetterBlockAsm4MB
emit_remainder_encodeBetterBlockAsm4MB:
MOVQ src_len+32(FP), CX
@@ -7871,12 +7884,22 @@ search_loop_encodeBetterBlockAsm12B:
MOVL 65560(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
MOVL CX, 65560(SP)(R11*4)
- CMPL (DX)(SI*1), DI
+ MOVQ (DX)(SI*1), R10
+ MOVQ (DX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeBetterBlockAsm12B
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeBetterBlockAsm12B
- MOVL 20(SP), CX
- JMP search_loop_encodeBetterBlockAsm12B
+ CMPQ R11, DI
+ JNE no_short_found_encodeBetterBlockAsm12B
+ MOVL R8, SI
+ JMP candidate_match_encodeBetterBlockAsm12B
+
+no_short_found_encodeBetterBlockAsm12B:
+ CMPL R10, DI
+ JEQ candidate_match_encodeBetterBlockAsm12B
+ CMPL R11, DI
+ JEQ candidateS_match_encodeBetterBlockAsm12B
+ MOVL 20(SP), CX
+ JMP search_loop_encodeBetterBlockAsm12B
candidateS_match_encodeBetterBlockAsm12B:
SHRQ $0x08, DI
@@ -8447,52 +8470,49 @@ match_nolit_emitcopy_end_encodeBetterBlockAsm12B:
match_nolit_dst_ok_encodeBetterBlockAsm12B:
MOVQ $0x0000cf1bbcdcbf9b, SI
MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
+ LEAQ 1(DI), DI
+ LEAQ -2(CX), R9
+ MOVQ (DX)(DI*1), R10
+ MOVQ 1(DX)(DI*1), R11
+ MOVQ (DX)(R9*1), R12
+ MOVQ 1(DX)(R9*1), R13
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x32, R10
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x32, R13
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x34, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x34, R12
+ SHLQ $0x10, R12
+ IMULQ SI, R12
+ SHRQ $0x32, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x34, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 65560(SP)(R11*4)
- MOVL R15, 65560(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
+ MOVL R9, 24(SP)(R12*4)
+ MOVL R8, 65560(SP)(R11*4)
+ MOVL R14, 65560(SP)(R13*4)
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
+
+index_loop_encodeBetterBlockAsm12B:
+ CMPQ DI, R9
+ JAE search_loop_encodeBetterBlockAsm12B
+ MOVQ (DX)(DI*1), R8
+ MOVQ (DX)(R9*1), R10
+ SHLQ $0x10, R8
+ IMULQ SI, R8
+ SHRQ $0x32, R8
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x32, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x34, R11
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x32, R13
+ MOVL DI, 24(SP)(R8*4)
MOVL R9, 24(SP)(R10*4)
- MOVL DI, 65560(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeBetterBlockAsm12B
+ ADDQ $0x02, DI
+ SUBQ $0x02, R9
+ JMP index_loop_encodeBetterBlockAsm12B
emit_remainder_encodeBetterBlockAsm12B:
MOVQ src_len+32(FP), CX
@@ -8707,12 +8727,22 @@ search_loop_encodeBetterBlockAsm10B:
MOVL 16408(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
MOVL CX, 16408(SP)(R11*4)
- CMPL (DX)(SI*1), DI
+ MOVQ (DX)(SI*1), R10
+ MOVQ (DX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeBetterBlockAsm10B
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeBetterBlockAsm10B
- MOVL 20(SP), CX
- JMP search_loop_encodeBetterBlockAsm10B
+ CMPQ R11, DI
+ JNE no_short_found_encodeBetterBlockAsm10B
+ MOVL R8, SI
+ JMP candidate_match_encodeBetterBlockAsm10B
+
+no_short_found_encodeBetterBlockAsm10B:
+ CMPL R10, DI
+ JEQ candidate_match_encodeBetterBlockAsm10B
+ CMPL R11, DI
+ JEQ candidateS_match_encodeBetterBlockAsm10B
+ MOVL 20(SP), CX
+ JMP search_loop_encodeBetterBlockAsm10B
candidateS_match_encodeBetterBlockAsm10B:
SHRQ $0x08, DI
@@ -9283,52 +9313,49 @@ match_nolit_emitcopy_end_encodeBetterBlockAsm10B:
match_nolit_dst_ok_encodeBetterBlockAsm10B:
MOVQ $0x0000cf1bbcdcbf9b, SI
MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
+ LEAQ 1(DI), DI
+ LEAQ -2(CX), R9
+ MOVQ (DX)(DI*1), R10
+ MOVQ 1(DX)(DI*1), R11
+ MOVQ (DX)(R9*1), R12
+ MOVQ 1(DX)(R9*1), R13
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x34, R10
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x34, R13
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x36, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x36, R12
+ SHLQ $0x10, R12
+ IMULQ SI, R12
+ SHRQ $0x34, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x36, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 16408(SP)(R11*4)
- MOVL R15, 16408(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
+ MOVL R9, 24(SP)(R12*4)
+ MOVL R8, 16408(SP)(R11*4)
+ MOVL R14, 16408(SP)(R13*4)
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
+
+index_loop_encodeBetterBlockAsm10B:
+ CMPQ DI, R9
+ JAE search_loop_encodeBetterBlockAsm10B
+ MOVQ (DX)(DI*1), R8
+ MOVQ (DX)(R9*1), R10
+ SHLQ $0x10, R8
+ IMULQ SI, R8
+ SHRQ $0x34, R8
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x34, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x36, R11
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x34, R13
+ MOVL DI, 24(SP)(R8*4)
MOVL R9, 24(SP)(R10*4)
- MOVL DI, 16408(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeBetterBlockAsm10B
+ ADDQ $0x02, DI
+ SUBQ $0x02, R9
+ JMP index_loop_encodeBetterBlockAsm10B
emit_remainder_encodeBetterBlockAsm10B:
MOVQ src_len+32(FP), CX
@@ -9543,12 +9570,22 @@ search_loop_encodeBetterBlockAsm8B:
MOVL 4120(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
MOVL CX, 4120(SP)(R11*4)
- CMPL (DX)(SI*1), DI
+ MOVQ (DX)(SI*1), R10
+ MOVQ (DX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeBetterBlockAsm8B
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeBetterBlockAsm8B
- MOVL 20(SP), CX
- JMP search_loop_encodeBetterBlockAsm8B
+ CMPQ R11, DI
+ JNE no_short_found_encodeBetterBlockAsm8B
+ MOVL R8, SI
+ JMP candidate_match_encodeBetterBlockAsm8B
+
+no_short_found_encodeBetterBlockAsm8B:
+ CMPL R10, DI
+ JEQ candidate_match_encodeBetterBlockAsm8B
+ CMPL R11, DI
+ JEQ candidateS_match_encodeBetterBlockAsm8B
+ MOVL 20(SP), CX
+ JMP search_loop_encodeBetterBlockAsm8B
candidateS_match_encodeBetterBlockAsm8B:
SHRQ $0x08, DI
@@ -10105,52 +10142,49 @@ match_nolit_emitcopy_end_encodeBetterBlockAsm8B:
match_nolit_dst_ok_encodeBetterBlockAsm8B:
MOVQ $0x0000cf1bbcdcbf9b, SI
MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
+ LEAQ 1(DI), DI
+ LEAQ -2(CX), R9
+ MOVQ (DX)(DI*1), R10
+ MOVQ 1(DX)(DI*1), R11
+ MOVQ (DX)(R9*1), R12
+ MOVQ 1(DX)(R9*1), R13
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x36, R10
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x36, R13
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x38, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x38, R12
+ SHLQ $0x10, R12
+ IMULQ SI, R12
+ SHRQ $0x36, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x38, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 4120(SP)(R11*4)
- MOVL R15, 4120(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
+ MOVL R9, 24(SP)(R12*4)
+ MOVL R8, 4120(SP)(R11*4)
+ MOVL R14, 4120(SP)(R13*4)
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
+
+index_loop_encodeBetterBlockAsm8B:
+ CMPQ DI, R9
+ JAE search_loop_encodeBetterBlockAsm8B
+ MOVQ (DX)(DI*1), R8
+ MOVQ (DX)(R9*1), R10
+ SHLQ $0x10, R8
+ IMULQ SI, R8
+ SHRQ $0x36, R8
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x36, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x38, R11
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x36, R13
+ MOVL DI, 24(SP)(R8*4)
MOVL R9, 24(SP)(R10*4)
- MOVL DI, 4120(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeBetterBlockAsm8B
+ ADDQ $0x02, DI
+ SUBQ $0x02, R9
+ JMP index_loop_encodeBetterBlockAsm8B
emit_remainder_encodeBetterBlockAsm8B:
MOVQ src_len+32(FP), CX
@@ -14287,9 +14321,9 @@ emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B:
// func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int
// Requires: BMI, SSE2
-TEXT ·encodeSnappyBetterBlockAsm(SB), $327704-56
+TEXT ·encodeSnappyBetterBlockAsm(SB), $589848-56
MOVQ dst_base+0(FP), AX
- MOVQ $0x00000a00, CX
+ MOVQ $0x00001200, CX
LEAQ 24(SP), DX
PXOR X0, X0
@@ -14341,27 +14375,37 @@ check_maxskip_cont_encodeSnappyBetterBlockAsm:
MOVQ DI, R11
SHLQ $0x08, R10
IMULQ R9, R10
- SHRQ $0x30, R10
+ SHRQ $0x2f, R10
SHLQ $0x20, R11
IMULQ SI, R11
SHRQ $0x32, R11
MOVL 24(SP)(R10*4), SI
- MOVL 262168(SP)(R11*4), R8
+ MOVL 524312(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
- MOVL CX, 262168(SP)(R11*4)
- CMPL (DX)(SI*1), DI
+ MOVL CX, 524312(SP)(R11*4)
+ MOVQ (DX)(SI*1), R10
+ MOVQ (DX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeSnappyBetterBlockAsm
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeSnappyBetterBlockAsm
- MOVL 20(SP), CX
- JMP search_loop_encodeSnappyBetterBlockAsm
+ CMPQ R11, DI
+ JNE no_short_found_encodeSnappyBetterBlockAsm
+ MOVL R8, SI
+ JMP candidate_match_encodeSnappyBetterBlockAsm
+
+no_short_found_encodeSnappyBetterBlockAsm:
+ CMPL R10, DI
+ JEQ candidate_match_encodeSnappyBetterBlockAsm
+ CMPL R11, DI
+ JEQ candidateS_match_encodeSnappyBetterBlockAsm
+ MOVL 20(SP), CX
+ JMP search_loop_encodeSnappyBetterBlockAsm
candidateS_match_encodeSnappyBetterBlockAsm:
SHRQ $0x08, DI
MOVQ DI, R10
SHLQ $0x08, R10
IMULQ R9, R10
- SHRQ $0x30, R10
+ SHRQ $0x2f, R10
MOVL 24(SP)(R10*4), SI
INCL CX
MOVL CX, 24(SP)(R10*4)
@@ -14685,52 +14729,49 @@ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm:
match_nolit_dst_ok_encodeSnappyBetterBlockAsm:
MOVQ $0x00cf1bbcdcbfa563, SI
MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
+ LEAQ 1(DI), DI
+ LEAQ -2(CX), R9
+ MOVQ (DX)(DI*1), R10
+ MOVQ 1(DX)(DI*1), R11
+ MOVQ (DX)(R9*1), R12
+ MOVQ 1(DX)(R9*1), R13
SHLQ $0x08, R10
IMULQ SI, R10
- SHRQ $0x30, R10
- SHLQ $0x08, R13
- IMULQ SI, R13
- SHRQ $0x30, R13
+ SHRQ $0x2f, R10
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x32, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x32, R12
+ SHLQ $0x08, R12
+ IMULQ SI, R12
+ SHRQ $0x2f, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x32, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 262168(SP)(R11*4)
- MOVL R15, 262168(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
+ MOVL R9, 24(SP)(R12*4)
+ MOVL R8, 524312(SP)(R11*4)
+ MOVL R14, 524312(SP)(R13*4)
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
+
+index_loop_encodeSnappyBetterBlockAsm:
+ CMPQ DI, R9
+ JAE search_loop_encodeSnappyBetterBlockAsm
+ MOVQ (DX)(DI*1), R8
+ MOVQ (DX)(R9*1), R10
+ SHLQ $0x08, R8
+ IMULQ SI, R8
+ SHRQ $0x2f, R8
SHLQ $0x08, R10
IMULQ SI, R10
- SHRQ $0x30, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x32, R11
- SHLQ $0x08, R13
- IMULQ SI, R13
- SHRQ $0x30, R13
+ SHRQ $0x2f, R10
+ MOVL DI, 24(SP)(R8*4)
MOVL R9, 24(SP)(R10*4)
- MOVL DI, 262168(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeSnappyBetterBlockAsm
+ ADDQ $0x02, DI
+ SUBQ $0x02, R9
+ JMP index_loop_encodeSnappyBetterBlockAsm
emit_remainder_encodeSnappyBetterBlockAsm:
MOVQ src_len+32(FP), CX
@@ -14964,12 +15005,22 @@ search_loop_encodeSnappyBetterBlockAsm64K:
MOVL 262168(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
MOVL CX, 262168(SP)(R11*4)
- CMPL (DX)(SI*1), DI
+ MOVQ (DX)(SI*1), R10
+ MOVQ (DX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeSnappyBetterBlockAsm64K
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeSnappyBetterBlockAsm64K
- MOVL 20(SP), CX
- JMP search_loop_encodeSnappyBetterBlockAsm64K
+ CMPQ R11, DI
+ JNE no_short_found_encodeSnappyBetterBlockAsm64K
+ MOVL R8, SI
+ JMP candidate_match_encodeSnappyBetterBlockAsm64K
+
+no_short_found_encodeSnappyBetterBlockAsm64K:
+ CMPL R10, DI
+ JEQ candidate_match_encodeSnappyBetterBlockAsm64K
+ CMPL R11, DI
+ JEQ candidateS_match_encodeSnappyBetterBlockAsm64K
+ MOVL 20(SP), CX
+ JMP search_loop_encodeSnappyBetterBlockAsm64K
candidateS_match_encodeSnappyBetterBlockAsm64K:
SHRQ $0x08, DI
@@ -15248,52 +15299,49 @@ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K:
match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K:
MOVQ $0x00cf1bbcdcbfa563, SI
MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
+ LEAQ 1(DI), DI
+ LEAQ -2(CX), R9
+ MOVQ (DX)(DI*1), R10
+ MOVQ 1(DX)(DI*1), R11
+ MOVQ (DX)(R9*1), R12
+ MOVQ 1(DX)(R9*1), R13
SHLQ $0x08, R10
IMULQ SI, R10
SHRQ $0x30, R10
- SHLQ $0x08, R13
- IMULQ SI, R13
- SHRQ $0x30, R13
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x32, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x32, R12
+ SHLQ $0x08, R12
+ IMULQ SI, R12
+ SHRQ $0x30, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x32, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 262168(SP)(R11*4)
- MOVL R15, 262168(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
+ MOVL R9, 24(SP)(R12*4)
+ MOVL R8, 262168(SP)(R11*4)
+ MOVL R14, 262168(SP)(R13*4)
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
+
+index_loop_encodeSnappyBetterBlockAsm64K:
+ CMPQ DI, R9
+ JAE search_loop_encodeSnappyBetterBlockAsm64K
+ MOVQ (DX)(DI*1), R8
+ MOVQ (DX)(R9*1), R10
+ SHLQ $0x08, R8
+ IMULQ SI, R8
+ SHRQ $0x30, R8
SHLQ $0x08, R10
IMULQ SI, R10
SHRQ $0x30, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x32, R11
- SHLQ $0x08, R13
- IMULQ SI, R13
- SHRQ $0x30, R13
+ MOVL DI, 24(SP)(R8*4)
MOVL R9, 24(SP)(R10*4)
- MOVL DI, 262168(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeSnappyBetterBlockAsm64K
+ ADDQ $0x02, DI
+ SUBQ $0x02, R9
+ JMP index_loop_encodeSnappyBetterBlockAsm64K
emit_remainder_encodeSnappyBetterBlockAsm64K:
MOVQ src_len+32(FP), CX
@@ -15508,12 +15556,22 @@ search_loop_encodeSnappyBetterBlockAsm12B:
MOVL 65560(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
MOVL CX, 65560(SP)(R11*4)
- CMPL (DX)(SI*1), DI
+ MOVQ (DX)(SI*1), R10
+ MOVQ (DX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeSnappyBetterBlockAsm12B
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeSnappyBetterBlockAsm12B
- MOVL 20(SP), CX
- JMP search_loop_encodeSnappyBetterBlockAsm12B
+ CMPQ R11, DI
+ JNE no_short_found_encodeSnappyBetterBlockAsm12B
+ MOVL R8, SI
+ JMP candidate_match_encodeSnappyBetterBlockAsm12B
+
+no_short_found_encodeSnappyBetterBlockAsm12B:
+ CMPL R10, DI
+ JEQ candidate_match_encodeSnappyBetterBlockAsm12B
+ CMPL R11, DI
+ JEQ candidateS_match_encodeSnappyBetterBlockAsm12B
+ MOVL 20(SP), CX
+ JMP search_loop_encodeSnappyBetterBlockAsm12B
candidateS_match_encodeSnappyBetterBlockAsm12B:
SHRQ $0x08, DI
@@ -15792,52 +15850,49 @@ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B:
match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B:
MOVQ $0x0000cf1bbcdcbf9b, SI
MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
+ LEAQ 1(DI), DI
+ LEAQ -2(CX), R9
+ MOVQ (DX)(DI*1), R10
+ MOVQ 1(DX)(DI*1), R11
+ MOVQ (DX)(R9*1), R12
+ MOVQ 1(DX)(R9*1), R13
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x32, R10
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x32, R13
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x34, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x34, R12
+ SHLQ $0x10, R12
+ IMULQ SI, R12
+ SHRQ $0x32, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x34, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 65560(SP)(R11*4)
- MOVL R15, 65560(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
+ MOVL R9, 24(SP)(R12*4)
+ MOVL R8, 65560(SP)(R11*4)
+ MOVL R14, 65560(SP)(R13*4)
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
+
+index_loop_encodeSnappyBetterBlockAsm12B:
+ CMPQ DI, R9
+ JAE search_loop_encodeSnappyBetterBlockAsm12B
+ MOVQ (DX)(DI*1), R8
+ MOVQ (DX)(R9*1), R10
+ SHLQ $0x10, R8
+ IMULQ SI, R8
+ SHRQ $0x32, R8
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x32, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x34, R11
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x32, R13
+ MOVL DI, 24(SP)(R8*4)
MOVL R9, 24(SP)(R10*4)
- MOVL DI, 65560(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeSnappyBetterBlockAsm12B
+ ADDQ $0x02, DI
+ SUBQ $0x02, R9
+ JMP index_loop_encodeSnappyBetterBlockAsm12B
emit_remainder_encodeSnappyBetterBlockAsm12B:
MOVQ src_len+32(FP), CX
@@ -16052,12 +16107,22 @@ search_loop_encodeSnappyBetterBlockAsm10B:
MOVL 16408(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
MOVL CX, 16408(SP)(R11*4)
- CMPL (DX)(SI*1), DI
+ MOVQ (DX)(SI*1), R10
+ MOVQ (DX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeSnappyBetterBlockAsm10B
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeSnappyBetterBlockAsm10B
- MOVL 20(SP), CX
- JMP search_loop_encodeSnappyBetterBlockAsm10B
+ CMPQ R11, DI
+ JNE no_short_found_encodeSnappyBetterBlockAsm10B
+ MOVL R8, SI
+ JMP candidate_match_encodeSnappyBetterBlockAsm10B
+
+no_short_found_encodeSnappyBetterBlockAsm10B:
+ CMPL R10, DI
+ JEQ candidate_match_encodeSnappyBetterBlockAsm10B
+ CMPL R11, DI
+ JEQ candidateS_match_encodeSnappyBetterBlockAsm10B
+ MOVL 20(SP), CX
+ JMP search_loop_encodeSnappyBetterBlockAsm10B
candidateS_match_encodeSnappyBetterBlockAsm10B:
SHRQ $0x08, DI
@@ -16336,52 +16401,49 @@ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B:
match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B:
MOVQ $0x0000cf1bbcdcbf9b, SI
MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
+ LEAQ 1(DI), DI
+ LEAQ -2(CX), R9
+ MOVQ (DX)(DI*1), R10
+ MOVQ 1(DX)(DI*1), R11
+ MOVQ (DX)(R9*1), R12
+ MOVQ 1(DX)(R9*1), R13
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x34, R10
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x34, R13
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x36, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x36, R12
+ SHLQ $0x10, R12
+ IMULQ SI, R12
+ SHRQ $0x34, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x36, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 16408(SP)(R11*4)
- MOVL R15, 16408(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
+ MOVL R9, 24(SP)(R12*4)
+ MOVL R8, 16408(SP)(R11*4)
+ MOVL R14, 16408(SP)(R13*4)
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
+
+index_loop_encodeSnappyBetterBlockAsm10B:
+ CMPQ DI, R9
+ JAE search_loop_encodeSnappyBetterBlockAsm10B
+ MOVQ (DX)(DI*1), R8
+ MOVQ (DX)(R9*1), R10
+ SHLQ $0x10, R8
+ IMULQ SI, R8
+ SHRQ $0x34, R8
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x34, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x36, R11
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x34, R13
+ MOVL DI, 24(SP)(R8*4)
MOVL R9, 24(SP)(R10*4)
- MOVL DI, 16408(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeSnappyBetterBlockAsm10B
+ ADDQ $0x02, DI
+ SUBQ $0x02, R9
+ JMP index_loop_encodeSnappyBetterBlockAsm10B
emit_remainder_encodeSnappyBetterBlockAsm10B:
MOVQ src_len+32(FP), CX
@@ -16596,12 +16658,22 @@ search_loop_encodeSnappyBetterBlockAsm8B:
MOVL 4120(SP)(R11*4), R8
MOVL CX, 24(SP)(R10*4)
MOVL CX, 4120(SP)(R11*4)
- CMPL (DX)(SI*1), DI
+ MOVQ (DX)(SI*1), R10
+ MOVQ (DX)(R8*1), R11
+ CMPQ R10, DI
JEQ candidate_match_encodeSnappyBetterBlockAsm8B
- CMPL (DX)(R8*1), DI
- JEQ candidateS_match_encodeSnappyBetterBlockAsm8B
- MOVL 20(SP), CX
- JMP search_loop_encodeSnappyBetterBlockAsm8B
+ CMPQ R11, DI
+ JNE no_short_found_encodeSnappyBetterBlockAsm8B
+ MOVL R8, SI
+ JMP candidate_match_encodeSnappyBetterBlockAsm8B
+
+no_short_found_encodeSnappyBetterBlockAsm8B:
+ CMPL R10, DI
+ JEQ candidate_match_encodeSnappyBetterBlockAsm8B
+ CMPL R11, DI
+ JEQ candidateS_match_encodeSnappyBetterBlockAsm8B
+ MOVL 20(SP), CX
+ JMP search_loop_encodeSnappyBetterBlockAsm8B
candidateS_match_encodeSnappyBetterBlockAsm8B:
SHRQ $0x08, DI
@@ -16878,52 +16950,49 @@ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B:
match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B:
MOVQ $0x0000cf1bbcdcbf9b, SI
MOVQ $0x9e3779b1, R8
- INCL DI
- MOVQ (DX)(DI*1), R9
- MOVQ R9, R10
- MOVQ R9, R11
- MOVQ R9, R12
- SHRQ $0x08, R11
- MOVQ R11, R13
- SHRQ $0x10, R12
- LEAL 1(DI), R14
- LEAL 2(DI), R15
- MOVQ -2(DX)(CX*1), R9
+ LEAQ 1(DI), DI
+ LEAQ -2(CX), R9
+ MOVQ (DX)(DI*1), R10
+ MOVQ 1(DX)(DI*1), R11
+ MOVQ (DX)(R9*1), R12
+ MOVQ 1(DX)(R9*1), R13
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x36, R10
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x36, R13
SHLQ $0x20, R11
IMULQ R8, R11
SHRQ $0x38, R11
- SHLQ $0x20, R12
- IMULQ R8, R12
- SHRQ $0x38, R12
+ SHLQ $0x10, R12
+ IMULQ SI, R12
+ SHRQ $0x36, R12
+ SHLQ $0x20, R13
+ IMULQ R8, R13
+ SHRQ $0x38, R13
+ LEAQ 1(DI), R8
+ LEAQ 1(R9), R14
MOVL DI, 24(SP)(R10*4)
- MOVL R14, 24(SP)(R13*4)
- MOVL R14, 4120(SP)(R11*4)
- MOVL R15, 4120(SP)(R12*4)
- MOVQ R9, R10
- MOVQ R9, R11
- SHRQ $0x08, R11
- MOVQ R11, R13
- LEAL -2(CX), R9
- LEAL -1(CX), DI
+ MOVL R9, 24(SP)(R12*4)
+ MOVL R8, 4120(SP)(R11*4)
+ MOVL R14, 4120(SP)(R13*4)
+ ADDQ $0x01, DI
+ SUBQ $0x01, R9
+
+index_loop_encodeSnappyBetterBlockAsm8B:
+ CMPQ DI, R9
+ JAE search_loop_encodeSnappyBetterBlockAsm8B
+ MOVQ (DX)(DI*1), R8
+ MOVQ (DX)(R9*1), R10
+ SHLQ $0x10, R8
+ IMULQ SI, R8
+ SHRQ $0x36, R8
SHLQ $0x10, R10
IMULQ SI, R10
SHRQ $0x36, R10
- SHLQ $0x20, R11
- IMULQ R8, R11
- SHRQ $0x38, R11
- SHLQ $0x10, R13
- IMULQ SI, R13
- SHRQ $0x36, R13
+ MOVL DI, 24(SP)(R8*4)
MOVL R9, 24(SP)(R10*4)
- MOVL DI, 4120(SP)(R11*4)
- MOVL DI, 24(SP)(R13*4)
- JMP search_loop_encodeSnappyBetterBlockAsm8B
+ ADDQ $0x02, DI
+ SUBQ $0x02, R9
+ JMP index_loop_encodeSnappyBetterBlockAsm8B
emit_remainder_encodeSnappyBetterBlockAsm8B:
MOVQ src_len+32(FP), CX