diff options
Diffstat (limited to 'vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s')
-rw-r--r-- | vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s | 655 |
1 files changed, 320 insertions, 335 deletions
diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s index 9222d179c..54031aa31 100644 --- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s +++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s @@ -274,7 +274,6 @@ matchlen_loop_repeat_extend_encodeBlockAsm: LEAL 8(R11), R11 CMPL R8, $0x08 JAE matchlen_loopback_repeat_extend_encodeBlockAsm - JZ repeat_extend_forward_end_encodeBlockAsm matchlen_match4_repeat_extend_encodeBlockAsm: CMPL R8, $0x04 @@ -282,21 +281,21 @@ matchlen_match4_repeat_extend_encodeBlockAsm: MOVL (R9)(R11*1), R10 CMPL (BX)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeBlockAsm - SUBL $0x04, R8 + LEAL -4(R8), R8 LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeBlockAsm: - CMPL R8, $0x02 - JB matchlen_match1_repeat_extend_encodeBlockAsm + CMPL R8, $0x01 + JE matchlen_match1_repeat_extend_encodeBlockAsm + JB repeat_extend_forward_end_encodeBlockAsm MOVW (R9)(R11*1), R10 CMPW (BX)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeBlockAsm - SUBL $0x02, R8 LEAL 2(R11), R11 + SUBL $0x02, R8 + JZ repeat_extend_forward_end_encodeBlockAsm matchlen_match1_repeat_extend_encodeBlockAsm: - CMPL R8, $0x01 - JB repeat_extend_forward_end_encodeBlockAsm MOVB (R9)(R11*1), R10 CMPB (BX)(R11*1), R10 JNE repeat_extend_forward_end_encodeBlockAsm @@ -877,7 +876,6 @@ matchlen_loop_match_nolit_encodeBlockAsm: LEAL 8(R9), R9 CMPL SI, $0x08 JAE matchlen_loopback_match_nolit_encodeBlockAsm - JZ match_nolit_end_encodeBlockAsm matchlen_match4_match_nolit_encodeBlockAsm: CMPL SI, $0x04 @@ -885,21 +883,21 @@ matchlen_match4_match_nolit_encodeBlockAsm: MOVL (DI)(R9*1), R8 CMPL (BX)(R9*1), R8 JNE matchlen_match2_match_nolit_encodeBlockAsm - SUBL $0x04, SI + LEAL -4(SI), SI LEAL 4(R9), R9 matchlen_match2_match_nolit_encodeBlockAsm: - CMPL SI, $0x02 - JB matchlen_match1_match_nolit_encodeBlockAsm + CMPL SI, $0x01 + JE matchlen_match1_match_nolit_encodeBlockAsm + JB match_nolit_end_encodeBlockAsm MOVW (DI)(R9*1), R8 CMPW (BX)(R9*1), R8 JNE matchlen_match1_match_nolit_encodeBlockAsm - SUBL $0x02, SI LEAL 2(R9), R9 + SUBL $0x02, SI + JZ match_nolit_end_encodeBlockAsm matchlen_match1_match_nolit_encodeBlockAsm: - CMPL SI, $0x01 - JB match_nolit_end_encodeBlockAsm MOVB (DI)(R9*1), R8 CMPB (BX)(R9*1), R8 JNE match_nolit_end_encodeBlockAsm @@ -1637,7 +1635,6 @@ matchlen_loop_repeat_extend_encodeBlockAsm4MB: LEAL 8(R11), R11 CMPL R8, $0x08 JAE matchlen_loopback_repeat_extend_encodeBlockAsm4MB - JZ repeat_extend_forward_end_encodeBlockAsm4MB matchlen_match4_repeat_extend_encodeBlockAsm4MB: CMPL R8, $0x04 @@ -1645,21 +1642,21 @@ matchlen_match4_repeat_extend_encodeBlockAsm4MB: MOVL (R9)(R11*1), R10 CMPL (BX)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeBlockAsm4MB - SUBL $0x04, R8 + LEAL -4(R8), R8 LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x02 - JB matchlen_match1_repeat_extend_encodeBlockAsm4MB + CMPL R8, $0x01 + JE matchlen_match1_repeat_extend_encodeBlockAsm4MB + JB repeat_extend_forward_end_encodeBlockAsm4MB MOVW (R9)(R11*1), R10 CMPW (BX)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeBlockAsm4MB - SUBL $0x02, R8 LEAL 2(R11), R11 + SUBL $0x02, R8 + JZ repeat_extend_forward_end_encodeBlockAsm4MB matchlen_match1_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x01 - JB repeat_extend_forward_end_encodeBlockAsm4MB MOVB (R9)(R11*1), R10 CMPB (BX)(R11*1), R10 JNE repeat_extend_forward_end_encodeBlockAsm4MB @@ -2190,7 +2187,6 @@ matchlen_loop_match_nolit_encodeBlockAsm4MB: LEAL 8(R9), R9 CMPL SI, $0x08 JAE matchlen_loopback_match_nolit_encodeBlockAsm4MB - JZ match_nolit_end_encodeBlockAsm4MB matchlen_match4_match_nolit_encodeBlockAsm4MB: CMPL SI, $0x04 @@ -2198,21 +2194,21 @@ matchlen_match4_match_nolit_encodeBlockAsm4MB: MOVL (DI)(R9*1), R8 CMPL (BX)(R9*1), R8 JNE matchlen_match2_match_nolit_encodeBlockAsm4MB - SUBL $0x04, SI + LEAL -4(SI), SI LEAL 4(R9), R9 matchlen_match2_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x02 - JB matchlen_match1_match_nolit_encodeBlockAsm4MB + CMPL SI, $0x01 + JE matchlen_match1_match_nolit_encodeBlockAsm4MB + JB match_nolit_end_encodeBlockAsm4MB MOVW (DI)(R9*1), R8 CMPW (BX)(R9*1), R8 JNE matchlen_match1_match_nolit_encodeBlockAsm4MB - SUBL $0x02, SI LEAL 2(R9), R9 + SUBL $0x02, SI + JZ match_nolit_end_encodeBlockAsm4MB matchlen_match1_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x01 - JB match_nolit_end_encodeBlockAsm4MB MOVB (DI)(R9*1), R8 CMPB (BX)(R9*1), R8 JNE match_nolit_end_encodeBlockAsm4MB @@ -2902,7 +2898,6 @@ matchlen_loop_repeat_extend_encodeBlockAsm12B: LEAL 8(R11), R11 CMPL R8, $0x08 JAE matchlen_loopback_repeat_extend_encodeBlockAsm12B - JZ repeat_extend_forward_end_encodeBlockAsm12B matchlen_match4_repeat_extend_encodeBlockAsm12B: CMPL R8, $0x04 @@ -2910,21 +2905,21 @@ matchlen_match4_repeat_extend_encodeBlockAsm12B: MOVL (R9)(R11*1), R10 CMPL (BX)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeBlockAsm12B - SUBL $0x04, R8 + LEAL -4(R8), R8 LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeBlockAsm12B: - CMPL R8, $0x02 - JB matchlen_match1_repeat_extend_encodeBlockAsm12B + CMPL R8, $0x01 + JE matchlen_match1_repeat_extend_encodeBlockAsm12B + JB repeat_extend_forward_end_encodeBlockAsm12B MOVW (R9)(R11*1), R10 CMPW (BX)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeBlockAsm12B - SUBL $0x02, R8 LEAL 2(R11), R11 + SUBL $0x02, R8 + JZ repeat_extend_forward_end_encodeBlockAsm12B matchlen_match1_repeat_extend_encodeBlockAsm12B: - CMPL R8, $0x01 - JB repeat_extend_forward_end_encodeBlockAsm12B MOVB (R9)(R11*1), R10 CMPB (BX)(R11*1), R10 JNE repeat_extend_forward_end_encodeBlockAsm12B @@ -3333,7 +3328,6 @@ matchlen_loop_match_nolit_encodeBlockAsm12B: LEAL 8(R9), R9 CMPL SI, $0x08 JAE matchlen_loopback_match_nolit_encodeBlockAsm12B - JZ match_nolit_end_encodeBlockAsm12B matchlen_match4_match_nolit_encodeBlockAsm12B: CMPL SI, $0x04 @@ -3341,21 +3335,21 @@ matchlen_match4_match_nolit_encodeBlockAsm12B: MOVL (DI)(R9*1), R8 CMPL (BX)(R9*1), R8 JNE matchlen_match2_match_nolit_encodeBlockAsm12B - SUBL $0x04, SI + LEAL -4(SI), SI LEAL 4(R9), R9 matchlen_match2_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x02 - JB matchlen_match1_match_nolit_encodeBlockAsm12B + CMPL SI, $0x01 + JE matchlen_match1_match_nolit_encodeBlockAsm12B + JB match_nolit_end_encodeBlockAsm12B MOVW (DI)(R9*1), R8 CMPW (BX)(R9*1), R8 JNE matchlen_match1_match_nolit_encodeBlockAsm12B - SUBL $0x02, SI LEAL 2(R9), R9 + SUBL $0x02, SI + JZ match_nolit_end_encodeBlockAsm12B matchlen_match1_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x01 - JB match_nolit_end_encodeBlockAsm12B MOVB (DI)(R9*1), R8 CMPB (BX)(R9*1), R8 JNE match_nolit_end_encodeBlockAsm12B @@ -3935,7 +3929,6 @@ matchlen_loop_repeat_extend_encodeBlockAsm10B: LEAL 8(R11), R11 CMPL R8, $0x08 JAE matchlen_loopback_repeat_extend_encodeBlockAsm10B - JZ repeat_extend_forward_end_encodeBlockAsm10B matchlen_match4_repeat_extend_encodeBlockAsm10B: CMPL R8, $0x04 @@ -3943,21 +3936,21 @@ matchlen_match4_repeat_extend_encodeBlockAsm10B: MOVL (R9)(R11*1), R10 CMPL (BX)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeBlockAsm10B - SUBL $0x04, R8 + LEAL -4(R8), R8 LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x02 - JB matchlen_match1_repeat_extend_encodeBlockAsm10B + CMPL R8, $0x01 + JE matchlen_match1_repeat_extend_encodeBlockAsm10B + JB repeat_extend_forward_end_encodeBlockAsm10B MOVW (R9)(R11*1), R10 CMPW (BX)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeBlockAsm10B - SUBL $0x02, R8 LEAL 2(R11), R11 + SUBL $0x02, R8 + JZ repeat_extend_forward_end_encodeBlockAsm10B matchlen_match1_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x01 - JB repeat_extend_forward_end_encodeBlockAsm10B MOVB (R9)(R11*1), R10 CMPB (BX)(R11*1), R10 JNE repeat_extend_forward_end_encodeBlockAsm10B @@ -4366,7 +4359,6 @@ matchlen_loop_match_nolit_encodeBlockAsm10B: LEAL 8(R9), R9 CMPL SI, $0x08 JAE matchlen_loopback_match_nolit_encodeBlockAsm10B - JZ match_nolit_end_encodeBlockAsm10B matchlen_match4_match_nolit_encodeBlockAsm10B: CMPL SI, $0x04 @@ -4374,21 +4366,21 @@ matchlen_match4_match_nolit_encodeBlockAsm10B: MOVL (DI)(R9*1), R8 CMPL (BX)(R9*1), R8 JNE matchlen_match2_match_nolit_encodeBlockAsm10B - SUBL $0x04, SI + LEAL -4(SI), SI LEAL 4(R9), R9 matchlen_match2_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x02 - JB matchlen_match1_match_nolit_encodeBlockAsm10B + CMPL SI, $0x01 + JE matchlen_match1_match_nolit_encodeBlockAsm10B + JB match_nolit_end_encodeBlockAsm10B MOVW (DI)(R9*1), R8 CMPW (BX)(R9*1), R8 JNE matchlen_match1_match_nolit_encodeBlockAsm10B - SUBL $0x02, SI LEAL 2(R9), R9 + SUBL $0x02, SI + JZ match_nolit_end_encodeBlockAsm10B matchlen_match1_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x01 - JB match_nolit_end_encodeBlockAsm10B MOVB (DI)(R9*1), R8 CMPB (BX)(R9*1), R8 JNE match_nolit_end_encodeBlockAsm10B @@ -4968,7 +4960,6 @@ matchlen_loop_repeat_extend_encodeBlockAsm8B: LEAL 8(R11), R11 CMPL R8, $0x08 JAE matchlen_loopback_repeat_extend_encodeBlockAsm8B - JZ repeat_extend_forward_end_encodeBlockAsm8B matchlen_match4_repeat_extend_encodeBlockAsm8B: CMPL R8, $0x04 @@ -4976,21 +4967,21 @@ matchlen_match4_repeat_extend_encodeBlockAsm8B: MOVL (R9)(R11*1), R10 CMPL (BX)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeBlockAsm8B - SUBL $0x04, R8 + LEAL -4(R8), R8 LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x02 - JB matchlen_match1_repeat_extend_encodeBlockAsm8B + CMPL R8, $0x01 + JE matchlen_match1_repeat_extend_encodeBlockAsm8B + JB repeat_extend_forward_end_encodeBlockAsm8B MOVW (R9)(R11*1), R10 CMPW (BX)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeBlockAsm8B - SUBL $0x02, R8 LEAL 2(R11), R11 + SUBL $0x02, R8 + JZ repeat_extend_forward_end_encodeBlockAsm8B matchlen_match1_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x01 - JB repeat_extend_forward_end_encodeBlockAsm8B MOVB (R9)(R11*1), R10 CMPB (BX)(R11*1), R10 JNE repeat_extend_forward_end_encodeBlockAsm8B @@ -5385,7 +5376,6 @@ matchlen_loop_match_nolit_encodeBlockAsm8B: LEAL 8(R9), R9 CMPL SI, $0x08 JAE matchlen_loopback_match_nolit_encodeBlockAsm8B - JZ match_nolit_end_encodeBlockAsm8B matchlen_match4_match_nolit_encodeBlockAsm8B: CMPL SI, $0x04 @@ -5393,21 +5383,21 @@ matchlen_match4_match_nolit_encodeBlockAsm8B: MOVL (DI)(R9*1), R8 CMPL (BX)(R9*1), R8 JNE matchlen_match2_match_nolit_encodeBlockAsm8B - SUBL $0x04, SI + LEAL -4(SI), SI LEAL 4(R9), R9 matchlen_match2_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x02 - JB matchlen_match1_match_nolit_encodeBlockAsm8B + CMPL SI, $0x01 + JE matchlen_match1_match_nolit_encodeBlockAsm8B + JB match_nolit_end_encodeBlockAsm8B MOVW (DI)(R9*1), R8 CMPW (BX)(R9*1), R8 JNE matchlen_match1_match_nolit_encodeBlockAsm8B - SUBL $0x02, SI LEAL 2(R9), R9 + SUBL $0x02, SI + JZ match_nolit_end_encodeBlockAsm8B matchlen_match1_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x01 - JB match_nolit_end_encodeBlockAsm8B MOVB (DI)(R9*1), R8 CMPB (BX)(R9*1), R8 JNE match_nolit_end_encodeBlockAsm8B @@ -5889,7 +5879,6 @@ matchlen_loop_match_nolit_encodeBetterBlockAsm: LEAL 8(R11), R11 CMPL DI, $0x08 JAE matchlen_loopback_match_nolit_encodeBetterBlockAsm - JZ match_nolit_end_encodeBetterBlockAsm matchlen_match4_match_nolit_encodeBetterBlockAsm: CMPL DI, $0x04 @@ -5897,21 +5886,21 @@ matchlen_match4_match_nolit_encodeBetterBlockAsm: MOVL (R8)(R11*1), R10 CMPL (R9)(R11*1), R10 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm - SUBL $0x04, DI + LEAL -4(DI), DI LEAL 4(R11), R11 matchlen_match2_match_nolit_encodeBetterBlockAsm: - CMPL DI, $0x02 - JB matchlen_match1_match_nolit_encodeBetterBlockAsm + CMPL DI, $0x01 + JE matchlen_match1_match_nolit_encodeBetterBlockAsm + JB match_nolit_end_encodeBetterBlockAsm MOVW (R8)(R11*1), R10 CMPW (R9)(R11*1), R10 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm - SUBL $0x02, DI LEAL 2(R11), R11 + SUBL $0x02, DI + JZ match_nolit_end_encodeBetterBlockAsm matchlen_match1_match_nolit_encodeBetterBlockAsm: - CMPL DI, $0x01 - JB match_nolit_end_encodeBetterBlockAsm MOVB (R8)(R11*1), R10 CMPB (R9)(R11*1), R10 JNE match_nolit_end_encodeBetterBlockAsm @@ -6607,24 +6596,26 @@ match_nolit_dst_ok_encodeBetterBlockAsm: MOVL R8, 24(SP)(R11*4) MOVL DI, 524312(SP)(R10*4) MOVL R13, 524312(SP)(R12*4) + LEAQ 1(R8)(SI*1), DI + SHRQ $0x01, DI ADDQ $0x01, SI SUBQ $0x01, R8 index_loop_encodeBetterBlockAsm: - CMPQ SI, R8 + CMPQ DI, R8 JAE search_loop_encodeBetterBlockAsm - MOVQ (DX)(SI*1), DI - MOVQ (DX)(R8*1), R9 - SHLQ $0x08, DI - IMULQ BX, DI - SHRQ $0x2f, DI + MOVQ (DX)(SI*1), R9 + MOVQ (DX)(DI*1), R10 SHLQ $0x08, R9 IMULQ BX, R9 SHRQ $0x2f, R9 - MOVL SI, 24(SP)(DI*4) - MOVL R8, 24(SP)(R9*4) + SHLQ $0x08, R10 + IMULQ BX, R10 + SHRQ $0x2f, R10 + MOVL SI, 24(SP)(R9*4) + MOVL DI, 24(SP)(R10*4) ADDQ $0x02, SI - SUBQ $0x02, R8 + ADDQ $0x02, DI JMP index_loop_encodeBetterBlockAsm emit_remainder_encodeBetterBlockAsm: @@ -6960,7 +6951,6 @@ matchlen_loop_match_nolit_encodeBetterBlockAsm4MB: LEAL 8(R11), R11 CMPL DI, $0x08 JAE matchlen_loopback_match_nolit_encodeBetterBlockAsm4MB - JZ match_nolit_end_encodeBetterBlockAsm4MB matchlen_match4_match_nolit_encodeBetterBlockAsm4MB: CMPL DI, $0x04 @@ -6968,21 +6958,21 @@ matchlen_match4_match_nolit_encodeBetterBlockAsm4MB: MOVL (R8)(R11*1), R10 CMPL (R9)(R11*1), R10 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm4MB - SUBL $0x04, DI + LEAL -4(DI), DI LEAL 4(R11), R11 matchlen_match2_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x02 - JB matchlen_match1_match_nolit_encodeBetterBlockAsm4MB + CMPL DI, $0x01 + JE matchlen_match1_match_nolit_encodeBetterBlockAsm4MB + JB match_nolit_end_encodeBetterBlockAsm4MB MOVW (R8)(R11*1), R10 CMPW (R9)(R11*1), R10 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm4MB - SUBL $0x02, DI LEAL 2(R11), R11 + SUBL $0x02, DI + JZ match_nolit_end_encodeBetterBlockAsm4MB matchlen_match1_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x01 - JB match_nolit_end_encodeBetterBlockAsm4MB MOVB (R8)(R11*1), R10 CMPB (R9)(R11*1), R10 JNE match_nolit_end_encodeBetterBlockAsm4MB @@ -7620,24 +7610,26 @@ match_nolit_dst_ok_encodeBetterBlockAsm4MB: MOVL R8, 24(SP)(R11*4) MOVL DI, 524312(SP)(R10*4) MOVL R13, 524312(SP)(R12*4) + LEAQ 1(R8)(SI*1), DI + SHRQ $0x01, DI ADDQ $0x01, SI SUBQ $0x01, R8 index_loop_encodeBetterBlockAsm4MB: - CMPQ SI, R8 + CMPQ DI, R8 JAE search_loop_encodeBetterBlockAsm4MB - MOVQ (DX)(SI*1), DI - MOVQ (DX)(R8*1), R9 - SHLQ $0x08, DI - IMULQ BX, DI - SHRQ $0x2f, DI + MOVQ (DX)(SI*1), R9 + MOVQ (DX)(DI*1), R10 SHLQ $0x08, R9 IMULQ BX, R9 SHRQ $0x2f, R9 - MOVL SI, 24(SP)(DI*4) - MOVL R8, 24(SP)(R9*4) + SHLQ $0x08, R10 + IMULQ BX, R10 + SHRQ $0x2f, R10 + MOVL SI, 24(SP)(R9*4) + MOVL DI, 24(SP)(R10*4) ADDQ $0x02, SI - SUBQ $0x02, R8 + ADDQ $0x02, DI JMP index_loop_encodeBetterBlockAsm4MB emit_remainder_encodeBetterBlockAsm4MB: @@ -7957,7 +7949,6 @@ matchlen_loop_match_nolit_encodeBetterBlockAsm12B: LEAL 8(R11), R11 CMPL DI, $0x08 JAE matchlen_loopback_match_nolit_encodeBetterBlockAsm12B - JZ match_nolit_end_encodeBetterBlockAsm12B matchlen_match4_match_nolit_encodeBetterBlockAsm12B: CMPL DI, $0x04 @@ -7965,21 +7956,21 @@ matchlen_match4_match_nolit_encodeBetterBlockAsm12B: MOVL (R8)(R11*1), R10 CMPL (R9)(R11*1), R10 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm12B - SUBL $0x04, DI + LEAL -4(DI), DI LEAL 4(R11), R11 matchlen_match2_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x02 - JB matchlen_match1_match_nolit_encodeBetterBlockAsm12B + CMPL DI, $0x01 + JE matchlen_match1_match_nolit_encodeBetterBlockAsm12B + JB match_nolit_end_encodeBetterBlockAsm12B MOVW (R8)(R11*1), R10 CMPW (R9)(R11*1), R10 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm12B - SUBL $0x02, DI LEAL 2(R11), R11 + SUBL $0x02, DI + JZ match_nolit_end_encodeBetterBlockAsm12B matchlen_match1_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x01 - JB match_nolit_end_encodeBetterBlockAsm12B MOVB (R8)(R11*1), R10 CMPB (R9)(R11*1), R10 JNE match_nolit_end_encodeBetterBlockAsm12B @@ -8478,24 +8469,26 @@ match_nolit_dst_ok_encodeBetterBlockAsm12B: MOVL R8, 24(SP)(R11*4) MOVL DI, 65560(SP)(R10*4) MOVL R13, 65560(SP)(R12*4) + LEAQ 1(R8)(SI*1), DI + SHRQ $0x01, DI ADDQ $0x01, SI SUBQ $0x01, R8 index_loop_encodeBetterBlockAsm12B: - CMPQ SI, R8 + CMPQ DI, R8 JAE search_loop_encodeBetterBlockAsm12B - MOVQ (DX)(SI*1), DI - MOVQ (DX)(R8*1), R9 - SHLQ $0x10, DI - IMULQ BX, DI - SHRQ $0x32, DI + MOVQ (DX)(SI*1), R9 + MOVQ (DX)(DI*1), R10 SHLQ $0x10, R9 IMULQ BX, R9 SHRQ $0x32, R9 - MOVL SI, 24(SP)(DI*4) - MOVL R8, 24(SP)(R9*4) + SHLQ $0x10, R10 + IMULQ BX, R10 + SHRQ $0x32, R10 + MOVL SI, 24(SP)(R9*4) + MOVL DI, 24(SP)(R10*4) ADDQ $0x02, SI - SUBQ $0x02, R8 + ADDQ $0x02, DI JMP index_loop_encodeBetterBlockAsm12B emit_remainder_encodeBetterBlockAsm12B: @@ -8807,7 +8800,6 @@ matchlen_loop_match_nolit_encodeBetterBlockAsm10B: LEAL 8(R11), R11 CMPL DI, $0x08 JAE matchlen_loopback_match_nolit_encodeBetterBlockAsm10B - JZ match_nolit_end_encodeBetterBlockAsm10B matchlen_match4_match_nolit_encodeBetterBlockAsm10B: CMPL DI, $0x04 @@ -8815,21 +8807,21 @@ matchlen_match4_match_nolit_encodeBetterBlockAsm10B: MOVL (R8)(R11*1), R10 CMPL (R9)(R11*1), R10 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm10B - SUBL $0x04, DI + LEAL -4(DI), DI LEAL 4(R11), R11 matchlen_match2_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x02 - JB matchlen_match1_match_nolit_encodeBetterBlockAsm10B + CMPL DI, $0x01 + JE matchlen_match1_match_nolit_encodeBetterBlockAsm10B + JB match_nolit_end_encodeBetterBlockAsm10B MOVW (R8)(R11*1), R10 CMPW (R9)(R11*1), R10 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm10B - SUBL $0x02, DI LEAL 2(R11), R11 + SUBL $0x02, DI + JZ match_nolit_end_encodeBetterBlockAsm10B matchlen_match1_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x01 - JB match_nolit_end_encodeBetterBlockAsm10B MOVB (R8)(R11*1), R10 CMPB (R9)(R11*1), R10 JNE match_nolit_end_encodeBetterBlockAsm10B @@ -9328,24 +9320,26 @@ match_nolit_dst_ok_encodeBetterBlockAsm10B: MOVL R8, 24(SP)(R11*4) MOVL DI, 16408(SP)(R10*4) MOVL R13, 16408(SP)(R12*4) + LEAQ 1(R8)(SI*1), DI + SHRQ $0x01, DI ADDQ $0x01, SI SUBQ $0x01, R8 index_loop_encodeBetterBlockAsm10B: - CMPQ SI, R8 + CMPQ DI, R8 JAE search_loop_encodeBetterBlockAsm10B - MOVQ (DX)(SI*1), DI - MOVQ (DX)(R8*1), R9 - SHLQ $0x10, DI - IMULQ BX, DI - SHRQ $0x34, DI + MOVQ (DX)(SI*1), R9 + MOVQ (DX)(DI*1), R10 SHLQ $0x10, R9 IMULQ BX, R9 SHRQ $0x34, R9 - MOVL SI, 24(SP)(DI*4) - MOVL R8, 24(SP)(R9*4) + SHLQ $0x10, R10 + IMULQ BX, R10 + SHRQ $0x34, R10 + MOVL SI, 24(SP)(R9*4) + MOVL DI, 24(SP)(R10*4) ADDQ $0x02, SI - SUBQ $0x02, R8 + ADDQ $0x02, DI JMP index_loop_encodeBetterBlockAsm10B emit_remainder_encodeBetterBlockAsm10B: @@ -9657,7 +9651,6 @@ matchlen_loop_match_nolit_encodeBetterBlockAsm8B: LEAL 8(R11), R11 CMPL DI, $0x08 JAE matchlen_loopback_match_nolit_encodeBetterBlockAsm8B - JZ match_nolit_end_encodeBetterBlockAsm8B matchlen_match4_match_nolit_encodeBetterBlockAsm8B: CMPL DI, $0x04 @@ -9665,21 +9658,21 @@ matchlen_match4_match_nolit_encodeBetterBlockAsm8B: MOVL (R8)(R11*1), R10 CMPL (R9)(R11*1), R10 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm8B - SUBL $0x04, DI + LEAL -4(DI), DI LEAL 4(R11), R11 matchlen_match2_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x02 - JB matchlen_match1_match_nolit_encodeBetterBlockAsm8B + CMPL DI, $0x01 + JE matchlen_match1_match_nolit_encodeBetterBlockAsm8B + JB match_nolit_end_encodeBetterBlockAsm8B MOVW (R8)(R11*1), R10 CMPW (R9)(R11*1), R10 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm8B - SUBL $0x02, DI LEAL 2(R11), R11 + SUBL $0x02, DI + JZ match_nolit_end_encodeBetterBlockAsm8B matchlen_match1_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x01 - JB match_nolit_end_encodeBetterBlockAsm8B MOVB (R8)(R11*1), R10 CMPB (R9)(R11*1), R10 JNE match_nolit_end_encodeBetterBlockAsm8B @@ -10164,24 +10157,26 @@ match_nolit_dst_ok_encodeBetterBlockAsm8B: MOVL R8, 24(SP)(R11*4) MOVL DI, 4120(SP)(R10*4) MOVL R13, 4120(SP)(R12*4) + LEAQ 1(R8)(SI*1), DI + SHRQ $0x01, DI ADDQ $0x01, SI SUBQ $0x01, R8 index_loop_encodeBetterBlockAsm8B: - CMPQ SI, R8 + CMPQ DI, R8 JAE search_loop_encodeBetterBlockAsm8B - MOVQ (DX)(SI*1), DI - MOVQ (DX)(R8*1), R9 - SHLQ $0x10, DI - IMULQ BX, DI - SHRQ $0x36, DI + MOVQ (DX)(SI*1), R9 + MOVQ (DX)(DI*1), R10 SHLQ $0x10, R9 IMULQ BX, R9 SHRQ $0x36, R9 - MOVL SI, 24(SP)(DI*4) - MOVL R8, 24(SP)(R9*4) + SHLQ $0x10, R10 + IMULQ BX, R10 + SHRQ $0x36, R10 + MOVL SI, 24(SP)(R9*4) + MOVL DI, 24(SP)(R10*4) ADDQ $0x02, SI - SUBQ $0x02, R8 + ADDQ $0x02, DI JMP index_loop_encodeBetterBlockAsm8B emit_remainder_encodeBetterBlockAsm8B: @@ -10605,7 +10600,6 @@ matchlen_loop_repeat_extend_encodeSnappyBlockAsm: LEAL 8(R10), R10 CMPL DI, $0x08 JAE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm - JZ repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_match4_repeat_extend_encodeSnappyBlockAsm: CMPL DI, $0x04 @@ -10613,21 +10607,21 @@ matchlen_match4_repeat_extend_encodeSnappyBlockAsm: MOVL (R8)(R10*1), R9 CMPL (BX)(R10*1), R9 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm - SUBL $0x04, DI + LEAL -4(DI), DI LEAL 4(R10), R10 matchlen_match2_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x02 - JB matchlen_match1_repeat_extend_encodeSnappyBlockAsm + CMPL DI, $0x01 + JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm + JB repeat_extend_forward_end_encodeSnappyBlockAsm MOVW (R8)(R10*1), R9 CMPW (BX)(R10*1), R9 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm - SUBL $0x02, DI LEAL 2(R10), R10 + SUBL $0x02, DI + JZ repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_match1_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x01 - JB repeat_extend_forward_end_encodeSnappyBlockAsm MOVB (R8)(R10*1), R9 CMPB (BX)(R10*1), R9 JNE repeat_extend_forward_end_encodeSnappyBlockAsm @@ -10928,7 +10922,6 @@ matchlen_loop_match_nolit_encodeSnappyBlockAsm: LEAL 8(R9), R9 CMPL SI, $0x08 JAE matchlen_loopback_match_nolit_encodeSnappyBlockAsm - JZ match_nolit_end_encodeSnappyBlockAsm matchlen_match4_match_nolit_encodeSnappyBlockAsm: CMPL SI, $0x04 @@ -10936,21 +10929,21 @@ matchlen_match4_match_nolit_encodeSnappyBlockAsm: MOVL (DI)(R9*1), R8 CMPL (BX)(R9*1), R8 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm - SUBL $0x04, SI + LEAL -4(SI), SI LEAL 4(R9), R9 matchlen_match2_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x02 - JB matchlen_match1_match_nolit_encodeSnappyBlockAsm + CMPL SI, $0x01 + JE matchlen_match1_match_nolit_encodeSnappyBlockAsm + JB match_nolit_end_encodeSnappyBlockAsm MOVW (DI)(R9*1), R8 CMPW (BX)(R9*1), R8 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm - SUBL $0x02, SI LEAL 2(R9), R9 + SUBL $0x02, SI + JZ match_nolit_end_encodeSnappyBlockAsm matchlen_match1_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x01 - JB match_nolit_end_encodeSnappyBlockAsm MOVB (DI)(R9*1), R8 CMPB (BX)(R9*1), R8 JNE match_nolit_end_encodeSnappyBlockAsm @@ -11469,7 +11462,6 @@ matchlen_loop_repeat_extend_encodeSnappyBlockAsm64K: LEAL 8(R10), R10 CMPL DI, $0x08 JAE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm64K - JZ repeat_extend_forward_end_encodeSnappyBlockAsm64K matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K: CMPL DI, $0x04 @@ -11477,21 +11469,21 @@ matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K: MOVL (R8)(R10*1), R9 CMPL (BX)(R10*1), R9 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K - SUBL $0x04, DI + LEAL -4(DI), DI LEAL 4(R10), R10 matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x02 - JB matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K + CMPL DI, $0x01 + JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K + JB repeat_extend_forward_end_encodeSnappyBlockAsm64K MOVW (R8)(R10*1), R9 CMPW (BX)(R10*1), R9 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K - SUBL $0x02, DI LEAL 2(R10), R10 + SUBL $0x02, DI + JZ repeat_extend_forward_end_encodeSnappyBlockAsm64K matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x01 - JB repeat_extend_forward_end_encodeSnappyBlockAsm64K MOVB (R8)(R10*1), R9 CMPB (BX)(R10*1), R9 JNE repeat_extend_forward_end_encodeSnappyBlockAsm64K @@ -11752,7 +11744,6 @@ matchlen_loop_match_nolit_encodeSnappyBlockAsm64K: LEAL 8(R9), R9 CMPL SI, $0x08 JAE matchlen_loopback_match_nolit_encodeSnappyBlockAsm64K - JZ match_nolit_end_encodeSnappyBlockAsm64K matchlen_match4_match_nolit_encodeSnappyBlockAsm64K: CMPL SI, $0x04 @@ -11760,21 +11751,21 @@ matchlen_match4_match_nolit_encodeSnappyBlockAsm64K: MOVL (DI)(R9*1), R8 CMPL (BX)(R9*1), R8 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm64K - SUBL $0x04, SI + LEAL -4(SI), SI LEAL 4(R9), R9 matchlen_match2_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x02 - JB matchlen_match1_match_nolit_encodeSnappyBlockAsm64K + CMPL SI, $0x01 + JE matchlen_match1_match_nolit_encodeSnappyBlockAsm64K + JB match_nolit_end_encodeSnappyBlockAsm64K MOVW (DI)(R9*1), R8 CMPW (BX)(R9*1), R8 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm64K - SUBL $0x02, SI LEAL 2(R9), R9 + SUBL $0x02, SI + JZ match_nolit_end_encodeSnappyBlockAsm64K matchlen_match1_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x01 - JB match_nolit_end_encodeSnappyBlockAsm64K MOVB (DI)(R9*1), R8 CMPB (BX)(R9*1), R8 JNE match_nolit_end_encodeSnappyBlockAsm64K @@ -12253,7 +12244,6 @@ matchlen_loop_repeat_extend_encodeSnappyBlockAsm12B: LEAL 8(R10), R10 CMPL DI, $0x08 JAE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm12B - JZ repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B: CMPL DI, $0x04 @@ -12261,21 +12251,21 @@ matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B: MOVL (R8)(R10*1), R9 CMPL (BX)(R10*1), R9 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B - SUBL $0x04, DI + LEAL -4(DI), DI LEAL 4(R10), R10 matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x02 - JB matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B + CMPL DI, $0x01 + JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B + JB repeat_extend_forward_end_encodeSnappyBlockAsm12B MOVW (R8)(R10*1), R9 CMPW (BX)(R10*1), R9 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B - SUBL $0x02, DI LEAL 2(R10), R10 + SUBL $0x02, DI + JZ repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x01 - JB repeat_extend_forward_end_encodeSnappyBlockAsm12B MOVB (R8)(R10*1), R9 CMPB (BX)(R10*1), R9 JNE repeat_extend_forward_end_encodeSnappyBlockAsm12B @@ -12536,7 +12526,6 @@ matchlen_loop_match_nolit_encodeSnappyBlockAsm12B: LEAL 8(R9), R9 CMPL SI, $0x08 JAE matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B - JZ match_nolit_end_encodeSnappyBlockAsm12B matchlen_match4_match_nolit_encodeSnappyBlockAsm12B: CMPL SI, $0x04 @@ -12544,21 +12533,21 @@ matchlen_match4_match_nolit_encodeSnappyBlockAsm12B: MOVL (DI)(R9*1), R8 CMPL (BX)(R9*1), R8 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm12B - SUBL $0x04, SI + LEAL -4(SI), SI LEAL 4(R9), R9 matchlen_match2_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x02 - JB matchlen_match1_match_nolit_encodeSnappyBlockAsm12B + CMPL SI, $0x01 + JE matchlen_match1_match_nolit_encodeSnappyBlockAsm12B + JB match_nolit_end_encodeSnappyBlockAsm12B MOVW (DI)(R9*1), R8 CMPW (BX)(R9*1), R8 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm12B - SUBL $0x02, SI LEAL 2(R9), R9 + SUBL $0x02, SI + JZ match_nolit_end_encodeSnappyBlockAsm12B matchlen_match1_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x01 - JB match_nolit_end_encodeSnappyBlockAsm12B MOVB (DI)(R9*1), R8 CMPB (BX)(R9*1), R8 JNE match_nolit_end_encodeSnappyBlockAsm12B @@ -13037,7 +13026,6 @@ matchlen_loop_repeat_extend_encodeSnappyBlockAsm10B: LEAL 8(R10), R10 CMPL DI, $0x08 JAE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm10B - JZ repeat_extend_forward_end_encodeSnappyBlockAsm10B matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B: CMPL DI, $0x04 @@ -13045,21 +13033,21 @@ matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B: MOVL (R8)(R10*1), R9 CMPL (BX)(R10*1), R9 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B - SUBL $0x04, DI + LEAL -4(DI), DI LEAL 4(R10), R10 matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x02 - JB matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B + CMPL DI, $0x01 + JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B + JB repeat_extend_forward_end_encodeSnappyBlockAsm10B MOVW (R8)(R10*1), R9 CMPW (BX)(R10*1), R9 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B - SUBL $0x02, DI LEAL 2(R10), R10 + SUBL $0x02, DI + JZ repeat_extend_forward_end_encodeSnappyBlockAsm10B matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x01 - JB repeat_extend_forward_end_encodeSnappyBlockAsm10B MOVB (R8)(R10*1), R9 CMPB (BX)(R10*1), R9 JNE repeat_extend_forward_end_encodeSnappyBlockAsm10B @@ -13320,7 +13308,6 @@ matchlen_loop_match_nolit_encodeSnappyBlockAsm10B: LEAL 8(R9), R9 CMPL SI, $0x08 JAE matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B - JZ match_nolit_end_encodeSnappyBlockAsm10B matchlen_match4_match_nolit_encodeSnappyBlockAsm10B: CMPL SI, $0x04 @@ -13328,21 +13315,21 @@ matchlen_match4_match_nolit_encodeSnappyBlockAsm10B: MOVL (DI)(R9*1), R8 CMPL (BX)(R9*1), R8 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm10B - SUBL $0x04, SI + LEAL -4(SI), SI LEAL 4(R9), R9 matchlen_match2_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x02 - JB matchlen_match1_match_nolit_encodeSnappyBlockAsm10B + CMPL SI, $0x01 + JE matchlen_match1_match_nolit_encodeSnappyBlockAsm10B + JB match_nolit_end_encodeSnappyBlockAsm10B MOVW (DI)(R9*1), R8 CMPW (BX)(R9*1), R8 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm10B - SUBL $0x02, SI LEAL 2(R9), R9 + SUBL $0x02, SI + JZ match_nolit_end_encodeSnappyBlockAsm10B matchlen_match1_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x01 - JB match_nolit_end_encodeSnappyBlockAsm10B MOVB (DI)(R9*1), R8 CMPB (BX)(R9*1), R8 JNE match_nolit_end_encodeSnappyBlockAsm10B @@ -13821,7 +13808,6 @@ matchlen_loop_repeat_extend_encodeSnappyBlockAsm8B: LEAL 8(R10), R10 CMPL DI, $0x08 JAE matchlen_loopback_repeat_extend_encodeSnappyBlockAsm8B - JZ repeat_extend_forward_end_encodeSnappyBlockAsm8B matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B: CMPL DI, $0x04 @@ -13829,21 +13815,21 @@ matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B: MOVL (R8)(R10*1), R9 CMPL (BX)(R10*1), R9 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B - SUBL $0x04, DI + LEAL -4(DI), DI LEAL 4(R10), R10 matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x02 - JB matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B + CMPL DI, $0x01 + JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B + JB repeat_extend_forward_end_encodeSnappyBlockAsm8B MOVW (R8)(R10*1), R9 CMPW (BX)(R10*1), R9 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B - SUBL $0x02, DI LEAL 2(R10), R10 + SUBL $0x02, DI + JZ repeat_extend_forward_end_encodeSnappyBlockAsm8B matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x01 - JB repeat_extend_forward_end_encodeSnappyBlockAsm8B MOVB (R8)(R10*1), R9 CMPB (BX)(R10*1), R9 JNE repeat_extend_forward_end_encodeSnappyBlockAsm8B @@ -14102,7 +14088,6 @@ matchlen_loop_match_nolit_encodeSnappyBlockAsm8B: LEAL 8(R9), R9 CMPL SI, $0x08 JAE matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B - JZ match_nolit_end_encodeSnappyBlockAsm8B matchlen_match4_match_nolit_encodeSnappyBlockAsm8B: CMPL SI, $0x04 @@ -14110,21 +14095,21 @@ matchlen_match4_match_nolit_encodeSnappyBlockAsm8B: MOVL (DI)(R9*1), R8 CMPL (BX)(R9*1), R8 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm8B - SUBL $0x04, SI + LEAL -4(SI), SI LEAL 4(R9), R9 matchlen_match2_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x02 - JB matchlen_match1_match_nolit_encodeSnappyBlockAsm8B + CMPL SI, $0x01 + JE matchlen_match1_match_nolit_encodeSnappyBlockAsm8B + JB match_nolit_end_encodeSnappyBlockAsm8B MOVW (DI)(R9*1), R8 CMPW (BX)(R9*1), R8 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm8B - SUBL $0x02, SI LEAL 2(R9), R9 + SUBL $0x02, SI + JZ match_nolit_end_encodeSnappyBlockAsm8B matchlen_match1_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x01 - JB match_nolit_end_encodeSnappyBlockAsm8B MOVB (DI)(R9*1), R8 CMPB (BX)(R9*1), R8 JNE match_nolit_end_encodeSnappyBlockAsm8B @@ -14513,7 +14498,6 @@ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm: LEAL 8(R11), R11 CMPL DI, $0x08 JAE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm - JZ match_nolit_end_encodeSnappyBetterBlockAsm matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm: CMPL DI, $0x04 @@ -14521,21 +14505,21 @@ matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm: MOVL (R8)(R11*1), R10 CMPL (R9)(R11*1), R10 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm - SUBL $0x04, DI + LEAL -4(DI), DI LEAL 4(R11), R11 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x02 - JB matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm + CMPL DI, $0x01 + JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm + JB match_nolit_end_encodeSnappyBetterBlockAsm MOVW (R8)(R11*1), R10 CMPW (R9)(R11*1), R10 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm - SUBL $0x02, DI LEAL 2(R11), R11 + SUBL $0x02, DI + JZ match_nolit_end_encodeSnappyBetterBlockAsm matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x01 - JB match_nolit_end_encodeSnappyBetterBlockAsm MOVB (R8)(R11*1), R10 CMPB (R9)(R11*1), R10 JNE match_nolit_end_encodeSnappyBetterBlockAsm @@ -14790,24 +14774,26 @@ match_nolit_dst_ok_encodeSnappyBetterBlockAsm: MOVL R8, 24(SP)(R11*4) MOVL DI, 524312(SP)(R10*4) MOVL R13, 524312(SP)(R12*4) + LEAQ 1(R8)(SI*1), DI + SHRQ $0x01, DI ADDQ $0x01, SI SUBQ $0x01, R8 index_loop_encodeSnappyBetterBlockAsm: - CMPQ SI, R8 + CMPQ DI, R8 JAE search_loop_encodeSnappyBetterBlockAsm - MOVQ (DX)(SI*1), DI - MOVQ (DX)(R8*1), R9 - SHLQ $0x08, DI - IMULQ BX, DI - SHRQ $0x2f, DI + MOVQ (DX)(SI*1), R9 + MOVQ (DX)(DI*1), R10 SHLQ $0x08, R9 IMULQ BX, R9 SHRQ $0x2f, R9 - MOVL SI, 24(SP)(DI*4) - MOVL R8, 24(SP)(R9*4) + SHLQ $0x08, R10 + IMULQ BX, R10 + SHRQ $0x2f, R10 + MOVL SI, 24(SP)(R9*4) + MOVL DI, 24(SP)(R10*4) ADDQ $0x02, SI - SUBQ $0x02, R8 + ADDQ $0x02, DI JMP index_loop_encodeSnappyBetterBlockAsm emit_remainder_encodeSnappyBetterBlockAsm: @@ -15135,7 +15121,6 @@ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm64K: LEAL 8(R11), R11 CMPL DI, $0x08 JAE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm64K - JZ match_nolit_end_encodeSnappyBetterBlockAsm64K matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K: CMPL DI, $0x04 @@ -15143,21 +15128,21 @@ matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K: MOVL (R8)(R11*1), R10 CMPL (R9)(R11*1), R10 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K - SUBL $0x04, DI + LEAL -4(DI), DI LEAL 4(R11), R11 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x02 - JB matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K + CMPL DI, $0x01 + JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K + JB match_nolit_end_encodeSnappyBetterBlockAsm64K MOVW (R8)(R11*1), R10 CMPW (R9)(R11*1), R10 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K - SUBL $0x02, DI LEAL 2(R11), R11 + SUBL $0x02, DI + JZ match_nolit_end_encodeSnappyBetterBlockAsm64K matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x01 - JB match_nolit_end_encodeSnappyBetterBlockAsm64K MOVB (R8)(R11*1), R10 CMPB (R9)(R11*1), R10 JNE match_nolit_end_encodeSnappyBetterBlockAsm64K @@ -15363,24 +15348,26 @@ match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K: MOVL R8, 24(SP)(R11*4) MOVL DI, 262168(SP)(R10*4) MOVL R13, 262168(SP)(R12*4) + LEAQ 1(R8)(SI*1), DI + SHRQ $0x01, DI ADDQ $0x01, SI SUBQ $0x01, R8 index_loop_encodeSnappyBetterBlockAsm64K: - CMPQ SI, R8 + CMPQ DI, R8 JAE search_loop_encodeSnappyBetterBlockAsm64K - MOVQ (DX)(SI*1), DI - MOVQ (DX)(R8*1), R9 - SHLQ $0x08, DI - IMULQ BX, DI - SHRQ $0x30, DI + MOVQ (DX)(SI*1), R9 + MOVQ (DX)(DI*1), R10 SHLQ $0x08, R9 IMULQ BX, R9 SHRQ $0x30, R9 - MOVL SI, 24(SP)(DI*4) - MOVL R8, 24(SP)(R9*4) + SHLQ $0x08, R10 + IMULQ BX, R10 + SHRQ $0x30, R10 + MOVL SI, 24(SP)(R9*4) + MOVL DI, 24(SP)(R10*4) ADDQ $0x02, SI - SUBQ $0x02, R8 + ADDQ $0x02, DI JMP index_loop_encodeSnappyBetterBlockAsm64K emit_remainder_encodeSnappyBetterBlockAsm64K: @@ -15692,7 +15679,6 @@ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm12B: LEAL 8(R11), R11 CMPL DI, $0x08 JAE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm12B - JZ match_nolit_end_encodeSnappyBetterBlockAsm12B matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B: CMPL DI, $0x04 @@ -15700,21 +15686,21 @@ matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B: MOVL (R8)(R11*1), R10 CMPL (R9)(R11*1), R10 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B - SUBL $0x04, DI + LEAL -4(DI), DI LEAL 4(R11), R11 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x02 - JB matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B + CMPL DI, $0x01 + JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B + JB match_nolit_end_encodeSnappyBetterBlockAsm12B MOVW (R8)(R11*1), R10 CMPW (R9)(R11*1), R10 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B - SUBL $0x02, DI LEAL 2(R11), R11 + SUBL $0x02, DI + JZ match_nolit_end_encodeSnappyBetterBlockAsm12B matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x01 - JB match_nolit_end_encodeSnappyBetterBlockAsm12B MOVB (R8)(R11*1), R10 CMPB (R9)(R11*1), R10 JNE match_nolit_end_encodeSnappyBetterBlockAsm12B @@ -15920,24 +15906,26 @@ match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B: MOVL R8, 24(SP)(R11*4) MOVL DI, 65560(SP)(R10*4) MOVL R13, 65560(SP)(R12*4) + LEAQ 1(R8)(SI*1), DI + SHRQ $0x01, DI ADDQ $0x01, SI SUBQ $0x01, R8 index_loop_encodeSnappyBetterBlockAsm12B: - CMPQ SI, R8 + CMPQ DI, R8 JAE search_loop_encodeSnappyBetterBlockAsm12B - MOVQ (DX)(SI*1), DI - MOVQ (DX)(R8*1), R9 - SHLQ $0x10, DI - IMULQ BX, DI - SHRQ $0x32, DI + MOVQ (DX)(SI*1), R9 + MOVQ (DX)(DI*1), R10 SHLQ $0x10, R9 IMULQ BX, R9 SHRQ $0x32, R9 - MOVL SI, 24(SP)(DI*4) - MOVL R8, 24(SP)(R9*4) + SHLQ $0x10, R10 + IMULQ BX, R10 + SHRQ $0x32, R10 + MOVL SI, 24(SP)(R9*4) + MOVL DI, 24(SP)(R10*4) ADDQ $0x02, SI - SUBQ $0x02, R8 + ADDQ $0x02, DI JMP index_loop_encodeSnappyBetterBlockAsm12B emit_remainder_encodeSnappyBetterBlockAsm12B: @@ -16249,7 +16237,6 @@ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm10B: LEAL 8(R11), R11 CMPL DI, $0x08 JAE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm10B - JZ match_nolit_end_encodeSnappyBetterBlockAsm10B matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B: CMPL DI, $0x04 @@ -16257,21 +16244,21 @@ matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B: MOVL (R8)(R11*1), R10 CMPL (R9)(R11*1), R10 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B - SUBL $0x04, DI + LEAL -4(DI), DI LEAL 4(R11), R11 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x02 - JB matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B + CMPL DI, $0x01 + JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B + JB match_nolit_end_encodeSnappyBetterBlockAsm10B MOVW (R8)(R11*1), R10 CMPW (R9)(R11*1), R10 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B - SUBL $0x02, DI LEAL 2(R11), R11 + SUBL $0x02, DI + JZ match_nolit_end_encodeSnappyBetterBlockAsm10B matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x01 - JB match_nolit_end_encodeSnappyBetterBlockAsm10B MOVB (R8)(R11*1), R10 CMPB (R9)(R11*1), R10 JNE match_nolit_end_encodeSnappyBetterBlockAsm10B @@ -16477,24 +16464,26 @@ match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B: MOVL R8, 24(SP)(R11*4) MOVL DI, 16408(SP)(R10*4) MOVL R13, 16408(SP)(R12*4) + LEAQ 1(R8)(SI*1), DI + SHRQ $0x01, DI ADDQ $0x01, SI SUBQ $0x01, R8 index_loop_encodeSnappyBetterBlockAsm10B: - CMPQ SI, R8 + CMPQ DI, R8 JAE search_loop_encodeSnappyBetterBlockAsm10B - MOVQ (DX)(SI*1), DI - MOVQ (DX)(R8*1), R9 - SHLQ $0x10, DI - IMULQ BX, DI - SHRQ $0x34, DI + MOVQ (DX)(SI*1), R9 + MOVQ (DX)(DI*1), R10 SHLQ $0x10, R9 IMULQ BX, R9 SHRQ $0x34, R9 - MOVL SI, 24(SP)(DI*4) - MOVL R8, 24(SP)(R9*4) + SHLQ $0x10, R10 + IMULQ BX, R10 + SHRQ $0x34, R10 + MOVL SI, 24(SP)(R9*4) + MOVL DI, 24(SP)(R10*4) ADDQ $0x02, SI - SUBQ $0x02, R8 + ADDQ $0x02, DI JMP index_loop_encodeSnappyBetterBlockAsm10B emit_remainder_encodeSnappyBetterBlockAsm10B: @@ -16806,7 +16795,6 @@ matchlen_loop_match_nolit_encodeSnappyBetterBlockAsm8B: LEAL 8(R11), R11 CMPL DI, $0x08 JAE matchlen_loopback_match_nolit_encodeSnappyBetterBlockAsm8B - JZ match_nolit_end_encodeSnappyBetterBlockAsm8B matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B: CMPL DI, $0x04 @@ -16814,21 +16802,21 @@ matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B: MOVL (R8)(R11*1), R10 CMPL (R9)(R11*1), R10 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B - SUBL $0x04, DI + LEAL -4(DI), DI LEAL 4(R11), R11 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x02 - JB matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B + CMPL DI, $0x01 + JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B + JB match_nolit_end_encodeSnappyBetterBlockAsm8B MOVW (R8)(R11*1), R10 CMPW (R9)(R11*1), R10 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B - SUBL $0x02, DI LEAL 2(R11), R11 + SUBL $0x02, DI + JZ match_nolit_end_encodeSnappyBetterBlockAsm8B matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x01 - JB match_nolit_end_encodeSnappyBetterBlockAsm8B MOVB (R8)(R11*1), R10 CMPB (R9)(R11*1), R10 JNE match_nolit_end_encodeSnappyBetterBlockAsm8B @@ -17032,24 +17020,26 @@ match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B: MOVL R8, 24(SP)(R11*4) MOVL DI, 4120(SP)(R10*4) MOVL R13, 4120(SP)(R12*4) + LEAQ 1(R8)(SI*1), DI + SHRQ $0x01, DI ADDQ $0x01, SI SUBQ $0x01, R8 index_loop_encodeSnappyBetterBlockAsm8B: - CMPQ SI, R8 + CMPQ DI, R8 JAE search_loop_encodeSnappyBetterBlockAsm8B - MOVQ (DX)(SI*1), DI - MOVQ (DX)(R8*1), R9 - SHLQ $0x10, DI - IMULQ BX, DI - SHRQ $0x36, DI + MOVQ (DX)(SI*1), R9 + MOVQ (DX)(DI*1), R10 SHLQ $0x10, R9 IMULQ BX, R9 SHRQ $0x36, R9 - MOVL SI, 24(SP)(DI*4) - MOVL R8, 24(SP)(R9*4) + SHLQ $0x10, R10 + IMULQ BX, R10 + SHRQ $0x36, R10 + MOVL SI, 24(SP)(R9*4) + MOVL DI, 24(SP)(R10*4) ADDQ $0x02, SI - SUBQ $0x02, R8 + ADDQ $0x02, DI JMP index_loop_encodeSnappyBetterBlockAsm8B emit_remainder_encodeSnappyBetterBlockAsm8B: @@ -17378,7 +17368,6 @@ matchlen_loop_repeat_extend_calcBlockSize: LEAL 8(R10), R10 CMPL DI, $0x08 JAE matchlen_loopback_repeat_extend_calcBlockSize - JZ repeat_extend_forward_end_calcBlockSize matchlen_match4_repeat_extend_calcBlockSize: CMPL DI, $0x04 @@ -17386,21 +17375,21 @@ matchlen_match4_repeat_extend_calcBlockSize: MOVL (R8)(R10*1), R9 CMPL (BX)(R10*1), R9 JNE matchlen_match2_repeat_extend_calcBlockSize - SUBL $0x04, DI + LEAL -4(DI), DI LEAL 4(R10), R10 matchlen_match2_repeat_extend_calcBlockSize: - CMPL DI, $0x02 - JB matchlen_match1_repeat_extend_calcBlockSize + CMPL DI, $0x01 + JE matchlen_match1_repeat_extend_calcBlockSize + JB repeat_extend_forward_end_calcBlockSize MOVW (R8)(R10*1), R9 CMPW (BX)(R10*1), R9 JNE matchlen_match1_repeat_extend_calcBlockSize - SUBL $0x02, DI LEAL 2(R10), R10 + SUBL $0x02, DI + JZ repeat_extend_forward_end_calcBlockSize matchlen_match1_repeat_extend_calcBlockSize: - CMPL DI, $0x01 - JB repeat_extend_forward_end_calcBlockSize MOVB (R8)(R10*1), R9 CMPB (BX)(R10*1), R9 JNE repeat_extend_forward_end_calcBlockSize @@ -17590,7 +17579,6 @@ matchlen_loop_match_nolit_calcBlockSize: LEAL 8(R9), R9 CMPL SI, $0x08 JAE matchlen_loopback_match_nolit_calcBlockSize - JZ match_nolit_end_calcBlockSize matchlen_match4_match_nolit_calcBlockSize: CMPL SI, $0x04 @@ -17598,21 +17586,21 @@ matchlen_match4_match_nolit_calcBlockSize: MOVL (DI)(R9*1), R8 CMPL (BX)(R9*1), R8 JNE matchlen_match2_match_nolit_calcBlockSize - SUBL $0x04, SI + LEAL -4(SI), SI LEAL 4(R9), R9 matchlen_match2_match_nolit_calcBlockSize: - CMPL SI, $0x02 - JB matchlen_match1_match_nolit_calcBlockSize + CMPL SI, $0x01 + JE matchlen_match1_match_nolit_calcBlockSize + JB match_nolit_end_calcBlockSize MOVW (DI)(R9*1), R8 CMPW (BX)(R9*1), R8 JNE matchlen_match1_match_nolit_calcBlockSize - SUBL $0x02, SI LEAL 2(R9), R9 + SUBL $0x02, SI + JZ match_nolit_end_calcBlockSize matchlen_match1_match_nolit_calcBlockSize: - CMPL SI, $0x01 - JB match_nolit_end_calcBlockSize MOVB (DI)(R9*1), R8 CMPB (BX)(R9*1), R8 JNE match_nolit_end_calcBlockSize @@ -17909,7 +17897,6 @@ matchlen_loop_repeat_extend_calcBlockSizeSmall: LEAL 8(R10), R10 CMPL DI, $0x08 JAE matchlen_loopback_repeat_extend_calcBlockSizeSmall - JZ repeat_extend_forward_end_calcBlockSizeSmall matchlen_match4_repeat_extend_calcBlockSizeSmall: CMPL DI, $0x04 @@ -17917,21 +17904,21 @@ matchlen_match4_repeat_extend_calcBlockSizeSmall: MOVL (R8)(R10*1), R9 CMPL (BX)(R10*1), R9 JNE matchlen_match2_repeat_extend_calcBlockSizeSmall - SUBL $0x04, DI + LEAL -4(DI), DI LEAL 4(R10), R10 matchlen_match2_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x02 - JB matchlen_match1_repeat_extend_calcBlockSizeSmall + CMPL DI, $0x01 + JE matchlen_match1_repeat_extend_calcBlockSizeSmall + JB repeat_extend_forward_end_calcBlockSizeSmall MOVW (R8)(R10*1), R9 CMPW (BX)(R10*1), R9 JNE matchlen_match1_repeat_extend_calcBlockSizeSmall - SUBL $0x02, DI LEAL 2(R10), R10 + SUBL $0x02, DI + JZ repeat_extend_forward_end_calcBlockSizeSmall matchlen_match1_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x01 - JB repeat_extend_forward_end_calcBlockSizeSmall MOVB (R8)(R10*1), R9 CMPB (BX)(R10*1), R9 JNE repeat_extend_forward_end_calcBlockSizeSmall @@ -18091,7 +18078,6 @@ matchlen_loop_match_nolit_calcBlockSizeSmall: LEAL 8(R9), R9 CMPL SI, $0x08 JAE matchlen_loopback_match_nolit_calcBlockSizeSmall - JZ match_nolit_end_calcBlockSizeSmall matchlen_match4_match_nolit_calcBlockSizeSmall: CMPL SI, $0x04 @@ -18099,21 +18085,21 @@ matchlen_match4_match_nolit_calcBlockSizeSmall: MOVL (DI)(R9*1), R8 CMPL (BX)(R9*1), R8 JNE matchlen_match2_match_nolit_calcBlockSizeSmall - SUBL $0x04, SI + LEAL -4(SI), SI LEAL 4(R9), R9 matchlen_match2_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x02 - JB matchlen_match1_match_nolit_calcBlockSizeSmall + CMPL SI, $0x01 + JE matchlen_match1_match_nolit_calcBlockSizeSmall + JB match_nolit_end_calcBlockSizeSmall MOVW (DI)(R9*1), R8 CMPW (BX)(R9*1), R8 JNE matchlen_match1_match_nolit_calcBlockSizeSmall - SUBL $0x02, SI LEAL 2(R9), R9 + SUBL $0x02, SI + JZ match_nolit_end_calcBlockSizeSmall matchlen_match1_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x01 - JB match_nolit_end_calcBlockSizeSmall MOVB (DI)(R9*1), R8 CMPB (BX)(R9*1), R8 JNE match_nolit_end_calcBlockSizeSmall @@ -18879,7 +18865,6 @@ matchlen_loop_standalone: LEAL 8(SI), SI CMPL DX, $0x08 JAE matchlen_loopback_standalone - JZ gen_match_len_end matchlen_match4_standalone: CMPL DX, $0x04 @@ -18887,21 +18872,21 @@ matchlen_match4_standalone: MOVL (AX)(SI*1), BX CMPL (CX)(SI*1), BX JNE matchlen_match2_standalone - SUBL $0x04, DX + LEAL -4(DX), DX LEAL 4(SI), SI matchlen_match2_standalone: - CMPL DX, $0x02 - JB matchlen_match1_standalone + CMPL DX, $0x01 + JE matchlen_match1_standalone + JB gen_match_len_end MOVW (AX)(SI*1), BX CMPW (CX)(SI*1), BX JNE matchlen_match1_standalone - SUBL $0x02, DX LEAL 2(SI), SI + SUBL $0x02, DX + JZ gen_match_len_end matchlen_match1_standalone: - CMPL DX, $0x01 - JB gen_match_len_end MOVB (AX)(SI*1), BL CMPB (CX)(SI*1), BL JNE gen_match_len_end |