author    dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>  2024-05-06 08:50:47 +0000
committer GitHub <noreply@github.com>  2024-05-06 08:50:47 +0000
commit    a5f28fe0c923984c263592e82bbce99b0032b794 (patch)
tree      403544ad5305eb171a85d2b4c59559f83abd87a7 /vendor/github.com/cloudwego/iasm/x86_64/instructions.go
parent    [chore]: Bump golang.org/x/image from 0.15.0 to 0.16.0 (#2898) (diff)
download  gotosocial-a5f28fe0c923984c263592e82bbce99b0032b794.tar.xz
[chore]: Bump github.com/gin-contrib/gzip from 1.0.0 to 1.0.1 (#2899)
Bumps [github.com/gin-contrib/gzip](https://github.com/gin-contrib/gzip) from 1.0.0 to 1.0.1.

- [Release notes](https://github.com/gin-contrib/gzip/releases)
- [Changelog](https://github.com/gin-contrib/gzip/blob/master/.goreleaser.yaml)
- [Commits](https://github.com/gin-contrib/gzip/compare/v1.0.0...v1.0.1)

---
updated-dependencies:
- dependency-name: github.com/gin-contrib/gzip
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Diffstat (limited to 'vendor/github.com/cloudwego/iasm/x86_64/instructions.go')
-rw-r--r--  vendor/github.com/cloudwego/iasm/x86_64/instructions.go  97210
1 file changed, 97210 insertions(+), 0 deletions(-)
diff --git a/vendor/github.com/cloudwego/iasm/x86_64/instructions.go b/vendor/github.com/cloudwego/iasm/x86_64/instructions.go
new file mode 100644
index 000000000..d9c069035
--- /dev/null
+++ b/vendor/github.com/cloudwego/iasm/x86_64/instructions.go
@@ -0,0 +1,97210 @@
+//
+// Copyright 2024 CloudWeGo Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+// Code generated by "mkasm_amd64.py", DO NOT EDIT.
+
+package x86_64
+
+// ADCB performs "Add with Carry".
+//
+// Mnemonic : ADC
+// Supported forms : (6 forms)
+//
+// * ADCB imm8, al
+// * ADCB imm8, r8
+// * ADCB r8, r8
+// * ADCB m8, r8
+// * ADCB imm8, m8
+// * ADCB r8, m8
+//
+func (self *Program) ADCB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ADCB", 2, Operands { v0, v1 })
+ // ADCB imm8, al
+ if isImm8(v0) && v1 == AL {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x14)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ADCB imm8, r8
+ if isImm8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0x80)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ADCB r8, r8
+ if isReg8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1]))
+ m.emit(0x12)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ADCB m8, r8
+ if isM8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1]))
+ m.emit(0x12)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // ADCB imm8, m8
+ if isImm8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x80)
+ m.mrsd(2, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ADCB r8, m8
+ if isReg8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
+ m.emit(0x10)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ADCB")
+ }
+ return p
+}
+
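A minimal usage sketch for the six ADCB forms documented above (a sketch, not vendored code: it assumes a *Program value p obtained from this package, and an 8-bit register constant CL defined alongside the AL that the file itself references). An operand pair matching none of the forms panics with "invalid operands for ADCB".

    p.ADCB(1, AL)  // ADCB imm8, al -- short special case, opcode 0x14 plus imm8
    p.ADCB(1, CL)  // ADCB imm8, r8 -- opcode 0x80 /2 plus imm8
    p.ADCB(CL, AL) // ADCB r8, r8   -- both the 0x10 and 0x12 byte sequences are
                   // registered as candidates via p.add, and one is chosen later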
+// ADCL performs "Add with Carry".
+//
+// Mnemonic : ADC
+// Supported forms : (8 forms)
+//
+// * ADCL imm32, eax
+// * ADCL imm8, r32
+// * ADCL imm32, r32
+// * ADCL r32, r32
+// * ADCL m32, r32
+// * ADCL imm8, m32
+// * ADCL imm32, m32
+// * ADCL r32, m32
+//
+func (self *Program) ADCL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ADCL", 2, Operands { v0, v1 })
+ // ADCL imm32, eax
+ if isImm32(v0) && v1 == EAX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x15)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // ADCL imm8, r32
+ if isImm8Ext(v0, 4) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x83)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ADCL imm32, r32
+ if isImm32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x81)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // ADCL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x13)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ADCL m32, r32
+ if isM32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x13)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // ADCL imm8, m32
+ if isImm8Ext(v0, 4) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x83)
+ m.mrsd(2, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ADCL imm32, m32
+ if isImm32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x81)
+ m.mrsd(2, addr(v[1]), 1)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // ADCL r32, m32
+ if isReg32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x11)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ADCL")
+ }
+ return p
+}
+
+// ADCQ performs "Add with Carry".
+//
+// Mnemonic : ADC
+// Supported forms : (8 forms)
+//
+// * ADCQ imm32, rax
+// * ADCQ imm8, r64
+// * ADCQ imm32, r64
+// * ADCQ r64, r64
+// * ADCQ m64, r64
+// * ADCQ imm8, m64
+// * ADCQ imm32, m64
+// * ADCQ r64, m64
+//
+func (self *Program) ADCQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ADCQ", 2, Operands { v0, v1 })
+ // ADCQ imm32, rax
+ if isImm32(v0) && v1 == RAX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48)
+ m.emit(0x15)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // ADCQ imm8, r64
+ if isImm8Ext(v0, 8) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0x83)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ADCQ imm32, r64
+ if isImm32Ext(v0, 8) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0x81)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // ADCQ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x13)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ADCQ m64, r64
+ if isM64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x13)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // ADCQ imm8, m64
+ if isImm8Ext(v0, 8) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0x83)
+ m.mrsd(2, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ADCQ imm32, m64
+ if isImm32Ext(v0, 8) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0x81)
+ m.mrsd(2, addr(v[1]), 1)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // ADCQ r64, m64
+ if isReg64(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[0]), addr(v[1]))
+ m.emit(0x11)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ADCQ")
+ }
+ return p
+}
+
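The immediate forms of ADCQ overlap deliberately: a small constant satisfies isImm32, isImm8Ext(v, 8), and isImm32Ext(v, 8) at once, so a single call can register several candidate encodings, leaving the encoder free to keep the shortest. A sketch, under the same assumed *Program value p:

    p.ADCQ(8, RAX)          // matches the rax, imm8 and imm32 forms; the imm8
                            // candidate assembles to just 48 83 d0 08
    p.ADCQ(0x12345678, RAX) // too wide for a sign-extended imm8, so only the
                            // 4-byte-immediate candidates are registered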
+// ADCW performs "Add with Carry".
+//
+// Mnemonic : ADC
+// Supported forms : (8 forms)
+//
+// * ADCW imm16, ax
+// * ADCW imm8, r16
+// * ADCW imm16, r16
+// * ADCW r16, r16
+// * ADCW m16, r16
+// * ADCW imm8, m16
+// * ADCW imm16, m16
+// * ADCW r16, m16
+//
+func (self *Program) ADCW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ADCW", 2, Operands { v0, v1 })
+ // ADCW imm16, ax
+ if isImm16(v0) && v1 == AX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.emit(0x15)
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // ADCW imm8, r16
+ if isImm8Ext(v0, 2) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x83)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ADCW imm16, r16
+ if isImm16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x81)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // ADCW r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x13)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ADCW m16, r16
+ if isM16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x13)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // ADCW imm8, m16
+ if isImm8Ext(v0, 2) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x83)
+ m.mrsd(2, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ADCW imm16, m16
+ if isImm16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x81)
+ m.mrsd(2, addr(v[1]), 1)
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // ADCW r16, m16
+ if isReg16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x11)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ADCW")
+ }
+ return p
+}
+
+// ADCXL performs "Unsigned Integer Addition of Two Operands with Carry Flag".
+//
+// Mnemonic : ADCX
+// Supported forms : (2 forms)
+//
+// * ADCXL r32, r32 [ADX]
+// * ADCXL m32, r32 [ADX]
+//
+func (self *Program) ADCXL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ADCXL", 2, Operands { v0, v1 })
+ // ADCXL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_ADX)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xf6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ADCXL m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_ADX)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xf6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ADCXL")
+ }
+ return p
+}
+
+// ADCXQ performs "Unsigned Integer Addition of Two Operands with Carry Flag".
+//
+// Mnemonic : ADCX
+// Supported forms : (2 forms)
+//
+// * ADCXQ r64, r64 [ADX]
+// * ADCXQ m64, r64 [ADX]
+//
+func (self *Program) ADCXQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ADCXQ", 2, Operands { v0, v1 })
+ // ADCXQ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_ADX)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xf6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ADCXQ m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_ADX)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xf6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ADCXQ")
+ }
+ return p
+}
+
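ADCX differs from plain ADC in one way visible here: every form calls self.require(ISA_ADX) before registering its encoding, so using it records a hard dependency on the ADX extension instead of failing at encode time. A sketch with the same assumed p:

    p.ADCXQ(RAX, RAX) // valid r64, r64 operands, but the program now requires ISA_ADX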
+// ADDB performs "Add".
+//
+// Mnemonic : ADD
+// Supported forms : (6 forms)
+//
+// * ADDB imm8, al
+// * ADDB imm8, r8
+// * ADDB r8, r8
+// * ADDB m8, r8
+// * ADDB imm8, m8
+// * ADDB r8, m8
+//
+func (self *Program) ADDB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ADDB", 2, Operands { v0, v1 })
+ // ADDB imm8, al
+ if isImm8(v0) && v1 == AL {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x04)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ADDB imm8, r8
+ if isImm8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0x80)
+ m.emit(0xc0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ADDB r8, r8
+ if isReg8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
+ m.emit(0x00)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1]))
+ m.emit(0x02)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ADDB m8, r8
+ if isM8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1]))
+ m.emit(0x02)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // ADDB imm8, m8
+ if isImm8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x80)
+ m.mrsd(0, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ADDB r8, m8
+ if isReg8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
+ m.emit(0x00)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ADDB")
+ }
+ return p
+}
+
+// ADDL performs "Add".
+//
+// Mnemonic : ADD
+// Supported forms : (8 forms)
+//
+// * ADDL imm32, eax
+// * ADDL imm8, r32
+// * ADDL imm32, r32
+// * ADDL r32, r32
+// * ADDL m32, r32
+// * ADDL imm8, m32
+// * ADDL imm32, m32
+// * ADDL r32, m32
+//
+func (self *Program) ADDL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ADDL", 2, Operands { v0, v1 })
+ // ADDL imm32, eax
+ if isImm32(v0) && v1 == EAX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x05)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // ADDL imm8, r32
+ if isImm8Ext(v0, 4) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x83)
+ m.emit(0xc0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ADDL imm32, r32
+ if isImm32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x81)
+ m.emit(0xc0 | lcode(v[1]))
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // ADDL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x01)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x03)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ADDL m32, r32
+ if isM32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x03)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // ADDL imm8, m32
+ if isImm8Ext(v0, 4) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x83)
+ m.mrsd(0, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ADDL imm32, m32
+ if isImm32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x81)
+ m.mrsd(0, addr(v[1]), 1)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // ADDL r32, m32
+ if isReg32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x01)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ADDL")
+ }
+ return p
+}
+
+// ADDPD performs "Add Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : ADDPD
+// Supported forms : (2 forms)
+//
+// * ADDPD xmm, xmm [SSE2]
+// * ADDPD m128, xmm [SSE2]
+//
+func (self *Program) ADDPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ADDPD", 2, Operands { v0, v1 })
+ // ADDPD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ADDPD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x58)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ADDPD")
+ }
+ return p
+}
+
+// ADDPS performs "Add Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : ADDPS
+// Supported forms : (2 forms)
+//
+// * ADDPS xmm, xmm [SSE]
+// * ADDPS m128, xmm [SSE]
+//
+func (self *Program) ADDPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ADDPS", 2, Operands { v0, v1 })
+ // ADDPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ADDPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x58)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ADDPS")
+ }
+ return p
+}
+
+// ADDQ performs "Add".
+//
+// Mnemonic : ADD
+// Supported forms : (8 forms)
+//
+// * ADDQ imm32, rax
+// * ADDQ imm8, r64
+// * ADDQ imm32, r64
+// * ADDQ r64, r64
+// * ADDQ m64, r64
+// * ADDQ imm8, m64
+// * ADDQ imm32, m64
+// * ADDQ r64, m64
+//
+func (self *Program) ADDQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ADDQ", 2, Operands { v0, v1 })
+ // ADDQ imm32, rax
+ if isImm32(v0) && v1 == RAX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48)
+ m.emit(0x05)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // ADDQ imm8, r64
+ if isImm8Ext(v0, 8) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0x83)
+ m.emit(0xc0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ADDQ imm32, r64
+ if isImm32Ext(v0, 8) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0x81)
+ m.emit(0xc0 | lcode(v[1]))
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // ADDQ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
+ m.emit(0x01)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x03)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ADDQ m64, r64
+ if isM64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x03)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // ADDQ imm8, m64
+ if isImm8Ext(v0, 8) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0x83)
+ m.mrsd(0, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ADDQ imm32, m64
+ if isImm32Ext(v0, 8) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0x81)
+ m.mrsd(0, addr(v[1]), 1)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // ADDQ r64, m64
+ if isReg64(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[0]), addr(v[1]))
+ m.emit(0x01)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ADDQ")
+ }
+ return p
+}
+
+// ADDSD performs "Add Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : ADDSD
+// Supported forms : (2 forms)
+//
+// * ADDSD xmm, xmm [SSE2]
+// * ADDSD m64, xmm [SSE2]
+//
+func (self *Program) ADDSD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ADDSD", 2, Operands { v0, v1 })
+ // ADDSD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ADDSD m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x58)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ADDSD")
+ }
+ return p
+}
+
+// ADDSS performs "Add Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : ADDSS
+// Supported forms : (2 forms)
+//
+// * ADDSS xmm, xmm [SSE]
+// * ADDSS m32, xmm [SSE]
+//
+func (self *Program) ADDSS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ADDSS", 2, Operands { v0, v1 })
+ // ADDSS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ADDSS m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x58)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ADDSS")
+ }
+ return p
+}
+
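All four floating-point adds above share the escape opcode 0F 58 and differ only in the mandatory prefix: none for ADDPS, 66 for ADDPD, F3 for ADDSS, F2 for ADDSD. A sketch, additionally assuming the XMM register constants (XMM0, XMM1) implied by the isXMM checks:

    p.ADDPS(XMM1, XMM0) //    0f 58 /r -- packed single precision
    p.ADDPD(XMM1, XMM0) // 66 0f 58 /r -- packed double precision
    p.ADDSS(XMM1, XMM0) // f3 0f 58 /r -- scalar single precision
    p.ADDSD(XMM1, XMM0) // f2 0f 58 /r -- scalar double precision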
+// ADDSUBPD performs "Packed Double-FP Add/Subtract".
+//
+// Mnemonic : ADDSUBPD
+// Supported forms : (2 forms)
+//
+// * ADDSUBPD xmm, xmm [SSE3]
+// * ADDSUBPD m128, xmm [SSE3]
+//
+func (self *Program) ADDSUBPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ADDSUBPD", 2, Operands { v0, v1 })
+ // ADDSUBPD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xd0)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ADDSUBPD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xd0)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ADDSUBPD")
+ }
+ return p
+}
+
+// ADDSUBPS performs "Packed Single-FP Add/Subtract".
+//
+// Mnemonic : ADDSUBPS
+// Supported forms : (2 forms)
+//
+// * ADDSUBPS xmm, xmm [SSE3]
+// * ADDSUBPS m128, xmm [SSE3]
+//
+func (self *Program) ADDSUBPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ADDSUBPS", 2, Operands { v0, v1 })
+ // ADDSUBPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xd0)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ADDSUBPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xd0)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ADDSUBPS")
+ }
+ return p
+}
+
+// ADDW performs "Add".
+//
+// Mnemonic : ADD
+// Supported forms : (8 forms)
+//
+// * ADDW imm16, ax
+// * ADDW imm8, r16
+// * ADDW imm16, r16
+// * ADDW r16, r16
+// * ADDW m16, r16
+// * ADDW imm8, m16
+// * ADDW imm16, m16
+// * ADDW r16, m16
+//
+func (self *Program) ADDW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ADDW", 2, Operands { v0, v1 })
+ // ADDW imm16, ax
+ if isImm16(v0) && v1 == AX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.emit(0x05)
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // ADDW imm8, r16
+ if isImm8Ext(v0, 2) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x83)
+ m.emit(0xc0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ADDW imm16, r16
+ if isImm16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x81)
+ m.emit(0xc0 | lcode(v[1]))
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // ADDW r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x01)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x03)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ADDW m16, r16
+ if isM16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x03)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // ADDW imm8, m16
+ if isImm8Ext(v0, 2) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x83)
+ m.mrsd(0, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ADDW imm16, m16
+ if isImm16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x81)
+ m.mrsd(0, addr(v[1]), 1)
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // ADDW r16, m16
+ if isReg16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x01)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ADDW")
+ }
+ return p
+}
+
+// ADOXL performs "Unsigned Integer Addition of Two Operands with Overflow Flag".
+//
+// Mnemonic : ADOX
+// Supported forms : (2 forms)
+//
+// * ADOXL r32, r32 [ADX]
+// * ADOXL m32, r32 [ADX]
+//
+func (self *Program) ADOXL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ADOXL", 2, Operands { v0, v1 })
+ // ADOXL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_ADX)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xf6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ADOXL m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_ADX)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xf6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ADOXL")
+ }
+ return p
+}
+
+// ADOXQ performs "Unsigned Integer Addition of Two Operands with Overflow Flag".
+//
+// Mnemonic : ADOX
+// Supported forms : (2 forms)
+//
+// * ADOXQ r64, r64 [ADX]
+// * ADOXQ m64, r64 [ADX]
+//
+func (self *Program) ADOXQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ADOXQ", 2, Operands { v0, v1 })
+ // ADOXQ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_ADX)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xf6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ADOXQ m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_ADX)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xf6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ADOXQ")
+ }
+ return p
+}
+
+// AESDEC performs "Perform One Round of an AES Decryption Flow".
+//
+// Mnemonic : AESDEC
+// Supported forms : (2 forms)
+//
+// * AESDEC xmm, xmm [AES]
+// * AESDEC m128, xmm [AES]
+//
+func (self *Program) AESDEC(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("AESDEC", 2, Operands { v0, v1 })
+ // AESDEC xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AES)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xde)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // AESDEC m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_AES)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xde)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for AESDEC")
+ }
+ return p
+}
+
+// AESDECLAST performs "Perform Last Round of an AES Decryption Flow".
+//
+// Mnemonic : AESDECLAST
+// Supported forms : (2 forms)
+//
+// * AESDECLAST xmm, xmm [AES]
+// * AESDECLAST m128, xmm [AES]
+//
+func (self *Program) AESDECLAST(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("AESDECLAST", 2, Operands { v0, v1 })
+ // AESDECLAST xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AES)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xdf)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // AESDECLAST m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_AES)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xdf)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for AESDECLAST")
+ }
+ return p
+}
+
+// AESENC performs "Perform One Round of an AES Encryption Flow".
+//
+// Mnemonic : AESENC
+// Supported forms : (2 forms)
+//
+// * AESENC xmm, xmm [AES]
+// * AESENC m128, xmm [AES]
+//
+func (self *Program) AESENC(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("AESENC", 2, Operands { v0, v1 })
+ // AESENC xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AES)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xdc)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // AESENC m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_AES)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xdc)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for AESENC")
+ }
+ return p
+}
+
+// AESENCLAST performs "Perform Last Round of an AES Encryption Flow".
+//
+// Mnemonic : AESENCLAST
+// Supported forms : (2 forms)
+//
+// * AESENCLAST xmm, xmm [AES]
+// * AESENCLAST m128, xmm [AES]
+//
+func (self *Program) AESENCLAST(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("AESENCLAST", 2, Operands { v0, v1 })
+ // AESENCLAST xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AES)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xdd)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // AESENCLAST m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_AES)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xdd)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for AESENCLAST")
+ }
+ return p
+}
+
+// AESIMC performs "Perform the AES InvMixColumn Transformation".
+//
+// Mnemonic : AESIMC
+// Supported forms : (2 forms)
+//
+// * AESIMC xmm, xmm [AES]
+// * AESIMC m128, xmm [AES]
+//
+func (self *Program) AESIMC(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("AESIMC", 2, Operands { v0, v1 })
+ // AESIMC xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AES)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xdb)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // AESIMC m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_AES)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xdb)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for AESIMC")
+ }
+ return p
+}
+
+// AESKEYGENASSIST performs "AES Round Key Generation Assist".
+//
+// Mnemonic : AESKEYGENASSIST
+// Supported forms : (2 forms)
+//
+// * AESKEYGENASSIST imm8, xmm, xmm [AES]
+// * AESKEYGENASSIST imm8, m128, xmm [AES]
+//
+func (self *Program) AESKEYGENASSIST(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("AESKEYGENASSIST", 3, Operands { v0, v1, v2 })
+ // AESKEYGENASSIST imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AES)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0xdf)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // AESKEYGENASSIST imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_AES)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0xdf)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for AESKEYGENASSIST")
+ }
+ return p
+}
+
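AESKEYGENASSIST is the first three-operand function in the file, and it keeps the same source-before-destination operand order: the imm8 round constant comes first, the destination register last. A sketch, under the assumptions already stated:

    // Derive round-key material from XMM1 into XMM0 with round constant 0x01;
    // this also records a requirement on ISA_AES.
    p.AESKEYGENASSIST(0x01, XMM1, XMM0) // 66 0f 3a df /r ib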
+// ANDB performs "Logical AND".
+//
+// Mnemonic : AND
+// Supported forms : (6 forms)
+//
+// * ANDB imm8, al
+// * ANDB imm8, r8
+// * ANDB r8, r8
+// * ANDB m8, r8
+// * ANDB imm8, m8
+// * ANDB r8, m8
+//
+func (self *Program) ANDB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ANDB", 2, Operands { v0, v1 })
+ // ANDB imm8, al
+ if isImm8(v0) && v1 == AL {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x24)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ANDB imm8, r8
+ if isImm8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0x80)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ANDB r8, r8
+ if isReg8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
+ m.emit(0x20)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1]))
+ m.emit(0x22)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ANDB m8, r8
+ if isM8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1]))
+ m.emit(0x22)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // ANDB imm8, m8
+ if isImm8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x80)
+ m.mrsd(4, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ANDB r8, m8
+ if isReg8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
+ m.emit(0x20)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ANDB")
+ }
+ return p
+}
+
+// ANDL performs "Logical AND".
+//
+// Mnemonic : AND
+// Supported forms : (8 forms)
+//
+// * ANDL imm32, eax
+// * ANDL imm8, r32
+// * ANDL imm32, r32
+// * ANDL r32, r32
+// * ANDL m32, r32
+// * ANDL imm8, m32
+// * ANDL imm32, m32
+// * ANDL r32, m32
+//
+func (self *Program) ANDL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ANDL", 2, Operands { v0, v1 })
+ // ANDL imm32, eax
+ if isImm32(v0) && v1 == EAX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x25)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // ANDL imm8, r32
+ if isImm8Ext(v0, 4) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x83)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ANDL imm32, r32
+ if isImm32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x81)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // ANDL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x21)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x23)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ANDL m32, r32
+ if isM32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x23)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // ANDL imm8, m32
+ if isImm8Ext(v0, 4) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x83)
+ m.mrsd(4, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ANDL imm32, m32
+ if isImm32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x81)
+ m.mrsd(4, addr(v[1]), 1)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // ANDL r32, m32
+ if isReg32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x21)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ANDL")
+ }
+ return p
+}
+
+// ANDNL performs "Logical AND NOT".
+//
+// Mnemonic : ANDN
+// Supported forms : (2 forms)
+//
+// * ANDNL r32, r32, r32 [BMI]
+// * ANDNL m32, r32, r32 [BMI]
+//
+func (self *Program) ANDNL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("ANDNL", 3, Operands { v0, v1, v2 })
+ // ANDNL r32, r32, r32
+ if isReg32(v0) && isReg32(v1) && isReg32(v2) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x78 ^ (hlcode(v[1]) << 3))
+ m.emit(0xf2)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // ANDNL m32, r32, r32
+ if isM32(v0) && isReg32(v1) && isReg32(v2) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x00, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xf2)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ANDNL")
+ }
+ return p
+}
+
+// ANDNPD performs "Bitwise Logical AND NOT of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : ANDNPD
+// Supported forms : (2 forms)
+//
+// * ANDNPD xmm, xmm [SSE2]
+// * ANDNPD m128, xmm [SSE2]
+//
+func (self *Program) ANDNPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ANDNPD", 2, Operands { v0, v1 })
+ // ANDNPD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x55)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ANDNPD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x55)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ANDNPD")
+ }
+ return p
+}
+
+// ANDNPS performs "Bitwise Logical AND NOT of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : ANDNPS
+// Supported forms : (2 forms)
+//
+// * ANDNPS xmm, xmm [SSE]
+// * ANDNPS m128, xmm [SSE]
+//
+func (self *Program) ANDNPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ANDNPS", 2, Operands { v0, v1 })
+ // ANDNPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x55)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ANDNPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x55)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ANDNPS")
+ }
+ return p
+}
+
+// ANDNQ performs "Logical AND NOT".
+//
+// Mnemonic : ANDN
+// Supported forms : (2 forms)
+//
+// * ANDNQ r64, r64, r64 [BMI]
+// * ANDNQ m64, r64, r64 [BMI]
+//
+func (self *Program) ANDNQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("ANDNQ", 3, Operands { v0, v1, v2 })
+ // ANDNQ r64, r64, r64
+ if isReg64(v0) && isReg64(v1) && isReg64(v2) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+ m.emit(0xf2)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // ANDNQ m64, r64, r64
+ if isM64(v0) && isReg64(v1) && isReg64(v2) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xf2)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ANDNQ")
+ }
+ return p
+}
+
+// ANDPD performs "Bitwise Logical AND of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : ANDPD
+// Supported forms : (2 forms)
+//
+// * ANDPD xmm, xmm [SSE2]
+// * ANDPD m128, xmm [SSE2]
+//
+func (self *Program) ANDPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ANDPD", 2, Operands { v0, v1 })
+ // ANDPD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x54)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ANDPD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x54)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ANDPD")
+ }
+ return p
+}
+
+// ANDPS performs "Bitwise Logical AND of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : ANDPS
+// Supported forms : (2 forms)
+//
+// * ANDPS xmm, xmm [SSE]
+// * ANDPS m128, xmm [SSE]
+//
+func (self *Program) ANDPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ANDPS", 2, Operands { v0, v1 })
+ // ANDPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x54)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ANDPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x54)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ANDPS")
+ }
+ return p
+}
+
+// ANDQ performs "Logical AND".
+//
+// Mnemonic : AND
+// Supported forms : (8 forms)
+//
+// * ANDQ imm32, rax
+// * ANDQ imm8, r64
+// * ANDQ imm32, r64
+// * ANDQ r64, r64
+// * ANDQ m64, r64
+// * ANDQ imm8, m64
+// * ANDQ imm32, m64
+// * ANDQ r64, m64
+//
+func (self *Program) ANDQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ANDQ", 2, Operands { v0, v1 })
+ // ANDQ imm32, rax
+ if isImm32(v0) && v1 == RAX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48)
+ m.emit(0x25)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // ANDQ imm8, r64
+ if isImm8Ext(v0, 8) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0x83)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ANDQ imm32, r64
+ if isImm32Ext(v0, 8) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0x81)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // ANDQ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
+ m.emit(0x21)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x23)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ANDQ m64, r64
+ if isM64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x23)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // ANDQ imm8, m64
+ if isImm8Ext(v0, 8) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0x83)
+ m.mrsd(4, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ANDQ imm32, m64
+ if isImm32Ext(v0, 8) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0x81)
+ m.mrsd(4, addr(v[1]), 1)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // ANDQ r64, m64
+ if isReg64(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[0]), addr(v[1]))
+ m.emit(0x21)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ANDQ")
+ }
+ return p
+}
+
+// ANDW performs "Logical AND".
+//
+// Mnemonic : AND
+// Supported forms : (8 forms)
+//
+// * ANDW imm16, ax
+// * ANDW imm8, r16
+// * ANDW imm16, r16
+// * ANDW r16, r16
+// * ANDW m16, r16
+// * ANDW imm8, m16
+// * ANDW imm16, m16
+// * ANDW r16, m16
+//
+func (self *Program) ANDW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ANDW", 2, Operands { v0, v1 })
+ // ANDW imm16, ax
+ if isImm16(v0) && v1 == AX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.emit(0x25)
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // ANDW imm8, r16
+ if isImm8Ext(v0, 2) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x83)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ANDW imm16, r16
+ if isImm16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x81)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // ANDW r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x21)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x23)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ANDW m16, r16
+ if isM16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x23)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // ANDW imm8, m16
+ if isImm8Ext(v0, 2) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x83)
+ m.mrsd(4, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ANDW imm16, m16
+ if isImm16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x81)
+ m.mrsd(4, addr(v[1]), 1)
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // ANDW r16, m16
+ if isReg16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x21)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ANDW")
+ }
+ return p
+}
+
+// BEXTR performs "Bit Field Extract".
+//
+// Mnemonic : BEXTR
+// Supported forms : (8 forms)
+//
+// * BEXTR imm32, r32, r32 [TBM]
+// * BEXTR imm32, m32, r32 [TBM]
+// * BEXTR imm32, r64, r64 [TBM]
+// * BEXTR imm32, m64, r64 [TBM]
+// * BEXTR r32, r32, r32 [BMI]
+// * BEXTR r32, m32, r32 [BMI]
+// * BEXTR r64, r64, r64 [BMI]
+// * BEXTR r64, m64, r64 [BMI]
+//
+func (self *Program) BEXTR(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("BEXTR", 3, Operands { v0, v1, v2 })
+ // BEXTR imm32, r32, r32
+ if isImm32(v0) && isReg32(v1) && isReg32(v2) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xea ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78)
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // BEXTR imm32, m32, r32
+ if isImm32(v0) && isM32(v1) && isReg32(v2) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1010, 0x00, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0x10)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // BEXTR imm32, r64, r64
+ if isImm32(v0) && isReg64(v1) && isReg64(v2) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xea ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0xf8)
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // BEXTR imm32, m64, r64
+ if isImm32(v0) && isM64(v1) && isReg64(v2) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1010, 0x80, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0x10)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // BEXTR r32, r32, r32
+ if isReg32(v0) && isReg32(v1) && isReg32(v2) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[0]) << 3))
+ m.emit(0xf7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // BEXTR r32, m32, r32
+ if isReg32(v0) && isM32(v1) && isReg32(v2) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0xf7)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ // BEXTR r64, r64, r64
+ if isReg64(v0) && isReg64(v1) && isReg64(v2) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[0]) << 3))
+ m.emit(0xf7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // BEXTR r64, m64, r64
+ if isReg64(v0) && isM64(v1) && isReg64(v2) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x80, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0xf7)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BEXTR")
+ }
+ return p
+}
+
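BEXTR is the first function here that fans out across two vendor extensions: the imm32 forms assemble to AMD's XOP encoding (0x8f escape, gated on ISA_TBM), while the register-controlled forms use the VEX encoding from BMI. In the control value, per the architecture manuals, bits 7:0 hold the start position and bits 15:8 the field length. A sketch, additionally assuming an ECX constant defined like EAX:

    p.BEXTR(0x0804, EAX, EAX) // TBM form: extract the 8-bit field at bit 4 of eax
    p.BEXTR(ECX, EAX, EAX)    // BMI form: the same control word taken from ecx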
+// BLCFILL performs "Fill From Lowest Clear Bit".
+//
+// Mnemonic : BLCFILL
+// Supported forms : (4 forms)
+//
+// * BLCFILL r32, r32 [TBM]
+// * BLCFILL m32, r32 [TBM]
+// * BLCFILL r64, r64 [TBM]
+// * BLCFILL m64, r64 [TBM]
+//
+func (self *Program) BLCFILL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BLCFILL", 2, Operands { v0, v1 })
+ // BLCFILL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[0]) << 5))
+ m.emit(0x78 ^ (hlcode(v[1]) << 3))
+ m.emit(0x01)
+ m.emit(0xc8 | lcode(v[0]))
+ })
+ }
+ // BLCFILL m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1]))
+ m.emit(0x01)
+ m.mrsd(1, addr(v[0]), 1)
+ })
+ }
+ // BLCFILL r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+ m.emit(0x01)
+ m.emit(0xc8 | lcode(v[0]))
+ })
+ }
+ // BLCFILL m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1]))
+ m.emit(0x01)
+ m.mrsd(1, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BLCFILL")
+ }
+ return p
+}
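+
+// Usage sketch (illustrative): assuming a *Program value `p` and register
+// constants, BLCFILL and the other two-operand TBM helpers below (BLCI,
+// BLCIC, BLCMSK, BLCS, BLSFILL, BLSIC) all share the same
+// source-then-destination call shape:
+//
+//     p.BLCFILL(ECX, EDX)   // EDX = BLCFILL(ECX), requires TBM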
+
+// BLCI performs "Isolate Lowest Clear Bit".
+//
+// Mnemonic : BLCI
+// Supported forms : (4 forms)
+//
+// * BLCI r32, r32 [TBM]
+// * BLCI m32, r32 [TBM]
+// * BLCI r64, r64 [TBM]
+// * BLCI m64, r64 [TBM]
+//
+func (self *Program) BLCI(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BLCI", 2, Operands { v0, v1 })
+ // BLCI r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[0]) << 5))
+ m.emit(0x78 ^ (hlcode(v[1]) << 3))
+ m.emit(0x02)
+ m.emit(0xf0 | lcode(v[0]))
+ })
+ }
+ // BLCI m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1]))
+ m.emit(0x02)
+ m.mrsd(6, addr(v[0]), 1)
+ })
+ }
+ // BLCI r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+ m.emit(0x02)
+ m.emit(0xf0 | lcode(v[0]))
+ })
+ }
+ // BLCI m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1]))
+ m.emit(0x02)
+ m.mrsd(6, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BLCI")
+ }
+ return p
+}
+
+// BLCIC performs "Isolate Lowest Clear Bit and Complement".
+//
+// Mnemonic : BLCIC
+// Supported forms : (4 forms)
+//
+// * BLCIC r32, r32 [TBM]
+// * BLCIC m32, r32 [TBM]
+// * BLCIC r64, r64 [TBM]
+// * BLCIC m64, r64 [TBM]
+//
+func (self *Program) BLCIC(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BLCIC", 2, Operands { v0, v1 })
+ // BLCIC r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[0]) << 5))
+ m.emit(0x78 ^ (hlcode(v[1]) << 3))
+ m.emit(0x01)
+ m.emit(0xe8 | lcode(v[0]))
+ })
+ }
+ // BLCIC m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1]))
+ m.emit(0x01)
+ m.mrsd(5, addr(v[0]), 1)
+ })
+ }
+ // BLCIC r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+ m.emit(0x01)
+ m.emit(0xe8 | lcode(v[0]))
+ })
+ }
+ // BLCIC m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1]))
+ m.emit(0x01)
+ m.mrsd(5, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BLCIC")
+ }
+ return p
+}
+
+// BLCMSK performs "Mask From Lowest Clear Bit".
+//
+// Mnemonic : BLCMSK
+// Supported forms : (4 forms)
+//
+// * BLCMSK r32, r32 [TBM]
+// * BLCMSK m32, r32 [TBM]
+// * BLCMSK r64, r64 [TBM]
+// * BLCMSK m64, r64 [TBM]
+//
+func (self *Program) BLCMSK(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BLCMSK", 2, Operands { v0, v1 })
+ // BLCMSK r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[0]) << 5))
+ m.emit(0x78 ^ (hlcode(v[1]) << 3))
+ m.emit(0x02)
+ m.emit(0xc8 | lcode(v[0]))
+ })
+ }
+ // BLCMSK m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1]))
+ m.emit(0x02)
+ m.mrsd(1, addr(v[0]), 1)
+ })
+ }
+ // BLCMSK r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+ m.emit(0x02)
+ m.emit(0xc8 | lcode(v[0]))
+ })
+ }
+ // BLCMSK m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1]))
+ m.emit(0x02)
+ m.mrsd(1, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BLCMSK")
+ }
+ return p
+}
+
+// BLCS performs "Set Lowest Clear Bit".
+//
+// Mnemonic : BLCS
+// Supported forms : (4 forms)
+//
+// * BLCS r32, r32 [TBM]
+// * BLCS m32, r32 [TBM]
+// * BLCS r64, r64 [TBM]
+// * BLCS m64, r64 [TBM]
+//
+func (self *Program) BLCS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BLCS", 2, Operands { v0, v1 })
+ // BLCS r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[0]) << 5))
+ m.emit(0x78 ^ (hlcode(v[1]) << 3))
+ m.emit(0x01)
+ m.emit(0xd8 | lcode(v[0]))
+ })
+ }
+ // BLCS m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1]))
+ m.emit(0x01)
+ m.mrsd(3, addr(v[0]), 1)
+ })
+ }
+ // BLCS r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+ m.emit(0x01)
+ m.emit(0xd8 | lcode(v[0]))
+ })
+ }
+ // BLCS m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1]))
+ m.emit(0x01)
+ m.mrsd(3, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BLCS")
+ }
+ return p
+}
+
+// BLENDPD performs "Blend Packed Double Precision Floating-Point Values".
+//
+// Mnemonic : BLENDPD
+// Supported forms : (2 forms)
+//
+// * BLENDPD imm8, xmm, xmm [SSE4.1]
+// * BLENDPD imm8, m128, xmm [SSE4.1]
+//
+func (self *Program) BLENDPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("BLENDPD", 3, Operands { v0, v1, v2 })
+ // BLENDPD imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x0d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BLENDPD imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x0d)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BLENDPD")
+ }
+ return p
+}
+
+// BLENDPS performs "Blend Packed Single Precision Floating-Point Values".
+//
+// Mnemonic : BLENDPS
+// Supported forms : (2 forms)
+//
+// * BLENDPS imm8, xmm, xmm [SSE4.1]
+// * BLENDPS imm8, m128, xmm [SSE4.1]
+//
+func (self *Program) BLENDPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("BLENDPS", 3, Operands { v0, v1, v2 })
+ // BLENDPS imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x0c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BLENDPS imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x0c)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BLENDPS")
+ }
+ return p
+}
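+
+// Usage sketch (illustrative): assuming a *Program value `p` and XMM register
+// constants, the SSE4.1 blends take the selector immediate first; lanes of the
+// destination whose mask bit is set are taken from the source:
+//
+//     p.BLENDPD(0b01, XMM1, XMM2)   // low lane of XMM2 replaced from XMM1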
+
+// BLENDVPD performs "Variable Blend Packed Double Precision Floating-Point Values".
+//
+// Mnemonic : BLENDVPD
+// Supported forms : (2 forms)
+//
+// * BLENDVPD xmm0, xmm, xmm [SSE4.1]
+// * BLENDVPD xmm0, m128, xmm [SSE4.1]
+//
+func (self *Program) BLENDVPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("BLENDVPD", 3, Operands { v0, v1, v2 })
+ // BLENDVPD xmm0, xmm, xmm
+ if v0 == XMM0 && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x15)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // BLENDVPD xmm0, m128, xmm
+ if v0 == XMM0 && isM128(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x15)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BLENDVPD")
+ }
+ return p
+}
+
+// BLENDVPS performs "Variable Blend Packed Single Precision Floating-Point Values".
+//
+// Mnemonic : BLENDVPS
+// Supported forms : (2 forms)
+//
+// * BLENDVPS xmm0, xmm, xmm [SSE4.1]
+// * BLENDVPS xmm0, m128, xmm [SSE4.1]
+//
+func (self *Program) BLENDVPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("BLENDVPS", 3, Operands { v0, v1, v2 })
+ // BLENDVPS xmm0, xmm, xmm
+ if v0 == XMM0 && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x14)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // BLENDVPS xmm0, m128, xmm
+ if v0 == XMM0 && isM128(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x14)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BLENDVPS")
+ }
+ return p
+}
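+
+// Usage sketch (illustrative): the variable blends hard-wire XMM0 as the mask
+// register, and the encoders enforce that — the first argument must be the
+// XMM0 constant itself:
+//
+//     p.BLENDVPD(XMM0, XMM1, XMM2)  // XMM2 lanes selected from XMM1 by XMM0's sign bits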
+
+// BLSFILL performs "Fill From Lowest Set Bit".
+//
+// Mnemonic : BLSFILL
+// Supported forms : (4 forms)
+//
+// * BLSFILL r32, r32 [TBM]
+// * BLSFILL m32, r32 [TBM]
+// * BLSFILL r64, r64 [TBM]
+// * BLSFILL m64, r64 [TBM]
+//
+func (self *Program) BLSFILL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BLSFILL", 2, Operands { v0, v1 })
+ // BLSFILL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[0]) << 5))
+ m.emit(0x78 ^ (hlcode(v[1]) << 3))
+ m.emit(0x01)
+ m.emit(0xd0 | lcode(v[0]))
+ })
+ }
+ // BLSFILL m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1]))
+ m.emit(0x01)
+ m.mrsd(2, addr(v[0]), 1)
+ })
+ }
+ // BLSFILL r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+ m.emit(0x01)
+ m.emit(0xd0 | lcode(v[0]))
+ })
+ }
+ // BLSFILL m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1]))
+ m.emit(0x01)
+ m.mrsd(2, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BLSFILL")
+ }
+ return p
+}
+
+// BLSI performs "Isolate Lowest Set Bit".
+//
+// Mnemonic : BLSI
+// Supported forms : (4 forms)
+//
+// * BLSI r32, r32 [BMI]
+// * BLSI m32, r32 [BMI]
+// * BLSI r64, r64 [BMI]
+// * BLSI m64, r64 [BMI]
+//
+func (self *Program) BLSI(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BLSI", 2, Operands { v0, v1 })
+ // BLSI r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[0]) << 5))
+ m.emit(0x78 ^ (hlcode(v[1]) << 3))
+ m.emit(0xf3)
+ m.emit(0xd8 | lcode(v[0]))
+ })
+ }
+ // BLSI m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x00, 0, addr(v[0]), hlcode(v[1]))
+ m.emit(0xf3)
+ m.mrsd(3, addr(v[0]), 1)
+ })
+ }
+ // BLSI r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+ m.emit(0xf3)
+ m.emit(0xd8 | lcode(v[0]))
+ })
+ }
+ // BLSI m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x80, 0, addr(v[0]), hlcode(v[1]))
+ m.emit(0xf3)
+ m.mrsd(3, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BLSI")
+ }
+ return p
+}
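+
+// Usage sketch (illustrative): assuming register constants, the BMI helpers
+// (BLSI here, BLSMSK and BLSR below) all read source-then-destination:
+//
+//     p.BLSI(ECX, EDX)   // EDX = lowest set bit of ECX, isolated; requires BMI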
+
+// BLSIC performs "Isolate Lowest Set Bit and Complement".
+//
+// Mnemonic : BLSIC
+// Supported forms : (4 forms)
+//
+// * BLSIC r32, r32 [TBM]
+// * BLSIC m32, r32 [TBM]
+// * BLSIC r64, r64 [TBM]
+// * BLSIC m64, r64 [TBM]
+//
+func (self *Program) BLSIC(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BLSIC", 2, Operands { v0, v1 })
+ // BLSIC r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[0]) << 5))
+ m.emit(0x78 ^ (hlcode(v[1]) << 3))
+ m.emit(0x01)
+ m.emit(0xf0 | lcode(v[0]))
+ })
+ }
+ // BLSIC m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1]))
+ m.emit(0x01)
+ m.mrsd(6, addr(v[0]), 1)
+ })
+ }
+ // BLSIC r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+ m.emit(0x01)
+ m.emit(0xf0 | lcode(v[0]))
+ })
+ }
+ // BLSIC m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1]))
+ m.emit(0x01)
+ m.mrsd(6, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BLSIC")
+ }
+ return p
+}
+
+// BLSMSK performs "Mask From Lowest Set Bit".
+//
+// Mnemonic : BLSMSK
+// Supported forms : (4 forms)
+//
+// * BLSMSK r32, r32 [BMI]
+// * BLSMSK m32, r32 [BMI]
+// * BLSMSK r64, r64 [BMI]
+// * BLSMSK m64, r64 [BMI]
+//
+func (self *Program) BLSMSK(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BLSMSK", 2, Operands { v0, v1 })
+ // BLSMSK r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[0]) << 5))
+ m.emit(0x78 ^ (hlcode(v[1]) << 3))
+ m.emit(0xf3)
+ m.emit(0xd0 | lcode(v[0]))
+ })
+ }
+ // BLSMSK m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x00, 0, addr(v[0]), hlcode(v[1]))
+ m.emit(0xf3)
+ m.mrsd(2, addr(v[0]), 1)
+ })
+ }
+ // BLSMSK r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+ m.emit(0xf3)
+ m.emit(0xd0 | lcode(v[0]))
+ })
+ }
+ // BLSMSK m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x80, 0, addr(v[0]), hlcode(v[1]))
+ m.emit(0xf3)
+ m.mrsd(2, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BLSMSK")
+ }
+ return p
+}
+
+// BLSR performs "Reset Lowest Set Bit".
+//
+// Mnemonic : BLSR
+// Supported forms : (4 forms)
+//
+// * BLSR r32, r32 [BMI]
+// * BLSR m32, r32 [BMI]
+// * BLSR r64, r64 [BMI]
+// * BLSR m64, r64 [BMI]
+//
+func (self *Program) BLSR(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BLSR", 2, Operands { v0, v1 })
+ // BLSR r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[0]) << 5))
+ m.emit(0x78 ^ (hlcode(v[1]) << 3))
+ m.emit(0xf3)
+ m.emit(0xc8 | lcode(v[0]))
+ })
+ }
+ // BLSR m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x00, 0, addr(v[0]), hlcode(v[1]))
+ m.emit(0xf3)
+ m.mrsd(1, addr(v[0]), 1)
+ })
+ }
+ // BLSR r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+ m.emit(0xf3)
+ m.emit(0xc8 | lcode(v[0]))
+ })
+ }
+ // BLSR m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x80, 0, addr(v[0]), hlcode(v[1]))
+ m.emit(0xf3)
+ m.mrsd(1, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BLSR")
+ }
+ return p
+}
+
+// BSFL performs "Bit Scan Forward".
+//
+// Mnemonic : BSF
+// Supported forms : (2 forms)
+//
+// * BSFL r32, r32
+// * BSFL m32, r32
+//
+func (self *Program) BSFL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BSFL", 2, Operands { v0, v1 })
+ // BSFL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xbc)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // BSFL m32, r32
+ if isM32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xbc)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BSFL")
+ }
+ return p
+}
+
+// BSFQ performs "Bit Scan Forward".
+//
+// Mnemonic : BSF
+// Supported forms : (2 forms)
+//
+// * BSFQ r64, r64
+// * BSFQ m64, r64
+//
+func (self *Program) BSFQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BSFQ", 2, Operands { v0, v1 })
+ // BSFQ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0xbc)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // BSFQ m64, r64
+ if isM64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0xbc)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BSFQ")
+ }
+ return p
+}
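+
+// Usage sketch (illustrative): the scan encoders are width-suffixed rather
+// than overloaded, so the operand width picks the method; the BSR variants
+// below follow the same shape:
+//
+//     p.BSFQ(RCX, RAX)   // RAX = index of the lowest set bit in RCX
+//     p.BSFL(ECX, EAX)   // 32-bit variant of the same scan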
+
+// BSFW performs "Bit Scan Forward".
+//
+// Mnemonic : BSF
+// Supported forms : (2 forms)
+//
+// * BSFW r16, r16
+// * BSFW m16, r16
+//
+func (self *Program) BSFW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BSFW", 2, Operands { v0, v1 })
+ // BSFW r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xbc)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // BSFW m16, r16
+ if isM16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xbc)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BSFW")
+ }
+ return p
+}
+
+// BSRL performs "Bit Scan Reverse".
+//
+// Mnemonic : BSR
+// Supported forms : (2 forms)
+//
+// * BSRL r32, r32
+// * BSRL m32, r32
+//
+func (self *Program) BSRL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BSRL", 2, Operands { v0, v1 })
+ // BSRL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xbd)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // BSRL m32, r32
+ if isM32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xbd)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BSRL")
+ }
+ return p
+}
+
+// BSRQ performs "Bit Scan Reverse".
+//
+// Mnemonic : BSR
+// Supported forms : (2 forms)
+//
+// * BSRQ r64, r64
+// * BSRQ m64, r64
+//
+func (self *Program) BSRQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BSRQ", 2, Operands { v0, v1 })
+ // BSRQ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0xbd)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // BSRQ m64, r64
+ if isM64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0xbd)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BSRQ")
+ }
+ return p
+}
+
+// BSRW performs "Bit Scan Reverse".
+//
+// Mnemonic : BSR
+// Supported forms : (2 forms)
+//
+// * BSRW r16, r16
+// * BSRW m16, r16
+//
+func (self *Program) BSRW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BSRW", 2, Operands { v0, v1 })
+ // BSRW r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xbd)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // BSRW m16, r16
+ if isM16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xbd)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BSRW")
+ }
+ return p
+}
+
+// BSWAPL performs "Byte Swap".
+//
+// Mnemonic : BSWAP
+// Supported forms : (1 form)
+//
+// * BSWAPL r32
+//
+func (self *Program) BSWAPL(v0 interface{}) *Instruction {
+ p := self.alloc("BSWAPL", 1, Operands { v0 })
+ // BSWAPL r32
+ if isReg32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], false)
+ m.emit(0x0f)
+ m.emit(0xc8 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BSWAPL")
+ }
+ return p
+}
+
+// BSWAPQ performs "Byte Swap".
+//
+// Mnemonic : BSWAP
+// Supported forms : (1 form)
+//
+// * BSWAPQ r64
+//
+func (self *Program) BSWAPQ(v0 interface{}) *Instruction {
+ p := self.alloc("BSWAPQ", 1, Operands { v0 })
+ // BSWAPQ r64
+ if isReg64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0xc8 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BSWAPQ")
+ }
+ return p
+}
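+
+// Usage sketch (illustrative): BSWAP works in place on a single register
+// operand, so only the destination is passed:
+//
+//     p.BSWAPQ(RAX)   // reverse the byte order of RAX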
+
+// BTCL performs "Bit Test and Complement".
+//
+// Mnemonic : BTC
+// Supported forms : (4 forms)
+//
+// * BTCL imm8, r32
+// * BTCL r32, r32
+// * BTCL imm8, m32
+// * BTCL r32, m32
+//
+func (self *Program) BTCL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BTCL", 2, Operands { v0, v1 })
+ // BTCL imm8, r32
+ if isImm8(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0xba)
+ m.emit(0xf8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BTCL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0xbb)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // BTCL imm8, m32
+ if isImm8(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xba)
+ m.mrsd(7, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BTCL r32, m32
+ if isReg32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xbb)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BTCL")
+ }
+ return p
+}
+
+// BTCQ performs "Bit Test and Complement".
+//
+// Mnemonic : BTC
+// Supported forms : (4 forms)
+//
+// * BTCQ imm8, r64
+// * BTCQ r64, r64
+// * BTCQ imm8, m64
+// * BTCQ r64, m64
+//
+func (self *Program) BTCQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BTCQ", 2, Operands { v0, v1 })
+ // BTCQ imm8, r64
+ if isImm8(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0x0f)
+ m.emit(0xba)
+ m.emit(0xf8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BTCQ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
+ m.emit(0x0f)
+ m.emit(0xbb)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // BTCQ imm8, m64
+ if isImm8(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0x0f)
+ m.emit(0xba)
+ m.mrsd(7, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BTCQ r64, m64
+ if isReg64(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[0]), addr(v[1]))
+ m.emit(0x0f)
+ m.emit(0xbb)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BTCQ")
+ }
+ return p
+}
+
+// BTCW performs "Bit Test and Complement".
+//
+// Mnemonic : BTC
+// Supported forms : (4 forms)
+//
+// * BTCW imm8, r16
+// * BTCW r16, r16
+// * BTCW imm8, m16
+// * BTCW r16, m16
+//
+func (self *Program) BTCW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BTCW", 2, Operands { v0, v1 })
+ // BTCW imm8, r16
+ if isImm8(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0xba)
+ m.emit(0xf8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BTCW r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0xbb)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // BTCW imm8, m16
+ if isImm8(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xba)
+ m.mrsd(7, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BTCW r16, m16
+ if isReg16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xbb)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BTCW")
+ }
+ return p
+}
+
+// BTL performs "Bit Test".
+//
+// Mnemonic : BT
+// Supported forms : (4 forms)
+//
+// * BTL imm8, r32
+// * BTL r32, r32
+// * BTL imm8, m32
+// * BTL r32, m32
+//
+func (self *Program) BTL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BTL", 2, Operands { v0, v1 })
+ // BTL imm8, r32
+ if isImm8(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0xba)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BTL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0xa3)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // BTL imm8, m32
+ if isImm8(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xba)
+ m.mrsd(4, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BTL r32, m32
+ if isReg32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xa3)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BTL")
+ }
+ return p
+}
+
+// BTQ performs "Bit Test".
+//
+// Mnemonic : BT
+// Supported forms : (4 forms)
+//
+// * BTQ imm8, r64
+// * BTQ r64, r64
+// * BTQ imm8, m64
+// * BTQ r64, m64
+//
+func (self *Program) BTQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BTQ", 2, Operands { v0, v1 })
+ // BTQ imm8, r64
+ if isImm8(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0x0f)
+ m.emit(0xba)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BTQ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
+ m.emit(0x0f)
+ m.emit(0xa3)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // BTQ imm8, m64
+ if isImm8(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0x0f)
+ m.emit(0xba)
+ m.mrsd(4, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BTQ r64, m64
+ if isReg64(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[0]), addr(v[1]))
+ m.emit(0x0f)
+ m.emit(0xa3)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BTQ")
+ }
+ return p
+}
+
+// BTRL performs "Bit Test and Reset".
+//
+// Mnemonic : BTR
+// Supported forms : (4 forms)
+//
+// * BTRL imm8, r32
+// * BTRL r32, r32
+// * BTRL imm8, m32
+// * BTRL r32, m32
+//
+func (self *Program) BTRL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BTRL", 2, Operands { v0, v1 })
+ // BTRL imm8, r32
+ if isImm8(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0xba)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BTRL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0xb3)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // BTRL imm8, m32
+ if isImm8(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xba)
+ m.mrsd(6, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BTRL r32, m32
+ if isReg32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xb3)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BTRL")
+ }
+ return p
+}
+
+// BTRQ performs "Bit Test and Reset".
+//
+// Mnemonic : BTR
+// Supported forms : (4 forms)
+//
+// * BTRQ imm8, r64
+// * BTRQ r64, r64
+// * BTRQ imm8, m64
+// * BTRQ r64, m64
+//
+func (self *Program) BTRQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BTRQ", 2, Operands { v0, v1 })
+ // BTRQ imm8, r64
+ if isImm8(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0x0f)
+ m.emit(0xba)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BTRQ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
+ m.emit(0x0f)
+ m.emit(0xb3)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // BTRQ imm8, m64
+ if isImm8(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0x0f)
+ m.emit(0xba)
+ m.mrsd(6, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BTRQ r64, m64
+ if isReg64(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[0]), addr(v[1]))
+ m.emit(0x0f)
+ m.emit(0xb3)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BTRQ")
+ }
+ return p
+}
+
+// BTRW performs "Bit Test and Reset".
+//
+// Mnemonic : BTR
+// Supported forms : (4 forms)
+//
+// * BTRW imm8, r16
+// * BTRW r16, r16
+// * BTRW imm8, m16
+// * BTRW r16, m16
+//
+func (self *Program) BTRW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BTRW", 2, Operands { v0, v1 })
+ // BTRW imm8, r16
+ if isImm8(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0xba)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BTRW r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0xb3)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // BTRW imm8, m16
+ if isImm8(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xba)
+ m.mrsd(6, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BTRW r16, m16
+ if isReg16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xb3)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BTRW")
+ }
+ return p
+}
+
+// BTSL performs "Bit Test and Set".
+//
+// Mnemonic : BTS
+// Supported forms : (4 forms)
+//
+// * BTSL imm8, r32
+// * BTSL r32, r32
+// * BTSL imm8, m32
+// * BTSL r32, m32
+//
+func (self *Program) BTSL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BTSL", 2, Operands { v0, v1 })
+ // BTSL imm8, r32
+ if isImm8(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0xba)
+ m.emit(0xe8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BTSL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0xab)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // BTSL imm8, m32
+ if isImm8(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xba)
+ m.mrsd(5, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BTSL r32, m32
+ if isReg32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xab)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BTSL")
+ }
+ return p
+}
+
+// BTSQ performs "Bit Test and Set".
+//
+// Mnemonic : BTS
+// Supported forms : (4 forms)
+//
+// * BTSQ imm8, r64
+// * BTSQ r64, r64
+// * BTSQ imm8, m64
+// * BTSQ r64, m64
+//
+func (self *Program) BTSQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BTSQ", 2, Operands { v0, v1 })
+ // BTSQ imm8, r64
+ if isImm8(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0x0f)
+ m.emit(0xba)
+ m.emit(0xe8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BTSQ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
+ m.emit(0x0f)
+ m.emit(0xab)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // BTSQ imm8, m64
+ if isImm8(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0x0f)
+ m.emit(0xba)
+ m.mrsd(5, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BTSQ r64, m64
+ if isReg64(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[0]), addr(v[1]))
+ m.emit(0x0f)
+ m.emit(0xab)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BTSQ")
+ }
+ return p
+}
+
+// BTSW performs "Bit Test and Set".
+//
+// Mnemonic : BTS
+// Supported forms : (4 forms)
+//
+// * BTSW imm8, r16
+// * BTSW r16, r16
+// * BTSW imm8, m16
+// * BTSW r16, m16
+//
+func (self *Program) BTSW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BTSW", 2, Operands { v0, v1 })
+ // BTSW imm8, r16
+ if isImm8(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0xba)
+ m.emit(0xe8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BTSW r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0xab)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // BTSW imm8, m16
+ if isImm8(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xba)
+ m.mrsd(5, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BTSW r16, m16
+ if isReg16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xab)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BTSW")
+ }
+ return p
+}
+
+// BTW performs "Bit Test".
+//
+// Mnemonic : BT
+// Supported forms : (4 forms)
+//
+// * BTW imm8, r16
+// * BTW r16, r16
+// * BTW imm8, m16
+// * BTW r16, m16
+//
+func (self *Program) BTW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("BTW", 2, Operands { v0, v1 })
+ // BTW imm8, r16
+ if isImm8(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0xba)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BTW r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0xa3)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // BTW imm8, m16
+ if isImm8(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xba)
+ m.mrsd(4, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // BTW r16, m16
+ if isReg16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xa3)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BTW")
+ }
+ return p
+}
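+
+// Usage sketch (illustrative): the BT/BTC/BTR/BTS encoders above all accept
+// either an imm8 bit index or a register index, against a register or memory
+// operand; the tested bit lands in CF:
+//
+//     p.BTL(7, ECX)      // test bit 7 of ECX into CF
+//     p.BTSQ(RDX, RCX)   // set bit RDX of RCX, old value into CF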
+
+// BZHI performs "Zero High Bits Starting with Specified Bit Position".
+//
+// Mnemonic : BZHI
+// Supported forms : (4 forms)
+//
+// * BZHI r32, r32, r32 [BMI2]
+// * BZHI r32, m32, r32 [BMI2]
+// * BZHI r64, r64, r64 [BMI2]
+// * BZHI r64, m64, r64 [BMI2]
+//
+func (self *Program) BZHI(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("BZHI", 3, Operands { v0, v1, v2 })
+ // BZHI r32, r32, r32
+ if isReg32(v0) && isReg32(v1) && isReg32(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[0]) << 3))
+ m.emit(0xf5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // BZHI r32, m32, r32
+ if isReg32(v0) && isM32(v1) && isReg32(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0xf5)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ // BZHI r64, r64, r64
+ if isReg64(v0) && isReg64(v1) && isReg64(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[0]) << 3))
+ m.emit(0xf5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // BZHI r64, m64, r64
+ if isReg64(v0) && isM64(v1) && isReg64(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x80, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0xf5)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for BZHI")
+ }
+ return p
+}
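+
+// Usage sketch (illustrative): BZHI takes the bit-position register first,
+// then source, then destination:
+//
+//     p.BZHI(ECX, EDX, EAX)   // EAX = EDX with bits at index ECX and above cleared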
+
+// CALL performs "Call Procedure".
+//
+// Mnemonic : CALL
+// Supported forms : (1 form)
+//
+// * CALL rel32
+//
+func (self *Program) CALL(v0 interface{}) *Instruction {
+ p := self.alloc("CALL", 1, Operands { v0 })
+ // CALL rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xe8)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // CALL label
+ if isLabel(v0) {
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0xe8)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CALL")
+ }
+ return p
+}
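+
+// Usage sketch (illustrative): besides a raw rel32, CALL also accepts a label
+// value created through this package's label facility (the constructor lives
+// outside this generated file) and resolved at assembly time:
+//
+//     p.CALL(target)   // `target` is a label defined elsewhere in the program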
+
+// CALLQ performs "Call Procedure".
+//
+// Mnemonic : CALL
+// Supported forms : (2 forms)
+//
+// * CALLQ r64
+// * CALLQ m64
+//
+func (self *Program) CALLQ(v0 interface{}) *Instruction {
+ p := self.alloc("CALLQ", 1, Operands { v0 })
+ // CALLQ r64
+ if isReg64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], false)
+ m.emit(0xff)
+ m.emit(0xd0 | lcode(v[0]))
+ })
+ }
+ // CALLQ m64
+ if isM64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xff)
+ m.mrsd(2, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CALLQ")
+ }
+ return p
+}
+
+// CBTW performs "Convert Byte to Word".
+//
+// Mnemonic : CBW
+// Supported forms : (1 form)
+//
+// * CBTW
+//
+func (self *Program) CBTW() *Instruction {
+ p := self.alloc("CBTW", 0, Operands { })
+ // CBTW
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.emit(0x98)
+ })
+ return p
+}
+
+// CLC performs "Clear Carry Flag".
+//
+// Mnemonic : CLC
+// Supported forms : (1 form)
+//
+// * CLC
+//
+func (self *Program) CLC() *Instruction {
+ p := self.alloc("CLC", 0, Operands { })
+ // CLC
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf8)
+ })
+ return p
+}
+
+// CLD performs "Clear Direction Flag".
+//
+// Mnemonic : CLD
+// Supported forms : (1 form)
+//
+// * CLD
+//
+func (self *Program) CLD() *Instruction {
+ p := self.alloc("CLD", 0, Operands { })
+ // CLD
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xfc)
+ })
+ return p
+}
+
+// CLFLUSH performs "Flush Cache Line".
+//
+// Mnemonic : CLFLUSH
+// Supported forms : (1 form)
+//
+// * CLFLUSH m8 [CLFLUSH]
+//
+func (self *Program) CLFLUSH(v0 interface{}) *Instruction {
+ p := self.alloc("CLFLUSH", 1, Operands { v0 })
+ // CLFLUSH m8
+ if isM8(v0) {
+ self.require(ISA_CLFLUSH)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xae)
+ m.mrsd(7, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CLFLUSH")
+ }
+ return p
+}
+
+// CLFLUSHOPT performs "Flush Cache Line Optimized".
+//
+// Mnemonic : CLFLUSHOPT
+// Supported forms : (1 form)
+//
+// * CLFLUSHOPT m8 [CLFLUSHOPT]
+//
+func (self *Program) CLFLUSHOPT(v0 interface{}) *Instruction {
+ p := self.alloc("CLFLUSHOPT", 1, Operands { v0 })
+ // CLFLUSHOPT m8
+ if isM8(v0) {
+ self.require(ISA_CLFLUSHOPT)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xae)
+ m.mrsd(7, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CLFLUSHOPT")
+ }
+ return p
+}
+
+// CLTD performs "Convert Doubleword to Quadword".
+//
+// Mnemonic : CDQ
+// Supported forms : (1 form)
+//
+// * CLTD
+//
+func (self *Program) CLTD() *Instruction {
+ p := self.alloc("CLTD", 0, Operands { })
+ // CLTD
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x99)
+ })
+ return p
+}
+
+// CLTQ performs "Convert Doubleword to Quadword".
+//
+// Mnemonic : CDQE
+// Supported forms : (1 form)
+//
+// * CLTQ
+//
+func (self *Program) CLTQ() *Instruction {
+ p := self.alloc("CLTQ", 0, Operands { })
+ // CLTQ
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48)
+ m.emit(0x98)
+ })
+ return p
+}
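+
+// Usage sketch (illustrative): the width-conversion encoders (CBTW, CLTD,
+// CLTQ) operate on the implicit accumulator and take no operands:
+//
+//     p.CLTQ()   // sign-extend EAX into RAX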
+
+// CLWB performs "Cache Line Write Back".
+//
+// Mnemonic : CLWB
+// Supported forms : (1 form)
+//
+// * CLWB m8 [CLWB]
+//
+func (self *Program) CLWB(v0 interface{}) *Instruction {
+ p := self.alloc("CLWB", 1, Operands { v0 })
+ // CLWB m8
+ if isM8(v0) {
+ self.require(ISA_CLWB)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xae)
+ m.mrsd(6, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CLWB")
+ }
+ return p
+}
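+
+// Usage sketch (illustrative): the cache-line encoders take a single m8
+// operand; assuming a memory-operand constructor along the lines of
+// Ptr(base, disp) (name hypothetical here), a flush reads as:
+//
+//     p.CLFLUSH(Ptr(RDI, 0))   // flush the cache line containing [RDI]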
+
+// CLZERO performs "Zero-out 64-bit Cache Line".
+//
+// Mnemonic : CLZERO
+// Supported forms : (1 form)
+//
+// * CLZERO [CLZERO]
+//
+func (self *Program) CLZERO() *Instruction {
+ p := self.alloc("CLZERO", 0, Operands { })
+ // CLZERO
+ self.require(ISA_CLZERO)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x01)
+ m.emit(0xfc)
+ })
+ return p
+}
+
+// CMC performs "Complement Carry Flag".
+//
+// Mnemonic : CMC
+// Supported forms : (1 form)
+//
+// * CMC
+//
+func (self *Program) CMC() *Instruction {
+ p := self.alloc("CMC", 0, Operands { })
+ // CMC
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf5)
+ })
+ return p
+}
+
+// CMOVA performs "Move if above (CF == 0 and ZF == 0)".
+//
+// Mnemonic : CMOVA
+// Supported forms : (6 forms)
+//
+// * CMOVA r16, r16 [CMOV]
+// * CMOVA m16, r16 [CMOV]
+// * CMOVA r32, r32 [CMOV]
+// * CMOVA m32, r32 [CMOV]
+// * CMOVA r64, r64 [CMOV]
+// * CMOVA m64, r64 [CMOV]
+//
+func (self *Program) CMOVA(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVA", 2, Operands { v0, v1 })
+ // CMOVA r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x47)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVA m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x47)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVA r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x47)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVA m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x47)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVA r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x47)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVA m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x47)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVA")
+ }
+ return p
+}
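+
+// Usage sketch (illustrative): every CMOVcc encoder shares the same
+// source-then-destination shape, so a guarded move is a single call:
+//
+//     p.CMOVA(ECX, EAX)   // EAX = ECX only if CF == 0 and ZF == 0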
+
+// CMOVAE performs "Move if above or equal (CF == 0)".
+//
+// Mnemonic : CMOVAE
+// Supported forms : (6 forms)
+//
+// * CMOVAE r16, r16 [CMOV]
+// * CMOVAE m16, r16 [CMOV]
+// * CMOVAE r32, r32 [CMOV]
+// * CMOVAE m32, r32 [CMOV]
+// * CMOVAE r64, r64 [CMOV]
+// * CMOVAE m64, r64 [CMOV]
+//
+func (self *Program) CMOVAE(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVAE", 2, Operands { v0, v1 })
+ // CMOVAE r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x43)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVAE m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x43)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVAE r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x43)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVAE m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x43)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVAE r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x43)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVAE m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x43)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVAE")
+ }
+ return p
+}
+
+// CMOVB performs "Move if below (CF == 1)".
+//
+// Mnemonic : CMOVB
+// Supported forms : (6 forms)
+//
+// * CMOVB r16, r16 [CMOV]
+// * CMOVB m16, r16 [CMOV]
+// * CMOVB r32, r32 [CMOV]
+// * CMOVB m32, r32 [CMOV]
+// * CMOVB r64, r64 [CMOV]
+// * CMOVB m64, r64 [CMOV]
+//
+func (self *Program) CMOVB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVB", 2, Operands { v0, v1 })
+ // CMOVB r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVB m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x42)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVB r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVB m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x42)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVB r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVB m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x42)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVB")
+ }
+ return p
+}
+
+// CMOVBE performs "Move if below or equal (CF == 1 or ZF == 1)".
+//
+// Mnemonic : CMOVBE
+// Supported forms : (6 forms)
+//
+// * CMOVBE r16, r16 [CMOV]
+// * CMOVBE m16, r16 [CMOV]
+// * CMOVBE r32, r32 [CMOV]
+// * CMOVBE m32, r32 [CMOV]
+// * CMOVBE r64, r64 [CMOV]
+// * CMOVBE m64, r64 [CMOV]
+//
+func (self *Program) CMOVBE(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVBE", 2, Operands { v0, v1 })
+ // CMOVBE r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x46)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVBE m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x46)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVBE r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x46)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVBE m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x46)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVBE r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x46)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVBE m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x46)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVBE")
+ }
+ return p
+}
+
+// CMOVC performs "Move if carry (CF == 1)".
+//
+// Mnemonic : CMOVC
+// Supported forms : (6 forms)
+//
+// * CMOVC r16, r16 [CMOV]
+// * CMOVC m16, r16 [CMOV]
+// * CMOVC r32, r32 [CMOV]
+// * CMOVC m32, r32 [CMOV]
+// * CMOVC r64, r64 [CMOV]
+// * CMOVC m64, r64 [CMOV]
+//
+func (self *Program) CMOVC(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVC", 2, Operands { v0, v1 })
+ // CMOVC r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVC m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x42)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVC r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVC m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x42)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVC r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVC m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x42)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVC")
+ }
+ return p
+}
+
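+// Note (hand-written): CMOVC is a pure mnemonic alias of CMOVB. Both emit
+// condition byte 0x42, so p.CMOVC(ECX, EAX) and p.CMOVB(ECX, EAX) assemble
+// to identical machine code; only the source-level spelling differs.
+//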
+// CMOVE performs "Move if equal (ZF == 1)".
+//
+// Mnemonic : CMOVE
+// Supported forms : (6 forms)
+//
+// * CMOVE r16, r16 [CMOV]
+// * CMOVE m16, r16 [CMOV]
+// * CMOVE r32, r32 [CMOV]
+// * CMOVE m32, r32 [CMOV]
+// * CMOVE r64, r64 [CMOV]
+// * CMOVE m64, r64 [CMOV]
+//
+func (self *Program) CMOVE(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVE", 2, Operands { v0, v1 })
+ // CMOVE r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x44)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVE m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x44)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVE r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x44)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVE m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x44)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVE r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x44)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVE m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x44)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVE")
+ }
+ return p
+}
+
+// CMOVG performs "Move if greater (ZF == 0 and SF == OF)".
+//
+// Mnemonic : CMOVG
+// Supported forms : (6 forms)
+//
+// * CMOVG r16, r16 [CMOV]
+// * CMOVG m16, r16 [CMOV]
+// * CMOVG r32, r32 [CMOV]
+// * CMOVG m32, r32 [CMOV]
+// * CMOVG r64, r64 [CMOV]
+// * CMOVG m64, r64 [CMOV]
+//
+func (self *Program) CMOVG(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVG", 2, Operands { v0, v1 })
+ // CMOVG r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x4f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVG m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x4f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVG r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x4f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVG m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x4f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVG r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x4f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVG m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x4f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVG")
+ }
+ return p
+}
+
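+// Note (hand-written): CMOVG tests the signed-comparison flags (ZF, SF, OF),
+// whereas CMOVB and CMOVBE above test the unsigned flags (CF, ZF). After a
+// CMP, use the G/GE/L/LE family for signed operands and the B/BE family for
+// unsigned ones; the six supported operand forms are identical throughout.
+//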
+// CMOVGE performs "Move if greater or equal (SF == OF)".
+//
+// Mnemonic : CMOVGE
+// Supported forms : (6 forms)
+//
+// * CMOVGE r16, r16 [CMOV]
+// * CMOVGE m16, r16 [CMOV]
+// * CMOVGE r32, r32 [CMOV]
+// * CMOVGE m32, r32 [CMOV]
+// * CMOVGE r64, r64 [CMOV]
+// * CMOVGE m64, r64 [CMOV]
+//
+func (self *Program) CMOVGE(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVGE", 2, Operands { v0, v1 })
+ // CMOVGE r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x4d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVGE m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x4d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVGE r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x4d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVGE m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x4d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVGE r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x4d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVGE m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x4d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVGE")
+ }
+ return p
+}
+
+// CMOVL performs "Move if less (SF != OF)".
+//
+// Mnemonic : CMOVL
+// Supported forms : (6 forms)
+//
+// * CMOVL r16, r16 [CMOV]
+// * CMOVL m16, r16 [CMOV]
+// * CMOVL r32, r32 [CMOV]
+// * CMOVL m32, r32 [CMOV]
+// * CMOVL r64, r64 [CMOV]
+// * CMOVL m64, r64 [CMOV]
+//
+func (self *Program) CMOVL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVL", 2, Operands { v0, v1 })
+ // CMOVL r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x4c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVL m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x4c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x4c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVL m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x4c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVL r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x4c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVL m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x4c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVL")
+ }
+ return p
+}
+
+// CMOVLE performs "Move if less or equal (ZF == 1 or SF != OF)".
+//
+// Mnemonic : CMOVLE
+// Supported forms : (6 forms)
+//
+// * CMOVLE r16, r16 [CMOV]
+// * CMOVLE m16, r16 [CMOV]
+// * CMOVLE r32, r32 [CMOV]
+// * CMOVLE m32, r32 [CMOV]
+// * CMOVLE r64, r64 [CMOV]
+// * CMOVLE m64, r64 [CMOV]
+//
+func (self *Program) CMOVLE(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVLE", 2, Operands { v0, v1 })
+ // CMOVLE r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x4e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVLE m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x4e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVLE r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x4e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVLE m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x4e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVLE r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x4e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVLE m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x4e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVLE")
+ }
+ return p
+}
+
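+// Note (hand-written): in every r16/m16 form of this family, the encoder
+// emits 0x66, the operand-size override prefix, ahead of the 0x0f escape
+// byte; that prefix alone is what narrows the move from 32 to 16 bits.
+//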
+// CMOVNA performs "Move if not above (CF == 1 or ZF == 1)".
+//
+// Mnemonic : CMOVNA
+// Supported forms : (6 forms)
+//
+// * CMOVNA r16, r16 [CMOV]
+// * CMOVNA m16, r16 [CMOV]
+// * CMOVNA r32, r32 [CMOV]
+// * CMOVNA m32, r32 [CMOV]
+// * CMOVNA r64, r64 [CMOV]
+// * CMOVNA m64, r64 [CMOV]
+//
+func (self *Program) CMOVNA(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVNA", 2, Operands { v0, v1 })
+ // CMOVNA r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x46)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNA m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x46)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNA r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x46)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNA m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x46)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNA r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x46)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNA m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x46)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVNA")
+ }
+ return p
+}
+
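+// Note (hand-written): CMOVNA ("not above") emits the same condition byte
+// 0x46 as CMOVBE ("below or equal") above; the two mnemonics are
+// interchangeable aliases.
+//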
+// CMOVNAE performs "Move if not above or equal (CF == 1)".
+//
+// Mnemonic : CMOVNAE
+// Supported forms : (6 forms)
+//
+// * CMOVNAE r16, r16 [CMOV]
+// * CMOVNAE m16, r16 [CMOV]
+// * CMOVNAE r32, r32 [CMOV]
+// * CMOVNAE m32, r32 [CMOV]
+// * CMOVNAE r64, r64 [CMOV]
+// * CMOVNAE m64, r64 [CMOV]
+//
+func (self *Program) CMOVNAE(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVNAE", 2, Operands { v0, v1 })
+ // CMOVNAE r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNAE m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x42)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNAE r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNAE m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x42)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNAE r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNAE m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x42)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVNAE")
+ }
+ return p
+}
+
+// CMOVNB performs "Move if not below (CF == 0)".
+//
+// Mnemonic : CMOVNB
+// Supported forms : (6 forms)
+//
+// * CMOVNB r16, r16 [CMOV]
+// * CMOVNB m16, r16 [CMOV]
+// * CMOVNB r32, r32 [CMOV]
+// * CMOVNB m32, r32 [CMOV]
+// * CMOVNB r64, r64 [CMOV]
+// * CMOVNB m64, r64 [CMOV]
+//
+func (self *Program) CMOVNB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVNB", 2, Operands { v0, v1 })
+ // CMOVNB r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x43)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNB m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x43)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNB r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x43)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNB m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x43)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNB r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x43)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNB m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x43)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVNB")
+ }
+ return p
+}
+
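+// Note (hand-written): the 64-bit register-register forms build their REX
+// prefix as 0x48 | hcode(dst)<<2 | hcode(src), i.e. REX.W plus REX.R for the
+// destination's high register bit and REX.B for the source's. That is what
+// admits the extended registers, assuming the package exports R8 through R15:
+//
+//     p.CMOVNB(R9, RAX)   // REX = 0x49 (W|B): RAX = R9 when CF == 0
+//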
+// CMOVNBE performs "Move if not below or equal (CF == 0 and ZF == 0)".
+//
+// Mnemonic : CMOVNBE
+// Supported forms : (6 forms)
+//
+// * CMOVNBE r16, r16 [CMOV]
+// * CMOVNBE m16, r16 [CMOV]
+// * CMOVNBE r32, r32 [CMOV]
+// * CMOVNBE m32, r32 [CMOV]
+// * CMOVNBE r64, r64 [CMOV]
+// * CMOVNBE m64, r64 [CMOV]
+//
+func (self *Program) CMOVNBE(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVNBE", 2, Operands { v0, v1 })
+ // CMOVNBE r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x47)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNBE m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x47)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNBE r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x47)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNBE m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x47)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNBE r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x47)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNBE m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x47)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVNBE")
+ }
+ return p
+}
+
+// CMOVNC performs "Move if not carry (CF == 0)".
+//
+// Mnemonic : CMOVNC
+// Supported forms : (6 forms)
+//
+// * CMOVNC r16, r16 [CMOV]
+// * CMOVNC m16, r16 [CMOV]
+// * CMOVNC r32, r32 [CMOV]
+// * CMOVNC m32, r32 [CMOV]
+// * CMOVNC r64, r64 [CMOV]
+// * CMOVNC m64, r64 [CMOV]
+//
+func (self *Program) CMOVNC(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVNC", 2, Operands { v0, v1 })
+ // CMOVNC r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x43)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNC m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x43)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNC r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x43)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNC m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x43)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNC r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x43)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNC m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x43)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVNC")
+ }
+ return p
+}
+
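+// Note (hand-written): CMOVNC emits the same condition byte 0x43 as CMOVNB
+// above, mirroring the CMOVB/CMOVC pair: the carry-based mnemonics come in
+// interchangeable aliases that assemble to identical bytes.
+//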
+// CMOVNE performs "Move if not equal (ZF == 0)".
+//
+// Mnemonic : CMOVNE
+// Supported forms : (6 forms)
+//
+// * CMOVNE r16, r16 [CMOV]
+// * CMOVNE m16, r16 [CMOV]
+// * CMOVNE r32, r32 [CMOV]
+// * CMOVNE m32, r32 [CMOV]
+// * CMOVNE r64, r64 [CMOV]
+// * CMOVNE m64, r64 [CMOV]
+//
+func (self *Program) CMOVNE(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVNE", 2, Operands { v0, v1 })
+ // CMOVNE r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x45)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNE m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x45)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNE r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x45)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNE m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x45)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNE r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x45)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNE m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x45)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVNE")
+ }
+ return p
+}
+
+// CMOVNG performs "Move if not greater (ZF == 1 or SF != OF)".
+//
+// Mnemonic : CMOVNG
+// Supported forms : (6 forms)
+//
+// * CMOVNG r16, r16 [CMOV]
+// * CMOVNG m16, r16 [CMOV]
+// * CMOVNG r32, r32 [CMOV]
+// * CMOVNG m32, r32 [CMOV]
+// * CMOVNG r64, r64 [CMOV]
+// * CMOVNG m64, r64 [CMOV]
+//
+func (self *Program) CMOVNG(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVNG", 2, Operands { v0, v1 })
+ // CMOVNG r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x4e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNG m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x4e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNG r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x4e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNG m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x4e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNG r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x4e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNG m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x4e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVNG")
+ }
+ return p
+}
+
+// CMOVNGE performs "Move if not greater or equal (SF != OF)".
+//
+// Mnemonic : CMOVNGE
+// Supported forms : (6 forms)
+//
+// * CMOVNGE r16, r16 [CMOV]
+// * CMOVNGE m16, r16 [CMOV]
+// * CMOVNGE r32, r32 [CMOV]
+// * CMOVNGE m32, r32 [CMOV]
+// * CMOVNGE r64, r64 [CMOV]
+// * CMOVNGE m64, r64 [CMOV]
+//
+func (self *Program) CMOVNGE(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVNGE", 2, Operands { v0, v1 })
+ // CMOVNGE r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x4c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNGE m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x4c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNGE r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x4c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNGE m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x4c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNGE r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x4c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNGE m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x4c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVNGE")
+ }
+ return p
+}
+
+// CMOVNL performs "Move if not less (SF == OF)".
+//
+// Mnemonic : CMOVNL
+// Supported forms : (6 forms)
+//
+// * CMOVNL r16, r16 [CMOV]
+// * CMOVNL m16, r16 [CMOV]
+// * CMOVNL r32, r32 [CMOV]
+// * CMOVNL m32, r32 [CMOV]
+// * CMOVNL r64, r64 [CMOV]
+// * CMOVNL m64, r64 [CMOV]
+//
+func (self *Program) CMOVNL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVNL", 2, Operands { v0, v1 })
+ // CMOVNL r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x4d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNL m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x4d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x4d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNL m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x4d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNL r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x4d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNL m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x4d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVNL")
+ }
+ return p
+}
+
+// CMOVNLE performs "Move if not less or equal (ZF == 0 and SF == OF)".
+//
+// Mnemonic : CMOVNLE
+// Supported forms : (6 forms)
+//
+// * CMOVNLE r16, r16 [CMOV]
+// * CMOVNLE m16, r16 [CMOV]
+// * CMOVNLE r32, r32 [CMOV]
+// * CMOVNLE m32, r32 [CMOV]
+// * CMOVNLE r64, r64 [CMOV]
+// * CMOVNLE m64, r64 [CMOV]
+//
+func (self *Program) CMOVNLE(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVNLE", 2, Operands { v0, v1 })
+ // CMOVNLE r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x4f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNLE m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x4f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNLE r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x4f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNLE m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x4f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNLE r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x4f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNLE m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x4f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVNLE")
+ }
+ return p
+}
+
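+// Note (hand-written): the four negated signed mnemonics are aliases of the
+// positive ones above: CMOVNG emits 0x4e like CMOVLE, CMOVNGE emits 0x4c like
+// CMOVL, CMOVNL emits 0x4d like CMOVGE, and CMOVNLE emits 0x4f like CMOVG.
+//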
+// CMOVNO performs "Move if not overflow (OF == 0)".
+//
+// Mnemonic : CMOVNO
+// Supported forms : (6 forms)
+//
+// * CMOVNO r16, r16 [CMOV]
+// * CMOVNO m16, r16 [CMOV]
+// * CMOVNO r32, r32 [CMOV]
+// * CMOVNO m32, r32 [CMOV]
+// * CMOVNO r64, r64 [CMOV]
+// * CMOVNO m64, r64 [CMOV]
+//
+func (self *Program) CMOVNO(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVNO", 2, Operands { v0, v1 })
+ // CMOVNO r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x41)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNO m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x41)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNO r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x41)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNO m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x41)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNO r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x41)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNO m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x41)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVNO")
+ }
+ return p
+}
+
+// CMOVNP performs "Move if not parity (PF == 0)".
+//
+// Mnemonic : CMOVNP
+// Supported forms : (6 forms)
+//
+// * CMOVNP r16, r16 [CMOV]
+// * CMOVNP m16, r16 [CMOV]
+// * CMOVNP r32, r32 [CMOV]
+// * CMOVNP m32, r32 [CMOV]
+// * CMOVNP r64, r64 [CMOV]
+// * CMOVNP m64, r64 [CMOV]
+//
+func (self *Program) CMOVNP(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVNP", 2, Operands { v0, v1 })
+ // CMOVNP r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x4b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNP m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x4b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNP r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x4b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNP m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x4b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNP r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x4b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNP m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x4b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVNP")
+ }
+ return p
+}
+
+// CMOVNS performs "Move if not sign (SF == 0)".
+//
+// Mnemonic : CMOVNS
+// Supported forms : (6 forms)
+//
+// * CMOVNS r16, r16 [CMOV]
+// * CMOVNS m16, r16 [CMOV]
+// * CMOVNS r32, r32 [CMOV]
+// * CMOVNS m32, r32 [CMOV]
+// * CMOVNS r64, r64 [CMOV]
+// * CMOVNS m64, r64 [CMOV]
+//
+func (self *Program) CMOVNS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVNS", 2, Operands { v0, v1 })
+ // CMOVNS r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x49)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNS m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x49)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNS r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x49)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNS m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x49)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNS r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x49)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNS m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x49)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVNS")
+ }
+ return p
+}
+
+// CMOVNZ performs "Move if not zero (ZF == 0)".
+//
+// Mnemonic : CMOVNZ
+// Supported forms : (6 forms)
+//
+// * CMOVNZ r16, r16 [CMOV]
+// * CMOVNZ m16, r16 [CMOV]
+// * CMOVNZ r32, r32 [CMOV]
+// * CMOVNZ m32, r32 [CMOV]
+// * CMOVNZ r64, r64 [CMOV]
+// * CMOVNZ m64, r64 [CMOV]
+//
+func (self *Program) CMOVNZ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVNZ", 2, Operands { v0, v1 })
+ // CMOVNZ r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x45)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNZ m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x45)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNZ r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x45)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNZ m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x45)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVNZ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x45)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVNZ m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x45)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVNZ")
+ }
+ return p
+}
+
+// CMOVO performs "Move if overflow (OF == 1)".
+//
+// Mnemonic : CMOVO
+// Supported forms : (6 forms)
+//
+// * CMOVO r16, r16 [CMOV]
+// * CMOVO m16, r16 [CMOV]
+// * CMOVO r32, r32 [CMOV]
+// * CMOVO m32, r32 [CMOV]
+// * CMOVO r64, r64 [CMOV]
+// * CMOVO m64, r64 [CMOV]
+//
+func (self *Program) CMOVO(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVO", 2, Operands { v0, v1 })
+ // CMOVO r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x40)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVO m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x40)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVO r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x40)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVO m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x40)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVO r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x40)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVO m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x40)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVO")
+ }
+ return p
+}
+
+// CMOVP performs "Move if parity (PF == 1)".
+//
+// Mnemonic : CMOVP
+// Supported forms : (6 forms)
+//
+// * CMOVP r16, r16 [CMOV]
+// * CMOVP m16, r16 [CMOV]
+// * CMOVP r32, r32 [CMOV]
+// * CMOVP m32, r32 [CMOV]
+// * CMOVP r64, r64 [CMOV]
+// * CMOVP m64, r64 [CMOV]
+//
+func (self *Program) CMOVP(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVP", 2, Operands { v0, v1 })
+ // CMOVP r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x4a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVP m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x4a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVP r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x4a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVP m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x4a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVP r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x4a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVP m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x4a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVP")
+ }
+ return p
+}
+
+// CMOVPE performs "Move if parity even (PF == 1)".
+//
+// Mnemonic : CMOVPE
+// Supported forms : (6 forms)
+//
+// * CMOVPE r16, r16 [CMOV]
+// * CMOVPE m16, r16 [CMOV]
+// * CMOVPE r32, r32 [CMOV]
+// * CMOVPE m32, r32 [CMOV]
+// * CMOVPE r64, r64 [CMOV]
+// * CMOVPE m64, r64 [CMOV]
+//
+func (self *Program) CMOVPE(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVPE", 2, Operands { v0, v1 })
+ // CMOVPE r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x4a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVPE m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x4a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVPE r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x4a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVPE m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x4a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVPE r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x4a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVPE m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x4a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVPE")
+ }
+ return p
+}
+
+// CMOVPO performs "Move if parity odd (PF == 0)".
+//
+// Mnemonic : CMOVPO
+// Supported forms : (6 forms)
+//
+// * CMOVPO r16, r16 [CMOV]
+// * CMOVPO m16, r16 [CMOV]
+// * CMOVPO r32, r32 [CMOV]
+// * CMOVPO m32, r32 [CMOV]
+// * CMOVPO r64, r64 [CMOV]
+// * CMOVPO m64, r64 [CMOV]
+//
+func (self *Program) CMOVPO(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVPO", 2, Operands { v0, v1 })
+ // CMOVPO r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x4b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVPO m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x4b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVPO r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x4b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVPO m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x4b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVPO r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x4b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVPO m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x4b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVPO")
+ }
+ return p
+}
+
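+// Note (hand-written): parity also comes in alias pairs. CMOVP and CMOVPE
+// both emit condition byte 0x4a (PF == 1), while CMOVNP and CMOVPO both emit
+// 0x4b (PF == 0); each pair assembles to identical machine code.
+//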
+// CMOVS performs "Move if sign (SF == 1)".
+//
+// Mnemonic : CMOVS
+// Supported forms : (6 forms)
+//
+// * CMOVS r16, r16 [CMOV]
+// * CMOVS m16, r16 [CMOV]
+// * CMOVS r32, r32 [CMOV]
+// * CMOVS m32, r32 [CMOV]
+// * CMOVS r64, r64 [CMOV]
+// * CMOVS m64, r64 [CMOV]
+//
+func (self *Program) CMOVS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVS", 2, Operands { v0, v1 })
+ // CMOVS r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x48)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVS m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x48)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVS r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x48)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVS m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x48)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVS r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x48)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVS m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x48)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVS")
+ }
+ return p
+}
+
+// CMOVZ performs "Move if zero (ZF == 1)".
+//
+// Mnemonic : CMOVZ
+// Supported forms : (6 forms)
+//
+// * CMOVZ r16, r16 [CMOV]
+// * CMOVZ m16, r16 [CMOV]
+// * CMOVZ r32, r32 [CMOV]
+// * CMOVZ m32, r32 [CMOV]
+// * CMOVZ r64, r64 [CMOV]
+// * CMOVZ m64, r64 [CMOV]
+//
+func (self *Program) CMOVZ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMOVZ", 2, Operands { v0, v1 })
+ // CMOVZ r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x44)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVZ m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x44)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVZ r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x44)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVZ m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x44)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMOVZ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x44)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMOVZ m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_CMOV)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x44)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMOVZ")
+ }
+ return p
+}
+
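+// Hand-written usage sketch (not generated): the operand types passed to a
+// wrapper select the encoded form, so with a *Program p and this package's
+// register constants,
+//
+//     p.CMOVZ(RBX, RAX)    // CMOVZ r64, r64: RAX = RBX when ZF == 1
+//     p.CMOVZ(CX, AX)      // CMOVZ r16, r16: 0x66-prefixed encoding
+//
+// and any combination outside the listed forms makes the call panic.
+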
+// CMPB performs "Compare Two Operands".
+//
+// Mnemonic : CMP
+// Supported forms : (6 forms)
+//
+// * CMPB imm8, al
+// * CMPB imm8, r8
+// * CMPB r8, r8
+// * CMPB m8, r8
+// * CMPB imm8, m8
+// * CMPB r8, m8
+//
+func (self *Program) CMPB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMPB", 2, Operands { v0, v1 })
+ // CMPB imm8, al
+ if isImm8(v0) && v1 == AL {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x3c)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // CMPB imm8, r8
+ if isImm8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0x80)
+ m.emit(0xf8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // CMPB r8, r8
+ if isReg8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
+ m.emit(0x38)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1]))
+ m.emit(0x3a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMPB m8, r8
+ if isM8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1]))
+ m.emit(0x3a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMPB imm8, m8
+ if isImm8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x80)
+ m.mrsd(7, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // CMPB r8, m8
+ if isReg8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
+ m.emit(0x38)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMPB")
+ }
+ return p
+}
+
+// CMPL performs "Compare Two Operands".
+//
+// Mnemonic : CMP
+// Supported forms : (8 forms)
+//
+// * CMPL imm32, eax
+// * CMPL imm8, r32
+// * CMPL imm32, r32
+// * CMPL r32, r32
+// * CMPL m32, r32
+// * CMPL imm8, m32
+// * CMPL imm32, m32
+// * CMPL r32, m32
+//
+func (self *Program) CMPL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMPL", 2, Operands { v0, v1 })
+ // CMPL imm32, eax
+ if isImm32(v0) && v1 == EAX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x3d)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // CMPL imm8, r32
+ if isImm8Ext(v0, 4) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x83)
+ m.emit(0xf8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // CMPL imm32, r32
+ if isImm32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x81)
+ m.emit(0xf8 | lcode(v[1]))
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // CMPL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x39)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x3b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMPL m32, r32
+ if isM32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x3b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMPL imm8, m32
+ if isImm8Ext(v0, 4) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x83)
+ m.mrsd(7, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // CMPL imm32, m32
+ if isImm32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x81)
+ m.mrsd(7, addr(v[1]), 1)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // CMPL r32, m32
+ if isReg32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x39)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMPL")
+ }
+ return p
+}
+
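+// Hand-written note: a small constant such as p.CMPL(16, EAX) satisfies the
+// eax short form (0x3d), the sign-extended imm8 form (0x83 /7) and the
+// imm32 form (0x81 /7) above, so all three candidate encodings are
+// registered on the instruction, presumably so a later encoding pass can
+// keep the shortest one.
+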
+// CMPPD performs "Compare Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : CMPPD
+// Supported forms : (2 forms)
+//
+// * CMPPD imm8, xmm, xmm [SSE2]
+// * CMPPD imm8, m128, xmm [SSE2]
+//
+func (self *Program) CMPPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("CMPPD", 3, Operands { v0, v1, v2 })
+ // CMPPD imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0xc2)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // CMPPD imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xc2)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMPPD")
+ }
+ return p
+}
+
+// CMPPS performs "Compare Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : CMPPS
+// Supported forms : (2 forms)
+//
+// * CMPPS imm8, xmm, xmm [SSE]
+// * CMPPS imm8, m128, xmm [SSE]
+//
+func (self *Program) CMPPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("CMPPS", 3, Operands { v0, v1, v2 })
+ // CMPPS imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0xc2)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // CMPPS imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xc2)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMPPS")
+ }
+ return p
+}
+
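+// Hand-written usage sketch: the leading imm8 is the SSE compare predicate
+// (0 = EQ, 1 = LT, 2 = LE, 3 = UNORD, 4 = NEQ, 5 = NLT, 6 = NLE, 7 = ORD),
+// for example
+//
+//     p.CMPPS(0, XMM1, XMM0)    // per lane: XMM0 = (XMM0 == XMM1) ? ~0 : 0
+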
+// CMPQ performs "Compare Two Operands".
+//
+// Mnemonic : CMP
+// Supported forms : (8 forms)
+//
+// * CMPQ imm32, rax
+// * CMPQ imm8, r64
+// * CMPQ imm32, r64
+// * CMPQ r64, r64
+// * CMPQ m64, r64
+// * CMPQ imm8, m64
+// * CMPQ imm32, m64
+// * CMPQ r64, m64
+//
+func (self *Program) CMPQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMPQ", 2, Operands { v0, v1 })
+ // CMPQ imm32, rax
+ if isImm32(v0) && v1 == RAX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48)
+ m.emit(0x3d)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // CMPQ imm8, r64
+ if isImm8Ext(v0, 8) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0x83)
+ m.emit(0xf8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // CMPQ imm32, r64
+ if isImm32Ext(v0, 8) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0x81)
+ m.emit(0xf8 | lcode(v[1]))
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // CMPQ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
+ m.emit(0x39)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x3b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMPQ m64, r64
+ if isM64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x3b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMPQ imm8, m64
+ if isImm8Ext(v0, 8) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0x83)
+ m.mrsd(7, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // CMPQ imm32, m64
+ if isImm32Ext(v0, 8) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0x81)
+ m.mrsd(7, addr(v[1]), 1)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // CMPQ r64, m64
+ if isReg64(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[0]), addr(v[1]))
+ m.emit(0x39)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMPQ")
+ }
+ return p
+}
+
+// CMPSD performs "Compare Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : CMPSD
+// Supported forms : (2 forms)
+//
+// * CMPSD imm8, xmm, xmm [SSE2]
+// * CMPSD imm8, m64, xmm [SSE2]
+//
+func (self *Program) CMPSD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("CMPSD", 3, Operands { v0, v1, v2 })
+ // CMPSD imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0xc2)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // CMPSD imm8, m64, xmm
+ if isImm8(v0) && isM64(v1) && isXMM(v2) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xc2)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMPSD")
+ }
+ return p
+}
+
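+// Hand-written note: this is the SSE2 scalar compare, which its operand
+// types distinguish from the unrelated string instruction that shares the
+// CMPSD mnemonic (and which this wrapper does not emit). The imm8 predicate
+// values match CMPPS/CMPPD, e.g.
+//
+//     p.CMPSD(2, XMM1, XMM0)    // XMM0[63:0] = (XMM0 <= XMM1) ? ~0 : 0
+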
+// CMPSS performs "Compare Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : CMPSS
+// Supported forms : (2 forms)
+//
+// * CMPSS imm8, xmm, xmm [SSE]
+// * CMPSS imm8, m32, xmm [SSE]
+//
+func (self *Program) CMPSS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("CMPSS", 3, Operands { v0, v1, v2 })
+ // CMPSS imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0xc2)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // CMPSS imm8, m32, xmm
+ if isImm8(v0) && isM32(v1) && isXMM(v2) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xc2)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMPSS")
+ }
+ return p
+}
+
+// CMPW performs "Compare Two Operands".
+//
+// Mnemonic : CMP
+// Supported forms : (8 forms)
+//
+// * CMPW imm16, ax
+// * CMPW imm8, r16
+// * CMPW imm16, r16
+// * CMPW r16, r16
+// * CMPW m16, r16
+// * CMPW imm8, m16
+// * CMPW imm16, m16
+// * CMPW r16, m16
+//
+func (self *Program) CMPW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMPW", 2, Operands { v0, v1 })
+ // CMPW imm16, ax
+ if isImm16(v0) && v1 == AX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.emit(0x3d)
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // CMPW imm8, r16
+ if isImm8Ext(v0, 2) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x83)
+ m.emit(0xf8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // CMPW imm16, r16
+ if isImm16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x81)
+ m.emit(0xf8 | lcode(v[1]))
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // CMPW r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x39)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x3b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CMPW m16, r16
+ if isM16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x3b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CMPW imm8, m16
+ if isImm8Ext(v0, 2) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x83)
+ m.mrsd(7, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // CMPW imm16, m16
+ if isImm16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x81)
+ m.mrsd(7, addr(v[1]), 1)
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // CMPW r16, m16
+ if isReg16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x39)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMPW")
+ }
+ return p
+}
+
+// CMPXCHG16B performs "Compare and Exchange 16 Bytes".
+//
+// Mnemonic : CMPXCHG16B
+// Supported forms : (1 form)
+//
+// * CMPXCHG16B m128
+//
+func (self *Program) CMPXCHG16B(v0 interface{}) *Instruction {
+ p := self.alloc("CMPXCHG16B", 1, Operands { v0 })
+ // CMPXCHG16B m128
+ if isM128(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0xc7)
+ m.mrsd(1, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMPXCHG16B")
+ }
+ return p
+}
+
+// CMPXCHG8B performs "Compare and Exchange 8 Bytes".
+//
+// Mnemonic : CMPXCHG8B
+// Supported forms : (1 form)
+//
+// * CMPXCHG8B m64
+//
+func (self *Program) CMPXCHG8B(v0 interface{}) *Instruction {
+ p := self.alloc("CMPXCHG8B", 1, Operands { v0 })
+ // CMPXCHG8B m64
+ if isM64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xc7)
+ m.mrsd(1, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMPXCHG8B")
+ }
+ return p
+}
+
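+// Hand-written usage note: CMPXCHG8B compares EDX:EAX against the m64
+// destination and stores ECX:EBX on a match, and CMPXCHG16B does the same
+// with RDX:RAX and RCX:RBX against a 16-byte-aligned m128. Both take a
+// single memory operand, e.g. with a hypothetical memory-operand helper
+// Ptr(base, disp):
+//
+//     p.CMPXCHG8B(Ptr(RDI, 0))
+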
+// CMPXCHGB performs "Compare and Exchange".
+//
+// Mnemonic : CMPXCHG
+// Supported forms : (2 forms)
+//
+// * CMPXCHGB r8, r8
+// * CMPXCHGB r8, m8
+//
+func (self *Program) CMPXCHGB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMPXCHGB", 2, Operands { v0, v1 })
+ // CMPXCHGB r8, r8
+ if isReg8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
+ m.emit(0x0f)
+ m.emit(0xb0)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // CMPXCHGB r8, m8
+ if isReg8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0xb0)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMPXCHGB")
+ }
+ return p
+}
+
+// CMPXCHGL performs "Compare and Exchange".
+//
+// Mnemonic : CMPXCHG
+// Supported forms : (2 forms)
+//
+// * CMPXCHGL r32, r32
+// * CMPXCHGL r32, m32
+//
+func (self *Program) CMPXCHGL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMPXCHGL", 2, Operands { v0, v1 })
+ // CMPXCHGL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0xb1)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // CMPXCHGL r32, m32
+ if isReg32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xb1)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMPXCHGL")
+ }
+ return p
+}
+
+// CMPXCHGQ performs "Compare and Exchange".
+//
+// Mnemonic : CMPXCHG
+// Supported forms : (2 forms)
+//
+// * CMPXCHGQ r64, r64
+// * CMPXCHGQ r64, m64
+//
+func (self *Program) CMPXCHGQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMPXCHGQ", 2, Operands { v0, v1 })
+ // CMPXCHGQ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
+ m.emit(0x0f)
+ m.emit(0xb1)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // CMPXCHGQ r64, m64
+ if isReg64(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[0]), addr(v[1]))
+ m.emit(0x0f)
+ m.emit(0xb1)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMPXCHGQ")
+ }
+ return p
+}
+
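+// Hand-written usage sketch: CMPXCHG compares the accumulator (RAX for the
+// Q forms) with the destination; on a match the destination receives the
+// source, otherwise RAX receives the destination. In this package's
+// (src, dst) operand order:
+//
+//     p.CMPXCHGQ(RCX, RDX)    // if RAX == RDX { RDX = RCX } else { RAX = RDX }
+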
+// CMPXCHGW performs "Compare and Exchange".
+//
+// Mnemonic : CMPXCHG
+// Supported forms : (2 forms)
+//
+// * CMPXCHGW r16, r16
+// * CMPXCHGW r16, m16
+//
+func (self *Program) CMPXCHGW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CMPXCHGW", 2, Operands { v0, v1 })
+ // CMPXCHGW r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0xb1)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // CMPXCHGW r16, m16
+ if isReg16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xb1)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CMPXCHGW")
+ }
+ return p
+}
+
+// COMISD performs "Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS".
+//
+// Mnemonic : COMISD
+// Supported forms : (2 forms)
+//
+// * COMISD xmm, xmm [SSE2]
+// * COMISD m64, xmm [SSE2]
+//
+func (self *Program) COMISD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("COMISD", 2, Operands { v0, v1 })
+ // COMISD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x2f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // COMISD m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x2f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for COMISD")
+ }
+ return p
+}
+
+// COMISS performs "Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS".
+//
+// Mnemonic : COMISS
+// Supported forms : (2 forms)
+//
+// * COMISS xmm, xmm [SSE]
+// * COMISS m32, xmm [SSE]
+//
+func (self *Program) COMISS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("COMISS", 2, Operands { v0, v1 })
+ // COMISS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x2f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // COMISS m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x2f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for COMISS")
+ }
+ return p
+}
+
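+// Hand-written usage note: COMISS/COMISD write ZF, PF and CF (an unordered
+// compare, i.e. a NaN operand, sets all three), so the result is consumed
+// with the unsigned branch family (JA, JAE, JB, JE), e.g.
+//
+//     p.COMISD(XMM1, XMM0)    // compare XMM0 against XMM1, set EFLAGS
+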
+// CPUID performs "CPU Identification".
+//
+// Mnemonic : CPUID
+// Supported forms : (1 form)
+//
+// * CPUID [CPUID]
+//
+func (self *Program) CPUID() *Instruction {
+ p := self.alloc("CPUID", 0, Operands { })
+ // CPUID
+ self.require(ISA_CPUID)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0xa2)
+ })
+ return p
+}
+
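+// Hand-written usage note: CPUID takes its leaf in EAX (and subleaf in ECX)
+// and overwrites EAX, EBX, ECX and EDX, so callers load EAX first, e.g.
+// (assuming the package's generated MOVL wrapper):
+//
+//     p.MOVL(1, EAX)    // leaf 1: processor feature flags
+//     p.CPUID()
+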
+// CQTO performs "Convert Quadword to Octaword".
+//
+// Mnemonic : CQO
+// Supported forms : (1 form)
+//
+// * CQTO
+//
+func (self *Program) CQTO() *Instruction {
+ p := self.alloc("CQTO", 0, Operands { })
+ // CQTO
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48)
+ m.emit(0x99)
+ })
+ return p
+}
+
+// CRC32B performs "Accumulate CRC32 Value".
+//
+// Mnemonic : CRC32
+// Supported forms : (4 forms)
+//
+// * CRC32B r8, r32 [SSE4.2]
+// * CRC32B m8, r32 [SSE4.2]
+// * CRC32B r8, r64 [SSE4.2]
+// * CRC32B m8, r64 [SSE4.2]
+//
+func (self *Program) CRC32B(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CRC32B", 2, Operands { v0, v1 })
+ // CRC32B r8, r32
+ if isReg8(v0) && isReg32(v1) {
+ self.require(ISA_SSE4_2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xf0)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CRC32B m8, r32
+ if isM8(v0) && isReg32(v1) {
+ self.require(ISA_SSE4_2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xf0)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CRC32B r8, r64
+ if isReg8(v0) && isReg64(v1) {
+ self.require(ISA_SSE4_2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xf0)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CRC32B m8, r64
+ if isM8(v0) && isReg64(v1) {
+ self.require(ISA_SSE4_2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xf0)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CRC32B")
+ }
+ return p
+}
+
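+// Hand-written usage note: the CRC32 instruction accumulates with the
+// Castagnoli polynomial (CRC-32C, as used by iSCSI), not the zlib CRC-32
+// polynomial; the destination register is the running accumulator, e.g.
+//
+//     p.CRC32B(BL, EAX)    // EAX = crc32c step over the byte in BL
+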
+// CRC32L performs "Accumulate CRC32 Value".
+//
+// Mnemonic : CRC32
+// Supported forms : (2 forms)
+//
+// * CRC32L r32, r32 [SSE4.2]
+// * CRC32L m32, r32 [SSE4.2]
+//
+func (self *Program) CRC32L(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CRC32L", 2, Operands { v0, v1 })
+ // CRC32L r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_SSE4_2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xf1)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CRC32L m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_SSE4_2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xf1)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CRC32L")
+ }
+ return p
+}
+
+// CRC32Q performs "Accumulate CRC32 Value".
+//
+// Mnemonic : CRC32
+// Supported forms : (2 forms)
+//
+// * CRC32Q r64, r64 [SSE4.2]
+// * CRC32Q m64, r64 [SSE4.2]
+//
+func (self *Program) CRC32Q(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CRC32Q", 2, Operands { v0, v1 })
+ // CRC32Q r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_SSE4_2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xf1)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CRC32Q m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_SSE4_2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xf1)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CRC32Q")
+ }
+ return p
+}
+
+// CRC32W performs "Accumulate CRC32 Value".
+//
+// Mnemonic : CRC32
+// Supported forms : (2 forms)
+//
+// * CRC32W r16, r32 [SSE4.2]
+// * CRC32W m16, r32 [SSE4.2]
+//
+func (self *Program) CRC32W(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CRC32W", 2, Operands { v0, v1 })
+ // CRC32W r16, r32
+ if isReg16(v0) && isReg32(v1) {
+ self.require(ISA_SSE4_2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xf1)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CRC32W m16, r32
+ if isM16(v0) && isReg32(v1) {
+ self.require(ISA_SSE4_2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xf1)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CRC32W")
+ }
+ return p
+}
+
+// CVTDQ2PD performs "Convert Packed Dword Integers to Packed Double-Precision FP Values".
+//
+// Mnemonic : CVTDQ2PD
+// Supported forms : (2 forms)
+//
+// * CVTDQ2PD xmm, xmm [SSE2]
+// * CVTDQ2PD m64, xmm [SSE2]
+//
+func (self *Program) CVTDQ2PD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CVTDQ2PD", 2, Operands { v0, v1 })
+ // CVTDQ2PD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xe6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTDQ2PD m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xe6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CVTDQ2PD")
+ }
+ return p
+}
+
+// CVTDQ2PS performs "Convert Packed Dword Integers to Packed Single-Precision FP Values".
+//
+// Mnemonic : CVTDQ2PS
+// Supported forms : (2 forms)
+//
+// * CVTDQ2PS xmm, xmm [SSE2]
+// * CVTDQ2PS m128, xmm [SSE2]
+//
+func (self *Program) CVTDQ2PS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CVTDQ2PS", 2, Operands { v0, v1 })
+ // CVTDQ2PS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTDQ2PS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x5b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CVTDQ2PS")
+ }
+ return p
+}
+
+// CVTPD2DQ performs "Convert Packed Double-Precision FP Values to Packed Dword Integers".
+//
+// Mnemonic : CVTPD2DQ
+// Supported forms : (2 forms)
+//
+// * CVTPD2DQ xmm, xmm [SSE2]
+// * CVTPD2DQ m128, xmm [SSE2]
+//
+func (self *Program) CVTPD2DQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CVTPD2DQ", 2, Operands { v0, v1 })
+ // CVTPD2DQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xe6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTPD2DQ m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xe6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CVTPD2DQ")
+ }
+ return p
+}
+
+// CVTPD2PI performs "Convert Packed Double-Precision FP Values to Packed Dword Integers".
+//
+// Mnemonic : CVTPD2PI
+// Supported forms : (2 forms)
+//
+// * CVTPD2PI xmm, mm [SSE2]
+// * CVTPD2PI m128, mm [SSE2]
+//
+func (self *Program) CVTPD2PI(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CVTPD2PI", 2, Operands { v0, v1 })
+ // CVTPD2PI xmm, mm
+ if isXMM(v0) && isMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x2d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTPD2PI m128, mm
+ if isM128(v0) && isMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x2d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CVTPD2PI")
+ }
+ return p
+}
+
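+// Hand-written note: the *PI conversion forms target MMX registers, which
+// alias the x87 stack, so code mixing them with x87 arithmetic
+// conventionally emits EMMS afterwards (assuming the generated EMMS
+// wrapper), e.g.
+//
+//     p.CVTPD2PI(XMM0, MM0)
+//     p.EMMS()
+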
+// CVTPD2PS performs "Convert Packed Double-Precision FP Values to Packed Single-Precision FP Values".
+//
+// Mnemonic : CVTPD2PS
+// Supported forms : (2 forms)
+//
+// * CVTPD2PS xmm, xmm [SSE2]
+// * CVTPD2PS m128, xmm [SSE2]
+//
+func (self *Program) CVTPD2PS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CVTPD2PS", 2, Operands { v0, v1 })
+ // CVTPD2PS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x5a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTPD2PS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x5a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CVTPD2PS")
+ }
+ return p
+}
+
+// CVTPI2PD performs "Convert Packed Dword Integers to Packed Double-Precision FP Values".
+//
+// Mnemonic : CVTPI2PD
+// Supported forms : (2 forms)
+//
+// * CVTPI2PD mm, xmm [SSE2]
+// * CVTPI2PD m64, xmm [SSE2]
+//
+func (self *Program) CVTPI2PD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CVTPI2PD", 2, Operands { v0, v1 })
+ // CVTPI2PD mm, xmm
+ if isMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x2a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTPI2PD m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x2a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CVTPI2PD")
+ }
+ return p
+}
+
+// CVTPI2PS performs "Convert Packed Dword Integers to Packed Single-Precision FP Values".
+//
+// Mnemonic : CVTPI2PS
+// Supported forms : (2 forms)
+//
+// * CVTPI2PS mm, xmm [SSE]
+// * CVTPI2PS m64, xmm [SSE]
+//
+func (self *Program) CVTPI2PS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CVTPI2PS", 2, Operands { v0, v1 })
+ // CVTPI2PS mm, xmm
+ if isMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x2a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTPI2PS m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x2a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CVTPI2PS")
+ }
+ return p
+}
+
+// CVTPS2DQ performs "Convert Packed Single-Precision FP Values to Packed Dword Integers".
+//
+// Mnemonic : CVTPS2DQ
+// Supported forms : (2 forms)
+//
+// * CVTPS2DQ xmm, xmm [SSE2]
+// * CVTPS2DQ m128, xmm [SSE2]
+//
+func (self *Program) CVTPS2DQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CVTPS2DQ", 2, Operands { v0, v1 })
+ // CVTPS2DQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTPS2DQ m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x5b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CVTPS2DQ")
+ }
+ return p
+}
+
+// CVTPS2PD performs "Convert Packed Single-Precision FP Values to Packed Double-Precision FP Values".
+//
+// Mnemonic : CVTPS2PD
+// Supported forms : (2 forms)
+//
+// * CVTPS2PD xmm, xmm [SSE2]
+// * CVTPS2PD m64, xmm [SSE2]
+//
+func (self *Program) CVTPS2PD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CVTPS2PD", 2, Operands { v0, v1 })
+ // CVTPS2PD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x5a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTPS2PD m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x5a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CVTPS2PD")
+ }
+ return p
+}
+
+// CVTPS2PI performs "Convert Packed Single-Precision FP Values to Packed Dword Integers".
+//
+// Mnemonic : CVTPS2PI
+// Supported forms : (2 forms)
+//
+// * CVTPS2PI xmm, mm [SSE]
+// * CVTPS2PI m64, mm [SSE]
+//
+func (self *Program) CVTPS2PI(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CVTPS2PI", 2, Operands { v0, v1 })
+ // CVTPS2PI xmm, mm
+ if isXMM(v0) && isMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x2d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTPS2PI m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x2d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CVTPS2PI")
+ }
+ return p
+}
+
+// CVTSD2SI performs "Convert Scalar Double-Precision FP Value to Integer".
+//
+// Mnemonic : CVTSD2SI
+// Supported forms : (4 forms)
+//
+// * CVTSD2SI xmm, r32 [SSE2]
+// * CVTSD2SI m64, r32 [SSE2]
+// * CVTSD2SI xmm, r64 [SSE2]
+// * CVTSD2SI m64, r64 [SSE2]
+//
+func (self *Program) CVTSD2SI(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CVTSD2SI", 2, Operands { v0, v1 })
+ // CVTSD2SI xmm, r32
+ if isXMM(v0) && isReg32(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x2d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTSD2SI m64, r32
+ if isM64(v0) && isReg32(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x2d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CVTSD2SI xmm, r64
+ if isXMM(v0) && isReg64(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x2d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTSD2SI m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x2d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CVTSD2SI")
+ }
+ return p
+}
+
+// CVTSD2SS performs "Convert Scalar Double-Precision FP Value to Scalar Single-Precision FP Value".
+//
+// Mnemonic : CVTSD2SS
+// Supported forms : (2 forms)
+//
+// * CVTSD2SS xmm, xmm [SSE2]
+// * CVTSD2SS m64, xmm [SSE2]
+//
+func (self *Program) CVTSD2SS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CVTSD2SS", 2, Operands { v0, v1 })
+ // CVTSD2SS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x5a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTSD2SS m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x5a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CVTSD2SS")
+ }
+ return p
+}
+
+// CVTSI2SD performs "Convert Dword Integer to Scalar Double-Precision FP Value".
+//
+// Mnemonic : CVTSI2SD
+// Supported forms : (4 forms)
+//
+// * CVTSI2SD r32, xmm [SSE2]
+// * CVTSI2SD r64, xmm [SSE2]
+// * CVTSI2SD m32, xmm [SSE2]
+// * CVTSI2SD m64, xmm [SSE2]
+//
+func (self *Program) CVTSI2SD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CVTSI2SD", 2, Operands { v0, v1 })
+ // CVTSI2SD r32, xmm
+ if isReg32(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x2a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTSI2SD r64, xmm
+ if isReg64(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x2a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTSI2SD m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x2a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CVTSI2SD m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x2a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CVTSI2SD")
+ }
+ return p
+}
+
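+// Hand-written note: the r32/m32 and r64/m64 forms differ only in REX.W, so
+// the width of the source operand decides whether it is read as a signed
+// 32-bit or signed 64-bit integer:
+//
+//     p.CVTSI2SD(EAX, XMM0)    // f2 0f 2a: convert the int32 in EAX
+//     p.CVTSI2SD(RAX, XMM0)    // f2 48 0f 2a: convert the int64 in RAX
+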
+// CVTSI2SS performs "Convert Dword Integer to Scalar Single-Precision FP Value".
+//
+// Mnemonic : CVTSI2SS
+// Supported forms : (4 forms)
+//
+// * CVTSI2SS r32, xmm [SSE]
+// * CVTSI2SS r64, xmm [SSE]
+// * CVTSI2SS m32, xmm [SSE]
+// * CVTSI2SS m64, xmm [SSE]
+//
+func (self *Program) CVTSI2SS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CVTSI2SS", 2, Operands { v0, v1 })
+ // CVTSI2SS r32, xmm
+ if isReg32(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x2a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTSI2SS r64, xmm
+ if isReg64(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x2a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTSI2SS m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x2a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CVTSI2SS m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x2a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CVTSI2SS")
+ }
+ return p
+}
+
+// CVTSS2SD performs "Convert Scalar Single-Precision FP Value to Scalar Double-Precision FP Value".
+//
+// Mnemonic : CVTSS2SD
+// Supported forms : (2 forms)
+//
+// * CVTSS2SD xmm, xmm [SSE2]
+// * CVTSS2SD m32, xmm [SSE2]
+//
+func (self *Program) CVTSS2SD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CVTSS2SD", 2, Operands { v0, v1 })
+ // CVTSS2SD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x5a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTSS2SD m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x5a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CVTSS2SD")
+ }
+ return p
+}
+
+// CVTSS2SI performs "Convert Scalar Single-Precision FP Value to Dword Integer".
+//
+// Mnemonic : CVTSS2SI
+// Supported forms : (4 forms)
+//
+// * CVTSS2SI xmm, r32 [SSE]
+// * CVTSS2SI m32, r32 [SSE]
+// * CVTSS2SI xmm, r64 [SSE]
+// * CVTSS2SI m32, r64 [SSE]
+//
+func (self *Program) CVTSS2SI(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CVTSS2SI", 2, Operands { v0, v1 })
+ // CVTSS2SI xmm, r32
+ if isXMM(v0) && isReg32(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x2d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTSS2SI m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x2d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CVTSS2SI xmm, r64
+ if isXMM(v0) && isReg64(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x2d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTSS2SI m32, r64
+ if isM32(v0) && isReg64(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x2d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CVTSS2SI")
+ }
+ return p
+}
+
+// CVTTPD2DQ performs "Convert with Truncation Packed Double-Precision FP Values to Packed Dword Integers".
+//
+// Mnemonic : CVTTPD2DQ
+// Supported forms : (2 forms)
+//
+// * CVTTPD2DQ xmm, xmm [SSE2]
+// * CVTTPD2DQ m128, xmm [SSE2]
+//
+func (self *Program) CVTTPD2DQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CVTTPD2DQ", 2, Operands { v0, v1 })
+ // CVTTPD2DQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xe6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTTPD2DQ m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xe6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CVTTPD2DQ")
+ }
+ return p
+}
+
+// CVTTPD2PI performs "Convert with Truncation Packed Double-Precision FP Values to Packed Dword Integers".
+//
+// Mnemonic : CVTTPD2PI
+// Supported forms : (2 forms)
+//
+// * CVTTPD2PI xmm, mm [SSE2]
+// * CVTTPD2PI m128, mm [SSE2]
+//
+func (self *Program) CVTTPD2PI(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CVTTPD2PI", 2, Operands { v0, v1 })
+ // CVTTPD2PI xmm, mm
+ if isXMM(v0) && isMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTTPD2PI m128, mm
+ if isM128(v0) && isMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x2c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CVTTPD2PI")
+ }
+ return p
+}
+
+// CVTTPS2DQ performs "Convert with Truncation Packed Single-Precision FP Values to Packed Dword Integers".
+//
+// Mnemonic : CVTTPS2DQ
+// Supported forms : (2 forms)
+//
+// * CVTTPS2DQ xmm, xmm [SSE2]
+// * CVTTPS2DQ m128, xmm [SSE2]
+//
+func (self *Program) CVTTPS2DQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CVTTPS2DQ", 2, Operands { v0, v1 })
+ // CVTTPS2DQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTTPS2DQ m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x5b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CVTTPS2DQ")
+ }
+ return p
+}
+
+// CVTTPS2PI performs "Convert with Truncation Packed Single-Precision FP Values to Packed Dword Integers".
+//
+// Mnemonic : CVTTPS2PI
+// Supported forms : (2 forms)
+//
+// * CVTTPS2PI xmm, mm [SSE]
+// * CVTTPS2PI m64, mm [SSE]
+//
+func (self *Program) CVTTPS2PI(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CVTTPS2PI", 2, Operands { v0, v1 })
+ // CVTTPS2PI xmm, mm
+ if isXMM(v0) && isMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTTPS2PI m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x2c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CVTTPS2PI")
+ }
+ return p
+}
+
+// CVTTSD2SI performs "Convert with Truncation Scalar Double-Precision FP Value to Signed Integer".
+//
+// Mnemonic : CVTTSD2SI
+// Supported forms : (4 forms)
+//
+// * CVTTSD2SI xmm, r32 [SSE2]
+// * CVTTSD2SI m64, r32 [SSE2]
+// * CVTTSD2SI xmm, r64 [SSE2]
+// * CVTTSD2SI m64, r64 [SSE2]
+//
+func (self *Program) CVTTSD2SI(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CVTTSD2SI", 2, Operands { v0, v1 })
+ // CVTTSD2SI xmm, r32
+ if isXMM(v0) && isReg32(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTTSD2SI m64, r32
+ if isM64(v0) && isReg32(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x2c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CVTTSD2SI xmm, r64
+ if isXMM(v0) && isReg64(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTTSD2SI m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x2c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CVTTSD2SI")
+ }
+ return p
+}
+
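+// Hand-written note: the CVTT* forms truncate toward zero regardless of the
+// MXCSR rounding mode, whereas CVTSD2SI above rounds according to MXCSR;
+// truncation is what Go's int64(float64) conversion wants, e.g.
+//
+//     p.CVTTSD2SI(XMM0, RAX)    // RAX = int64 truncation of XMM0[63:0]
+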
+// CVTTSS2SI performs "Convert with Truncation Scalar Single-Precision FP Value to Dword Integer".
+//
+// Mnemonic : CVTTSS2SI
+// Supported forms : (4 forms)
+//
+// * CVTTSS2SI xmm, r32 [SSE]
+// * CVTTSS2SI m32, r32 [SSE]
+// * CVTTSS2SI xmm, r64 [SSE]
+// * CVTTSS2SI m32, r64 [SSE]
+//
+func (self *Program) CVTTSS2SI(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CVTTSS2SI", 2, Operands { v0, v1 })
+ // CVTTSS2SI xmm, r32
+ if isXMM(v0) && isReg32(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTTSS2SI m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x2c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // CVTTSS2SI xmm, r64
+ if isXMM(v0) && isReg64(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTTSS2SI m32, r64
+ if isM32(v0) && isReg64(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x2c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CVTTSS2SI")
+ }
+ return p
+}
+
+// CWTD performs "Convert Word to Doubleword".
+//
+// Mnemonic : CWD
+// Supported forms : (1 form)
+//
+// * CWTD
+//
+func (self *Program) CWTD() *Instruction {
+ p := self.alloc("CWTD", 0, Operands { })
+ // CWTD
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.emit(0x99)
+ })
+ return p
+}
+
+// CWTL performs "Convert Word to Doubleword".
+//
+// Mnemonic : CWDE
+// Supported forms : (1 form)
+//
+// * CWTL
+//
+func (self *Program) CWTL() *Instruction {
+ p := self.alloc("CWTL", 0, Operands { })
+ // CWTL
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x98)
+ })
+ return p
+}
+
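+// Hand-written note: CWTD sign-extends AX into DX:AX (Intel CWD) while CWTL
+// sign-extends AX into EAX (Intel CWDE); together with CQTO above they form
+// the usual setup before a signed divide, e.g. p.CQTO() ahead of a signed
+// 64-bit divide such as the generated IDIVQ wrapper (assumed here).
+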
+// DECB performs "Decrement by 1".
+//
+// Mnemonic : DEC
+// Supported forms : (2 forms)
+//
+// * DECB r8
+// * DECB m8
+//
+func (self *Program) DECB(v0 interface{}) *Instruction {
+ p := self.alloc("DECB", 1, Operands { v0 })
+ // DECB r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0xfe)
+ m.emit(0xc8 | lcode(v[0]))
+ })
+ }
+ // DECB m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xfe)
+ m.mrsd(1, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for DECB")
+ }
+ return p
+}
+
+// DECL performs "Decrement by 1".
+//
+// Mnemonic : DEC
+// Supported forms : (2 forms)
+//
+// * DECL r32
+// * DECL m32
+//
+func (self *Program) DECL(v0 interface{}) *Instruction {
+ p := self.alloc("DECL", 1, Operands { v0 })
+ // DECL r32
+ if isReg32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], false)
+ m.emit(0xff)
+ m.emit(0xc8 | lcode(v[0]))
+ })
+ }
+ // DECL m32
+ if isM32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xff)
+ m.mrsd(1, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for DECL")
+ }
+ return p
+}
+
+// DECQ performs "Decrement by 1".
+//
+// Mnemonic : DEC
+// Supported forms : (2 forms)
+//
+// * DECQ r64
+// * DECQ m64
+//
+func (self *Program) DECQ(v0 interface{}) *Instruction {
+ p := self.alloc("DECQ", 1, Operands { v0 })
+ // DECQ r64
+ if isReg64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]))
+ m.emit(0xff)
+ m.emit(0xc8 | lcode(v[0]))
+ })
+ }
+ // DECQ m64
+ if isM64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[0]))
+ m.emit(0xff)
+ m.mrsd(1, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for DECQ")
+ }
+ return p
+}
+
+// DECW performs "Decrement by 1".
+//
+// Mnemonic : DEC
+// Supported forms : (2 forms)
+//
+// * DECW r16
+// * DECW m16
+//
+func (self *Program) DECW(v0 interface{}) *Instruction {
+ p := self.alloc("DECW", 1, Operands { v0 })
+ // DECW r16
+ if isReg16(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[0], false)
+ m.emit(0xff)
+ m.emit(0xc8 | lcode(v[0]))
+ })
+ }
+ // DECW m16
+ if isM16(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xff)
+ m.mrsd(1, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for DECW")
+ }
+ return p
+}
+
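+// Hand-written note: unlike a SUB of 1, the DEC family updates OF, SF, ZF,
+// AF and PF but leaves CF unchanged, which matters when decrementing a
+// counter inside a carry-chained sequence, e.g.
+//
+//     p.DECQ(RCX)    // count down without disturbing CF
+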
+// DIVB performs "Unsigned Divide".
+//
+// Mnemonic : DIV
+// Supported forms : (2 forms)
+//
+// * DIVB r8
+// * DIVB m8
+//
+func (self *Program) DIVB(v0 interface{}) *Instruction {
+ p := self.alloc("DIVB", 1, Operands { v0 })
+ // DIVB r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0xf6)
+ m.emit(0xf0 | lcode(v[0]))
+ })
+ }
+ // DIVB m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xf6)
+ m.mrsd(6, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for DIVB")
+ }
+ return p
+}
+
+// DIVL performs "Unsigned Divide".
+//
+// Mnemonic : DIV
+// Supported forms : (2 forms)
+//
+// * DIVL r32
+// * DIVL m32
+//
+func (self *Program) DIVL(v0 interface{}) *Instruction {
+ p := self.alloc("DIVL", 1, Operands { v0 })
+ // DIVL r32
+ if isReg32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], false)
+ m.emit(0xf7)
+ m.emit(0xf0 | lcode(v[0]))
+ })
+ }
+ // DIVL m32
+ if isM32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xf7)
+ m.mrsd(6, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for DIVL")
+ }
+ return p
+}
+
+// DIVPD performs "Divide Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : DIVPD
+// Supported forms : (2 forms)
+//
+// * DIVPD xmm, xmm [SSE2]
+// * DIVPD m128, xmm [SSE2]
+//
+func (self *Program) DIVPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("DIVPD", 2, Operands { v0, v1 })
+ // DIVPD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // DIVPD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x5e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for DIVPD")
+ }
+ return p
+}
+
+// DIVPS performs "Divide Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : DIVPS
+// Supported forms : (2 forms)
+//
+// * DIVPS xmm, xmm [SSE]
+// * DIVPS m128, xmm [SSE]
+//
+func (self *Program) DIVPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("DIVPS", 2, Operands { v0, v1 })
+ // DIVPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // DIVPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x5e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for DIVPS")
+ }
+ return p
+}
+
+// DIVQ performs "Unsigned Divide".
+//
+// Mnemonic : DIV
+// Supported forms : (2 forms)
+//
+// * DIVQ r64
+// * DIVQ m64
+//
+func (self *Program) DIVQ(v0 interface{}) *Instruction {
+ p := self.alloc("DIVQ", 1, Operands { v0 })
+ // DIVQ r64
+ if isReg64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]))
+ m.emit(0xf7)
+ m.emit(0xf0 | lcode(v[0]))
+ })
+ }
+ // DIVQ m64
+ if isM64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[0]))
+ m.emit(0xf7)
+ m.mrsd(6, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for DIVQ")
+ }
+ return p
+}
+
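+// Semantics note with a sketch (not generated): DIVQ divides the implicit
+// 128-bit dividend in RDX:RAX by its single operand, leaving the quotient in
+// RAX and the remainder in RDX, so an unsigned 64-bit division normally
+// clears RDX first:
+//
+//     p.XORQ(RDX, RDX) // zero the high half of the dividend
+//     p.DIVQ(RBX)      // RAX = RDX:RAX / RBX, RDX = remainder
+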
+// DIVSD performs "Divide Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : DIVSD
+// Supported forms : (2 forms)
+//
+// * DIVSD xmm, xmm [SSE2]
+// * DIVSD m64, xmm [SSE2]
+//
+func (self *Program) DIVSD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("DIVSD", 2, Operands { v0, v1 })
+ // DIVSD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // DIVSD m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x5e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for DIVSD")
+ }
+ return p
+}
+
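+// Illustrative SSE call (editor's sketch; assumes the package's XMM register
+// constants). The destination is the second argument, matching the reversed
+// operand order used throughout this file:
+//
+//     p.DIVSD(XMM1, XMM0) // f2 0f 5e c1 : XMM0 = XMM0 / XMM1 (low double)
+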
+// DIVSS performs "Divide Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : DIVSS
+// Supported forms : (2 forms)
+//
+// * DIVSS xmm, xmm [SSE]
+// * DIVSS m32, xmm [SSE]
+//
+func (self *Program) DIVSS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("DIVSS", 2, Operands { v0, v1 })
+ // DIVSS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // DIVSS m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x5e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for DIVSS")
+ }
+ return p
+}
+
+// DIVW performs "Unsigned Divide".
+//
+// Mnemonic : DIV
+// Supported forms : (2 forms)
+//
+// * DIVW r16
+// * DIVW m16
+//
+func (self *Program) DIVW(v0 interface{}) *Instruction {
+ p := self.alloc("DIVW", 1, Operands { v0 })
+ // DIVW r16
+ if isReg16(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[0], false)
+ m.emit(0xf7)
+ m.emit(0xf0 | lcode(v[0]))
+ })
+ }
+ // DIVW m16
+ if isM16(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xf7)
+ m.mrsd(6, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for DIVW")
+ }
+ return p
+}
+
+// DPPD performs "Dot Product of Packed Double Precision Floating-Point Values".
+//
+// Mnemonic : DPPD
+// Supported forms : (2 forms)
+//
+// * DPPD imm8, xmm, xmm [SSE4.1]
+// * DPPD imm8, m128, xmm [SSE4.1]
+//
+func (self *Program) DPPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("DPPD", 3, Operands { v0, v1, v2 })
+ // DPPD imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x41)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // DPPD imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x41)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for DPPD")
+ }
+ return p
+}
+
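+// The imm8 here is a control mask, not a value: per the Intel SDM, the high
+// nibble selects which source element pairs enter the multiply and the low
+// nibble selects which destination lanes receive the summed result. Sketch:
+//
+//     p.DPPD(0x31, XMM2, XMM1) // multiply both doubles, store the sum in lane 0 of XMM1
+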
+// DPPS performs "Dot Product of Packed Single Precision Floating-Point Values".
+//
+// Mnemonic : DPPS
+// Supported forms : (2 forms)
+//
+// * DPPS imm8, xmm, xmm [SSE4.1]
+// * DPPS imm8, m128, xmm [SSE4.1]
+//
+func (self *Program) DPPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("DPPS", 3, Operands { v0, v1, v2 })
+ // DPPS imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x40)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // DPPS imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x40)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for DPPS")
+ }
+ return p
+}
+
+// EMMS performs "Exit MMX State".
+//
+// Mnemonic : EMMS
+// Supported forms : (1 form)
+//
+// * EMMS [MMX]
+//
+func (self *Program) EMMS() *Instruction {
+ p := self.alloc("EMMS", 0, Operands { })
+ // EMMS
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x77)
+ })
+ return p
+}
+
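+// Background note: EMMS exists because the MMX registers alias the x87
+// register stack; it should run after MMX code and before any x87 use. The
+// call takes no operands and always encodes as 0f 77:
+//
+//     p.EMMS() // 0f 77
+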
+// EXTRACTPS performs "Extract Packed Single Precision Floating-Point Value".
+//
+// Mnemonic : EXTRACTPS
+// Supported forms : (2 forms)
+//
+// * EXTRACTPS imm8, xmm, r32 [SSE4.1]
+// * EXTRACTPS imm8, xmm, m32 [SSE4.1]
+//
+func (self *Program) EXTRACTPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("EXTRACTPS", 3, Operands { v0, v1, v2 })
+ // EXTRACTPS imm8, xmm, r32
+ if isImm8(v0) && isXMM(v1) && isReg32(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[2], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x17)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // EXTRACTPS imm8, xmm, m32
+ if isImm8(v0) && isXMM(v1) && isM32(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[2]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x17)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for EXTRACTPS")
+ }
+ return p
+}
+
+// EXTRQ performs "Extract Field".
+//
+// Mnemonic : EXTRQ
+// Supported forms : (2 forms)
+//
+// * EXTRQ xmm, xmm [SSE4A]
+// * EXTRQ imm8, imm8, xmm [SSE4A]
+//
+func (self *Program) EXTRQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("EXTRQ", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("EXTRQ", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction EXTRQ takes 2 or 3 operands")
+ }
+ // EXTRQ xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4A)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // EXTRQ imm8, imm8, xmm
+ if len(vv) == 1 && isImm8(v0) && isImm8(v1) && isXMM(vv[0]) {
+ self.require(ISA_SSE4A)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[2], false)
+ m.emit(0x0f)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[2]))
+ m.imm1(toImmAny(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for EXTRQ")
+ }
+ return p
+}
+
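+// EXTRQ is one of the variadic wrappers: the SSE4A instruction has both a
+// two-operand register form and a three-operand immediate form, so dispatch
+// happens on len(vv). Since the immediate bytes are emitted as v[1] then
+// v[0], the call order under this file's reversed convention appears to be
+// (index, length, register); this is an editor's reading, so check the AMD
+// manual before relying on it:
+//
+//     p.EXTRQ(XMM1, XMM0)  // 66 0f 79 /r : register form
+//     p.EXTRQ(0, 16, XMM0) // extract a 16-bit field at bit offset 0 of XMM0
+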
+// FEMMS performs "Fast Exit Multimedia State".
+//
+// Mnemonic : FEMMS
+// Supported forms : (1 form)
+//
+// * FEMMS [FEMMS]
+//
+func (self *Program) FEMMS() *Instruction {
+ p := self.alloc("FEMMS", 0, Operands { })
+ // FEMMS
+ self.require(ISA_FEMMS)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x0e)
+ })
+ return p
+}
+
+// HADDPD performs "Packed Double-FP Horizontal Add".
+//
+// Mnemonic : HADDPD
+// Supported forms : (2 forms)
+//
+// * HADDPD xmm, xmm [SSE3]
+// * HADDPD m128, xmm [SSE3]
+//
+func (self *Program) HADDPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("HADDPD", 2, Operands { v0, v1 })
+ // HADDPD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x7c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // HADDPD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x7c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for HADDPD")
+ }
+ return p
+}
+
+// HADDPS performs "Packed Single-FP Horizontal Add".
+//
+// Mnemonic : HADDPS
+// Supported forms : (2 forms)
+//
+// * HADDPS xmm, xmm [SSE3]
+// * HADDPS m128, xmm [SSE3]
+//
+func (self *Program) HADDPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("HADDPS", 2, Operands { v0, v1 })
+ // HADDPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x7c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // HADDPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x7c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for HADDPS")
+ }
+ return p
+}
+
+// HSUBPD performs "Packed Double-FP Horizontal Subtract".
+//
+// Mnemonic : HSUBPD
+// Supported forms : (2 forms)
+//
+// * HSUBPD xmm, xmm [SSE3]
+// * HSUBPD m128, xmm [SSE3]
+//
+func (self *Program) HSUBPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("HSUBPD", 2, Operands { v0, v1 })
+ // HSUBPD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x7d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // HSUBPD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x7d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for HSUBPD")
+ }
+ return p
+}
+
+// HSUBPS performs "Packed Single-FP Horizontal Subtract".
+//
+// Mnemonic : HSUBPS
+// Supported forms : (2 forms)
+//
+// * HSUBPS xmm, xmm [SSE3]
+// * HSUBPS m128, xmm [SSE3]
+//
+func (self *Program) HSUBPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("HSUBPS", 2, Operands { v0, v1 })
+ // HSUBPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x7d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // HSUBPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x7d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for HSUBPS")
+ }
+ return p
+}
+
+// IDIVB performs "Signed Divide".
+//
+// Mnemonic : IDIV
+// Supported forms : (2 forms)
+//
+// * IDIVB r8
+// * IDIVB m8
+//
+func (self *Program) IDIVB(v0 interface{}) *Instruction {
+ p := self.alloc("IDIVB", 1, Operands { v0 })
+ // IDIVB r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0xf6)
+ m.emit(0xf8 | lcode(v[0]))
+ })
+ }
+ // IDIVB m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xf6)
+ m.mrsd(7, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for IDIVB")
+ }
+ return p
+}
+
+// IDIVL performs "Signed Divide".
+//
+// Mnemonic : IDIV
+// Supported forms : (2 forms)
+//
+// * IDIVL r32
+// * IDIVL m32
+//
+func (self *Program) IDIVL(v0 interface{}) *Instruction {
+ p := self.alloc("IDIVL", 1, Operands { v0 })
+ // IDIVL r32
+ if isReg32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], false)
+ m.emit(0xf7)
+ m.emit(0xf8 | lcode(v[0]))
+ })
+ }
+ // IDIVL m32
+ if isM32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xf7)
+ m.mrsd(7, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for IDIVL")
+ }
+ return p
+}
+
+// IDIVQ performs "Signed Divide".
+//
+// Mnemonic : IDIV
+// Supported forms : (2 forms)
+//
+// * IDIVQ r64
+// * IDIVQ m64
+//
+func (self *Program) IDIVQ(v0 interface{}) *Instruction {
+ p := self.alloc("IDIVQ", 1, Operands { v0 })
+ // IDIVQ r64
+ if isReg64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]))
+ m.emit(0xf7)
+ m.emit(0xf8 | lcode(v[0]))
+ })
+ }
+ // IDIVQ m64
+ if isM64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[0]))
+ m.emit(0xf7)
+ m.mrsd(7, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for IDIVQ")
+ }
+ return p
+}
+
+// IDIVW performs "Signed Divide".
+//
+// Mnemonic : IDIV
+// Supported forms : (2 forms)
+//
+// * IDIVW r16
+// * IDIVW m16
+//
+func (self *Program) IDIVW(v0 interface{}) *Instruction {
+ p := self.alloc("IDIVW", 1, Operands { v0 })
+ // IDIVW r16
+ if isReg16(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[0], false)
+ m.emit(0xf7)
+ m.emit(0xf8 | lcode(v[0]))
+ })
+ }
+ // IDIVW m16
+ if isM16(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xf7)
+ m.mrsd(7, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for IDIVW")
+ }
+ return p
+}
+
+// IMULB performs "Signed Multiply".
+//
+// Mnemonic : IMUL
+// Supported forms : (2 forms)
+//
+// * IMULB r8
+// * IMULB m8
+//
+func (self *Program) IMULB(v0 interface{}) *Instruction {
+ p := self.alloc("IMULB", 1, Operands { v0 })
+ // IMULB r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0xf6)
+ m.emit(0xe8 | lcode(v[0]))
+ })
+ }
+ // IMULB m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xf6)
+ m.mrsd(5, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for IMULB")
+ }
+ return p
+}
+
+// IMULL performs "Signed Multiply".
+//
+// Mnemonic : IMUL
+// Supported forms : (8 forms)
+//
+// * IMULL r32
+// * IMULL m32
+// * IMULL r32, r32
+// * IMULL m32, r32
+// * IMULL imm8, r32, r32
+// * IMULL imm32, r32, r32
+// * IMULL imm8, m32, r32
+// * IMULL imm32, m32, r32
+//
+func (self *Program) IMULL(v0 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("IMULL", 1, Operands { v0 })
+ case 1 : p = self.alloc("IMULL", 2, Operands { v0, vv[0] })
+ case 2 : p = self.alloc("IMULL", 3, Operands { v0, vv[0], vv[1] })
+ default : panic("instruction IMULL takes 1 or 2 or 3 operands")
+ }
+ // IMULL r32
+ if len(vv) == 0 && isReg32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], false)
+ m.emit(0xf7)
+ m.emit(0xe8 | lcode(v[0]))
+ })
+ }
+ // IMULL m32
+ if len(vv) == 0 && isM32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xf7)
+ m.mrsd(5, addr(v[0]), 1)
+ })
+ }
+ // IMULL r32, r32
+ if len(vv) == 1 && isReg32(v0) && isReg32(vv[0]) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xaf)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // IMULL m32, r32
+ if len(vv) == 1 && isM32(v0) && isReg32(vv[0]) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xaf)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // IMULL imm8, r32, r32
+ if len(vv) == 2 && isImm8(v0) && isReg32(vv[0]) && isReg32(vv[1]) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x6b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // IMULL imm32, r32, r32
+ if len(vv) == 2 && isImm32(v0) && isReg32(vv[0]) && isReg32(vv[1]) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x69)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // IMULL imm8, m32, r32
+ if len(vv) == 2 && isImm8(v0) && isM32(vv[0]) && isReg32(vv[1]) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x6b)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // IMULL imm32, m32, r32
+ if len(vv) == 2 && isImm32(v0) && isM32(vv[0]) && isReg32(vv[1]) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x69)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for IMULL")
+ }
+ return p
+}
+
+// IMULQ performs "Signed Multiply".
+//
+// Mnemonic : IMUL
+// Supported forms : (8 forms)
+//
+// * IMULQ r64
+// * IMULQ m64
+// * IMULQ r64, r64
+// * IMULQ m64, r64
+// * IMULQ imm8, r64, r64
+// * IMULQ imm32, r64, r64
+// * IMULQ imm8, m64, r64
+// * IMULQ imm32, m64, r64
+//
+func (self *Program) IMULQ(v0 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("IMULQ", 1, Operands { v0 })
+ case 1 : p = self.alloc("IMULQ", 2, Operands { v0, vv[0] })
+ case 2 : p = self.alloc("IMULQ", 3, Operands { v0, vv[0], vv[1] })
+ default : panic("instruction IMULQ takes 1 or 2 or 3 operands")
+ }
+ // IMULQ r64
+ if len(vv) == 0 && isReg64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]))
+ m.emit(0xf7)
+ m.emit(0xe8 | lcode(v[0]))
+ })
+ }
+ // IMULQ m64
+ if len(vv) == 0 && isM64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[0]))
+ m.emit(0xf7)
+ m.mrsd(5, addr(v[0]), 1)
+ })
+ }
+ // IMULQ r64, r64
+ if len(vv) == 1 && isReg64(v0) && isReg64(vv[0]) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0xaf)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // IMULQ m64, r64
+ if len(vv) == 1 && isM64(v0) && isReg64(vv[0]) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0xaf)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // IMULQ imm8, r64, r64
+ if len(vv) == 2 && isImm8(v0) && isReg64(vv[0]) && isReg64(vv[1]) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[2]) << 2 | hcode(v[1]))
+ m.emit(0x6b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // IMULQ imm32, r64, r64
+ if len(vv) == 2 && isImm32(v0) && isReg64(vv[0]) && isReg64(vv[1]) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[2]) << 2 | hcode(v[1]))
+ m.emit(0x69)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // IMULQ imm8, m64, r64
+ if len(vv) == 2 && isImm8(v0) && isM64(vv[0]) && isReg64(vv[1]) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[2]), addr(v[1]))
+ m.emit(0x6b)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // IMULQ imm32, m64, r64
+ if len(vv) == 2 && isImm32(v0) && isM64(vv[0]) && isReg64(vv[1]) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[2]), addr(v[1]))
+ m.emit(0x69)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for IMULQ")
+ }
+ return p
+}
+
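+// The variadic signature mirrors the one-, two- and three-operand hardware
+// forms. Illustrative calls (editor's sketch, register constants from this
+// package):
+//
+//     p.IMULQ(RBX)         // RDX:RAX = RAX * RBX
+//     p.IMULQ(RBX, RCX)    // RCX = RCX * RBX
+//     p.IMULQ(8, RBX, RCX) // RCX = RBX * 8 (imm8 form, opcode 6b)
+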
+// IMULW performs "Signed Multiply".
+//
+// Mnemonic : IMUL
+// Supported forms : (8 forms)
+//
+// * IMULW r16
+// * IMULW m16
+// * IMULW r16, r16
+// * IMULW m16, r16
+// * IMULW imm8, r16, r16
+// * IMULW imm16, r16, r16
+// * IMULW imm8, m16, r16
+// * IMULW imm16, m16, r16
+//
+func (self *Program) IMULW(v0 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("IMULW", 1, Operands { v0 })
+ case 1 : p = self.alloc("IMULW", 2, Operands { v0, vv[0] })
+ case 2 : p = self.alloc("IMULW", 3, Operands { v0, vv[0], vv[1] })
+ default : panic("instruction IMULW takes 1 or 2 or 3 operands")
+ }
+ // IMULW r16
+ if len(vv) == 0 && isReg16(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[0], false)
+ m.emit(0xf7)
+ m.emit(0xe8 | lcode(v[0]))
+ })
+ }
+ // IMULW m16
+ if len(vv) == 0 && isM16(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xf7)
+ m.mrsd(5, addr(v[0]), 1)
+ })
+ }
+ // IMULW r16, r16
+ if len(vv) == 1 && isReg16(v0) && isReg16(vv[0]) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xaf)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // IMULW m16, r16
+ if len(vv) == 1 && isM16(v0) && isReg16(vv[0]) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xaf)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // IMULW imm8, r16, r16
+ if len(vv) == 2 && isImm8(v0) && isReg16(vv[0]) && isReg16(vv[1]) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x6b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // IMULW imm16, r16, r16
+ if len(vv) == 2 && isImm16(v0) && isReg16(vv[0]) && isReg16(vv[1]) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x69)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // IMULW imm8, m16, r16
+ if len(vv) == 2 && isImm8(v0) && isM16(vv[0]) && isReg16(vv[1]) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x6b)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // IMULW imm16, m16, r16
+ if len(vv) == 2 && isImm16(v0) && isM16(vv[0]) && isReg16(vv[1]) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x69)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for IMULW")
+ }
+ return p
+}
+
+// INCB performs "Increment by 1".
+//
+// Mnemonic : INC
+// Supported forms : (2 forms)
+//
+// * INCB r8
+// * INCB m8
+//
+func (self *Program) INCB(v0 interface{}) *Instruction {
+ p := self.alloc("INCB", 1, Operands { v0 })
+ // INCB r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0xfe)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // INCB m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xfe)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for INCB")
+ }
+ return p
+}
+
+// INCL performs "Increment by 1".
+//
+// Mnemonic : INC
+// Supported forms : (2 forms)
+//
+// * INCL r32
+// * INCL m32
+//
+func (self *Program) INCL(v0 interface{}) *Instruction {
+ p := self.alloc("INCL", 1, Operands { v0 })
+ // INCL r32
+ if isReg32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], false)
+ m.emit(0xff)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // INCL m32
+ if isM32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xff)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for INCL")
+ }
+ return p
+}
+
+// INCQ performs "Increment by 1".
+//
+// Mnemonic : INC
+// Supported forms : (2 forms)
+//
+// * INCQ r64
+// * INCQ m64
+//
+func (self *Program) INCQ(v0 interface{}) *Instruction {
+ p := self.alloc("INCQ", 1, Operands { v0 })
+ // INCQ r64
+ if isReg64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]))
+ m.emit(0xff)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // INCQ m64
+ if isM64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[0]))
+ m.emit(0xff)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for INCQ")
+ }
+ return p
+}
+
+// INCW performs "Increment by 1".
+//
+// Mnemonic : INC
+// Supported forms : (2 forms)
+//
+// * INCW r16
+// * INCW m16
+//
+func (self *Program) INCW(v0 interface{}) *Instruction {
+ p := self.alloc("INCW", 1, Operands { v0 })
+ // INCW r16
+ if isReg16(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[0], false)
+ m.emit(0xff)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // INCW m16
+ if isM16(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xff)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for INCW")
+ }
+ return p
+}
+
+// INSERTPS performs "Insert Packed Single Precision Floating-Point Value".
+//
+// Mnemonic : INSERTPS
+// Supported forms : (2 forms)
+//
+// * INSERTPS imm8, xmm, xmm [SSE4.1]
+// * INSERTPS imm8, m32, xmm [SSE4.1]
+//
+func (self *Program) INSERTPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("INSERTPS", 3, Operands { v0, v1, v2 })
+ // INSERTPS imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x21)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // INSERTPS imm8, m32, xmm
+ if isImm8(v0) && isM32(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x21)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for INSERTPS")
+ }
+ return p
+}
+
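+// The INSERTPS imm8 packs three fields (Intel SDM): bits 7:6 pick the source
+// lane, bits 5:4 the destination lane, and bits 3:0 form a zero mask. Sketch:
+//
+//     p.INSERTPS(0x10, XMM2, XMM1) // XMM1 lane 1 = XMM2 lane 0
+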
+// INSERTQ performs "Insert Field".
+//
+// Mnemonic : INSERTQ
+// Supported forms : (2 forms)
+//
+// * INSERTQ xmm, xmm [SSE4A]
+// * INSERTQ imm8, imm8, xmm, xmm [SSE4A]
+//
+func (self *Program) INSERTQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("INSERTQ", 2, Operands { v0, v1 })
+ case 2 : p = self.alloc("INSERTQ", 4, Operands { v0, v1, vv[0], vv[1] })
+ default : panic("instruction INSERTQ takes 2 or 4 operands")
+ }
+ // INSERTQ xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4A)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // INSERTQ imm8, imm8, xmm, xmm
+ if len(vv) == 2 && isImm8(v0) && isImm8(v1) && isXMM(vv[0]) && isXMM(vv[1]) {
+ self.require(ISA_SSE4A)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[3]), v[2], false)
+ m.emit(0x0f)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for INSERTQ")
+ }
+ return p
+}
+
+// INT performs "Call to Interrupt Procedure".
+//
+// Mnemonic : INT
+// Supported forms : (2 forms)
+//
+// * INT 3
+// * INT imm8
+//
+func (self *Program) INT(v0 interface{}) *Instruction {
+ p := self.alloc("INT", 1, Operands { v0 })
+ // INT 3
+ if isConst3(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xcc)
+ })
+ }
+ // INT imm8
+ if isImm8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xcd)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for INT")
+ }
+ return p
+}
+
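+// Note the special case above: INT(3) selects the dedicated one-byte
+// breakpoint encoding 0xcc rather than the generic cd 03 form, which is what
+// debuggers expect. Sketch:
+//
+//     p.INT(3) // cc : software breakpoint
+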
+// JA performs "Jump if above (CF == 0 and ZF == 0)".
+//
+// Mnemonic : JA
+// Supported forms : (2 forms)
+//
+// * JA rel8
+// * JA rel32
+//
+func (self *Program) JA(v0 interface{}) *Instruction {
+ p := self.alloc("JA", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JA rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x77)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JA rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x87)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JA label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x77)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x87)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JA")
+ }
+ return p
+}
+
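+// Besides the two documented relocation forms, every branch in this file
+// also accepts a *Label operand: the encoder then registers both rel8 and
+// rel32 candidates (_F_rel1/_F_rel4) and the assembler picks the shortest
+// encoding that reaches the target. A sketch, assuming the package-level
+// CreateLabel helper and Program.Link (verify these names against the rest
+// of the module):
+//
+//     done := CreateLabel("done")
+//     p.JA(done)   // becomes 77 rel8 when "done" is close enough
+//     p.NOP()
+//     p.Link(done) // bind the label at the current position
+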
+// JAE performs "Jump if above or equal (CF == 0)".
+//
+// Mnemonic : JAE
+// Supported forms : (2 forms)
+//
+// * JAE rel8
+// * JAE rel32
+//
+func (self *Program) JAE(v0 interface{}) *Instruction {
+ p := self.alloc("JAE", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JAE rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x73)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JAE rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x83)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JAE label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x73)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x83)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JAE")
+ }
+ return p
+}
+
+// JB performs "Jump if below (CF == 1)".
+//
+// Mnemonic : JB
+// Supported forms : (2 forms)
+//
+// * JB rel8
+// * JB rel32
+//
+func (self *Program) JB(v0 interface{}) *Instruction {
+ p := self.alloc("JB", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JB rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x72)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JB rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x82)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JB label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x72)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x82)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JB")
+ }
+ return p
+}
+
+// JBE performs "Jump if below or equal (CF == 1 or ZF == 1)".
+//
+// Mnemonic : JBE
+// Supported forms : (2 forms)
+//
+// * JBE rel8
+// * JBE rel32
+//
+func (self *Program) JBE(v0 interface{}) *Instruction {
+ p := self.alloc("JBE", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JBE rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x76)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JBE rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x86)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JBE label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x76)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x86)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JBE")
+ }
+ return p
+}
+
+// JC performs "Jump if carry (CF == 1)".
+//
+// Mnemonic : JC
+// Supported forms : (2 forms)
+//
+// * JC rel8
+// * JC rel32
+//
+func (self *Program) JC(v0 interface{}) *Instruction {
+ p := self.alloc("JC", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JC rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x72)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JC rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x82)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JC label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x72)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x82)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JC")
+ }
+ return p
+}
+
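+// JC, JB and JNAE are aliases: as the bodies above and below show, all three
+// test CF == 1 and emit the same 72 / 0f 82 opcodes, so they assemble to
+// byte-identical code.
+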
+// JE performs "Jump if equal (ZF == 1)".
+//
+// Mnemonic : JE
+// Supported forms : (2 forms)
+//
+// * JE rel8
+// * JE rel32
+//
+func (self *Program) JE(v0 interface{}) *Instruction {
+ p := self.alloc("JE", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JE rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x74)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JE rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x84)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JE label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x74)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x84)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JE")
+ }
+ return p
+}
+
+// JECXZ performs "Jump if ECX register is 0".
+//
+// Mnemonic : JECXZ
+// Supported forms : (1 form)
+//
+// * JECXZ rel8
+//
+func (self *Program) JECXZ(v0 interface{}) *Instruction {
+ p := self.alloc("JECXZ", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JECXZ rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xe3)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JECXZ label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0xe3)
+ m.imm1(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JECXZ")
+ }
+ return p
+}
+
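+// JECXZ has no rel32 fallback, so a label target must stay within the signed
+// 8-bit displacement range. Note also that the bare 0xe3 emitted here tests
+// RCX in 64-bit mode (JRCXZ); testing ECX proper would additionally require
+// the 0x67 address-size prefix, which this encoder does not emit.
+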
+// JG performs "Jump if greater (ZF == 0 and SF == OF)".
+//
+// Mnemonic : JG
+// Supported forms : (2 forms)
+//
+// * JG rel8
+// * JG rel32
+//
+func (self *Program) JG(v0 interface{}) *Instruction {
+ p := self.alloc("JG", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JG rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x7f)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JG rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x8f)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JG label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x7f)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x8f)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JG")
+ }
+ return p
+}
+
+// JGE performs "Jump if greater or equal (SF == OF)".
+//
+// Mnemonic : JGE
+// Supported forms : (2 forms)
+//
+// * JGE rel8
+// * JGE rel32
+//
+func (self *Program) JGE(v0 interface{}) *Instruction {
+ p := self.alloc("JGE", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JGE rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x7d)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JGE rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x8d)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JGE label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x7d)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x8d)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JGE")
+ }
+ return p
+}
+
+// JL performs "Jump if less (SF != OF)".
+//
+// Mnemonic : JL
+// Supported forms : (2 forms)
+//
+// * JL rel8
+// * JL rel32
+//
+func (self *Program) JL(v0 interface{}) *Instruction {
+ p := self.alloc("JL", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JL rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x7c)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JL rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x8c)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JL label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x7c)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x8c)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JL")
+ }
+ return p
+}
+
+// JLE performs "Jump if less or equal (ZF == 1 or SF != OF)".
+//
+// Mnemonic : JLE
+// Supported forms : (2 forms)
+//
+// * JLE rel8
+// * JLE rel32
+//
+func (self *Program) JLE(v0 interface{}) *Instruction {
+ p := self.alloc("JLE", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JLE rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x7e)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JLE rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x8e)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JLE label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x7e)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x8e)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JLE")
+ }
+ return p
+}
+
+// JMP performs "Jump Unconditionally".
+//
+// Mnemonic : JMP
+// Supported forms : (2 forms)
+//
+// * JMP rel8
+// * JMP rel32
+//
+func (self *Program) JMP(v0 interface{}) *Instruction {
+ p := self.alloc("JMP", 1, Operands { v0 })
+ p.branch = _B_unconditional
+ // JMP rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xeb)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JMP rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xe9)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JMP label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0xeb)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0xe9)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JMP")
+ }
+ return p
+}
+
+// JMPQ performs "Jump Unconditionally".
+//
+// Mnemonic : JMP
+// Supported forms : (2 forms)
+//
+// * JMPQ r64
+// * JMPQ m64
+//
+func (self *Program) JMPQ(v0 interface{}) *Instruction {
+ p := self.alloc("JMPQ", 1, Operands { v0 })
+ // JMPQ r64
+ if isReg64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], false)
+ m.emit(0xff)
+ m.emit(0xe0 | lcode(v[0]))
+ })
+ }
+ // JMPQ m64
+ if isM64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xff)
+ m.mrsd(4, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JMPQ")
+ }
+ return p
+}
+
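+// JMPQ is the indirect jump through a register or memory slot; unlike the
+// relative JMP above it sets no branch kind, so it takes no part in the
+// rel8/rel32 relaxation. Sketch:
+//
+//     p.JMPQ(RAX) // ff e0 : jump to the address held in RAX
+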
+// JNA performs "Jump if not above (CF == 1 or ZF == 1)".
+//
+// Mnemonic : JNA
+// Supported forms : (2 forms)
+//
+// * JNA rel8
+// * JNA rel32
+//
+func (self *Program) JNA(v0 interface{}) *Instruction {
+ p := self.alloc("JNA", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JNA rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x76)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JNA rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x86)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JNA label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x76)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x86)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JNA")
+ }
+ return p
+}
+
+// JNAE performs "Jump if not above or equal (CF == 1)".
+//
+// Mnemonic : JNAE
+// Supported forms : (2 forms)
+//
+// * JNAE rel8
+// * JNAE rel32
+//
+func (self *Program) JNAE(v0 interface{}) *Instruction {
+ p := self.alloc("JNAE", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JNAE rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x72)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JNAE rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x82)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JNAE label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x72)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x82)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JNAE")
+ }
+ return p
+}
+
+// JNB performs "Jump if not below (CF == 0)".
+//
+// Mnemonic : JNB
+// Supported forms : (2 forms)
+//
+// * JNB rel8
+// * JNB rel32
+//
+func (self *Program) JNB(v0 interface{}) *Instruction {
+ p := self.alloc("JNB", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JNB rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x73)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JNB rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x83)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JNB label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x73)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x83)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JNB")
+ }
+ return p
+}
+
+// JNBE performs "Jump if not below or equal (CF == 0 and ZF == 0)".
+//
+// Mnemonic : JNBE
+// Supported forms : (2 forms)
+//
+// * JNBE rel8
+// * JNBE rel32
+//
+func (self *Program) JNBE(v0 interface{}) *Instruction {
+ p := self.alloc("JNBE", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JNBE rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x77)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JNBE rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x87)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JNBE label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x77)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x87)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JNBE")
+ }
+ return p
+}
+
+// JNC performs "Jump if not carry (CF == 0)".
+//
+// Mnemonic : JNC
+// Supported forms : (2 forms)
+//
+// * JNC rel8
+// * JNC rel32
+//
+func (self *Program) JNC(v0 interface{}) *Instruction {
+ p := self.alloc("JNC", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JNC rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x73)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JNC rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x83)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JNC label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x73)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x83)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JNC")
+ }
+ return p
+}
+
+// JNE performs "Jump if not equal (ZF == 0)".
+//
+// Mnemonic : JNE
+// Supported forms : (2 forms)
+//
+// * JNE rel8
+// * JNE rel32
+//
+func (self *Program) JNE(v0 interface{}) *Instruction {
+ p := self.alloc("JNE", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JNE rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x75)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JNE rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x85)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JNE label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x75)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x85)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JNE")
+ }
+ return p
+}
+
+// JNG performs "Jump if not greater (ZF == 1 or SF != OF)".
+//
+// Mnemonic : JNG
+// Supported forms : (2 forms)
+//
+// * JNG rel8
+// * JNG rel32
+//
+func (self *Program) JNG(v0 interface{}) *Instruction {
+ p := self.alloc("JNG", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JNG rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x7e)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JNG rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x8e)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JNG label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x7e)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x8e)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JNG")
+ }
+ return p
+}
+
+// JNGE performs "Jump if not greater or equal (SF != OF)".
+//
+// Mnemonic : JNGE
+// Supported forms : (2 forms)
+//
+// * JNGE rel8
+// * JNGE rel32
+//
+func (self *Program) JNGE(v0 interface{}) *Instruction {
+ p := self.alloc("JNGE", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JNGE rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x7c)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JNGE rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x8c)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JNGE label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x7c)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x8c)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JNGE")
+ }
+ return p
+}
+
+// JNL performs "Jump if not less (SF == OF)".
+//
+// Mnemonic : JNL
+// Supported forms : (2 forms)
+//
+// * JNL rel8
+// * JNL rel32
+//
+func (self *Program) JNL(v0 interface{}) *Instruction {
+ p := self.alloc("JNL", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JNL rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x7d)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JNL rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x8d)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JNL label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x7d)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x8d)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JNL")
+ }
+ return p
+}
+
+// JNLE performs "Jump if not less or equal (ZF == 0 and SF == OF)".
+//
+// Mnemonic : JNLE
+// Supported forms : (2 forms)
+//
+// * JNLE rel8
+// * JNLE rel32
+//
+func (self *Program) JNLE(v0 interface{}) *Instruction {
+ p := self.alloc("JNLE", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JNLE rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x7f)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JNLE rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x8f)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JNLE label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x7f)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x8f)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JNLE")
+ }
+ return p
+}
+
+// JNO performs "Jump if not overflow (OF == 0)".
+//
+// Mnemonic : JNO
+// Supported forms : (2 forms)
+//
+// * JNO rel8
+// * JNO rel32
+//
+func (self *Program) JNO(v0 interface{}) *Instruction {
+ p := self.alloc("JNO", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JNO rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x71)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JNO rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x81)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JNO label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x71)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x81)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JNO")
+ }
+ return p
+}
+
+// JNP performs "Jump if not parity (PF == 0)".
+//
+// Mnemonic : JNP
+// Supported forms : (2 forms)
+//
+// * JNP rel8
+// * JNP rel32
+//
+func (self *Program) JNP(v0 interface{}) *Instruction {
+ p := self.alloc("JNP", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JNP rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x7b)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JNP rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x8b)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JNP label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x7b)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x8b)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JNP")
+ }
+ return p
+}
+
+// JNS performs "Jump if not sign (SF == 0)".
+//
+// Mnemonic : JNS
+// Supported forms : (2 forms)
+//
+// * JNS rel8
+// * JNS rel32
+//
+func (self *Program) JNS(v0 interface{}) *Instruction {
+ p := self.alloc("JNS", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JNS rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x79)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JNS rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x89)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JNS label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x79)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x89)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JNS")
+ }
+ return p
+}
+
+// JNZ performs "Jump if not zero (ZF == 0)".
+//
+// Mnemonic : JNZ
+// Supported forms : (2 forms)
+//
+// * JNZ rel8
+// * JNZ rel32
+//
+func (self *Program) JNZ(v0 interface{}) *Instruction {
+ p := self.alloc("JNZ", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JNZ rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x75)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JNZ rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x85)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JNZ label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x75)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x85)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JNZ")
+ }
+ return p
+}
+
+// JO performs "Jump if overflow (OF == 1)".
+//
+// Mnemonic : JO
+// Supported forms : (2 forms)
+//
+// * JO rel8
+// * JO rel32
+//
+func (self *Program) JO(v0 interface{}) *Instruction {
+ p := self.alloc("JO", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JO rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x70)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JO rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x80)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JO label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x70)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x80)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JO")
+ }
+ return p
+}
+
+// JP performs "Jump if parity (PF == 1)".
+//
+// Mnemonic : JP
+// Supported forms : (2 forms)
+//
+// * JP rel8
+// * JP rel32
+//
+func (self *Program) JP(v0 interface{}) *Instruction {
+ p := self.alloc("JP", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JP rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x7a)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JP rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x8a)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JP label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x7a)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x8a)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JP")
+ }
+ return p
+}
+
+// JPE performs "Jump if parity even (PF == 1)".
+//
+// Mnemonic : JPE
+// Supported forms : (2 forms)
+//
+// * JPE rel8
+// * JPE rel32
+//
+func (self *Program) JPE(v0 interface{}) *Instruction {
+ p := self.alloc("JPE", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JPE rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x7a)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JPE rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x8a)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JPE label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x7a)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x8a)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JPE")
+ }
+ return p
+}
+
+// JPO performs "Jump if parity odd (PF == 0)".
+//
+// Mnemonic : JPO
+// Supported forms : (2 forms)
+//
+// * JPO rel8
+// * JPO rel32
+//
+func (self *Program) JPO(v0 interface{}) *Instruction {
+ p := self.alloc("JPO", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JPO rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x7b)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JPO rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x8b)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JPO label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x7b)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x8b)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JPO")
+ }
+ return p
+}
+
+// JRCXZ performs "Jump if RCX register is 0".
+//
+// Mnemonic : JRCXZ
+// Supported forms : (1 form)
+//
+// * JRCXZ rel8
+//
+func (self *Program) JRCXZ(v0 interface{}) *Instruction {
+ p := self.alloc("JRCXZ", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JRCXZ rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xe3)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JRCXZ label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0xe3)
+ m.imm1(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JRCXZ")
+ }
+ return p
+}
+
+// JS performs "Jump if sign (SF == 1)".
+//
+// Mnemonic : JS
+// Supported forms : (2 forms)
+//
+// * JS rel8
+// * JS rel32
+//
+func (self *Program) JS(v0 interface{}) *Instruction {
+ p := self.alloc("JS", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JS rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x78)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JS rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x88)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JS label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x78)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x88)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JS")
+ }
+ return p
+}
+
+// JZ performs "Jump if zero (ZF == 1)".
+//
+// Mnemonic : JZ
+// Supported forms : (2 forms)
+//
+// * JZ rel8
+// * JZ rel32
+//
+func (self *Program) JZ(v0 interface{}) *Instruction {
+ p := self.alloc("JZ", 1, Operands { v0 })
+ p.branch = _B_conditional
+ // JZ rel8
+ if isRel8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x74)
+ m.imm1(relv(v[0]))
+ })
+ }
+ // JZ rel32
+ if isRel32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x84)
+ m.imm4(relv(v[0]))
+ })
+ }
+ // JZ label
+ if isLabel(v0) {
+ p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+ m.emit(0x74)
+ m.imm1(relv(v[0]))
+ })
+ p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x84)
+ m.imm4(relv(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for JZ")
+ }
+ return p
+}
+
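+// Editorial sketch, not mkasm_amd64.py output: it illustrates how the Jcc
+// helpers above select an encoding. A rel8/rel32 operand picks the fixed
+// form directly, while a label operand registers both the _F_rel1 and
+// _F_rel4 encodings so the assembler can use the short 2-byte form when the
+// target lands within rel8 range (-128..127) and fall back to the 6-byte
+// 0F 8x form otherwise. The target value is anything accepted by isLabel;
+// how labels are created is defined elsewhere in this package.
+func exampleConditionalJump(p *Program, target interface{}) *Instruction {
+    // ZF == 1 takes the branch; the 0x74 vs 0F 84 choice is made at link time.
+    return p.JZ(target)
+}
+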
+// KADDB performs "ADD Two 8-bit Masks".
+//
+// Mnemonic : KADDB
+// Supported forms : (1 form)
+//
+// * KADDB k, k, k [AVX512DQ]
+//
+func (self *Program) KADDB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KADDB", 3, Operands { v0, v1, v2 })
+ // KADDB k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, 0, nil, hlcode(v[1]))
+ m.emit(0x4a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KADDB")
+ }
+ return p
+}
+
+// KADDD performs "ADD Two 32-bit Masks".
+//
+// Mnemonic : KADDD
+// Supported forms : (1 form)
+//
+// * KADDD k, k, k [AVX512BW]
+//
+func (self *Program) KADDD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KADDD", 3, Operands { v0, v1, v2 })
+ // KADDD k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1)
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0x4a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KADDD")
+ }
+ return p
+}
+
+// KADDQ performs "ADD Two 64-bit Masks".
+//
+// Mnemonic : KADDQ
+// Supported forms : (1 form)
+//
+// * KADDQ k, k, k [AVX512BW]
+//
+func (self *Program) KADDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KADDQ", 3, Operands { v0, v1, v2 })
+ // KADDQ k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1)
+ m.emit(0xfc ^ (hlcode(v[1]) << 3))
+ m.emit(0x4a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KADDQ")
+ }
+ return p
+}
+
+// KADDW performs "ADD Two 16-bit Masks".
+//
+// Mnemonic : KADDW
+// Supported forms : (1 form)
+//
+// * KADDW k, k, k [AVX512DQ]
+//
+func (self *Program) KADDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KADDW", 3, Operands { v0, v1, v2 })
+ // KADDW k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, 0, nil, hlcode(v[1]))
+ m.emit(0x4a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KADDW")
+ }
+ return p
+}
+
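+// Editorial note, not mkasm_amd64.py output: in the three-operand mask
+// helpers above, the encoders put lcode(v[2]) in the ModRM reg field and
+// hlcode(v[1]) in VEX.vvvv, so the last operand is the destination, the
+// middle one the first source, and the first the second source, mirroring
+// Go assembler operand order. The parameters below stand for any mask
+// operands accepted by isK.
+func exampleMaskAdd(p *Program, src2, src1, dst interface{}) *Instruction {
+    // dst = src1 + src2 over 16-bit masks (AVX512DQ).
+    return p.KADDW(src2, src1, dst)
+}
+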
+// KANDB performs "Bitwise Logical AND 8-bit Masks".
+//
+// Mnemonic : KANDB
+// Supported forms : (1 form)
+//
+// * KANDB k, k, k [AVX512DQ]
+//
+func (self *Program) KANDB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KANDB", 3, Operands { v0, v1, v2 })
+ // KANDB k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, 0, nil, hlcode(v[1]))
+ m.emit(0x41)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KANDB")
+ }
+ return p
+}
+
+// KANDD performs "Bitwise Logical AND 32-bit Masks".
+//
+// Mnemonic : KANDD
+// Supported forms : (1 form)
+//
+// * KANDD k, k, k [AVX512BW]
+//
+func (self *Program) KANDD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KANDD", 3, Operands { v0, v1, v2 })
+ // KANDD k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1)
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0x41)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KANDD")
+ }
+ return p
+}
+
+// KANDNB performs "Bitwise Logical AND NOT 8-bit Masks".
+//
+// Mnemonic : KANDNB
+// Supported forms : (1 form)
+//
+// * KANDNB k, k, k [AVX512DQ]
+//
+func (self *Program) KANDNB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KANDNB", 3, Operands { v0, v1, v2 })
+ // KANDNB k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, 0, nil, hlcode(v[1]))
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KANDNB")
+ }
+ return p
+}
+
+// KANDND performs "Bitwise Logical AND NOT 32-bit Masks".
+//
+// Mnemonic : KANDND
+// Supported forms : (1 form)
+//
+// * KANDND k, k, k [AVX512BW]
+//
+func (self *Program) KANDND(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KANDND", 3, Operands { v0, v1, v2 })
+ // KANDND k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1)
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KANDND")
+ }
+ return p
+}
+
+// KANDNQ performs "Bitwise Logical AND NOT 64-bit Masks".
+//
+// Mnemonic : KANDNQ
+// Supported forms : (1 form)
+//
+// * KANDNQ k, k, k [AVX512BW]
+//
+func (self *Program) KANDNQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KANDNQ", 3, Operands { v0, v1, v2 })
+ // KANDNQ k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1)
+ m.emit(0xfc ^ (hlcode(v[1]) << 3))
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KANDNQ")
+ }
+ return p
+}
+
+// KANDNW performs "Bitwise Logical AND NOT 16-bit Masks".
+//
+// Mnemonic : KANDNW
+// Supported forms : (1 form)
+//
+// * KANDNW k, k, k [AVX512F]
+//
+func (self *Program) KANDNW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KANDNW", 3, Operands { v0, v1, v2 })
+ // KANDNW k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, 0, nil, hlcode(v[1]))
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KANDNW")
+ }
+ return p
+}
+
+// KANDQ performs "Bitwise Logical AND 64-bit Masks".
+//
+// Mnemonic : KANDQ
+// Supported forms : (1 form)
+//
+// * KANDQ k, k, k [AVX512BW]
+//
+func (self *Program) KANDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KANDQ", 3, Operands { v0, v1, v2 })
+ // KANDQ k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1)
+ m.emit(0xfc ^ (hlcode(v[1]) << 3))
+ m.emit(0x41)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KANDQ")
+ }
+ return p
+}
+
+// KANDW performs "Bitwise Logical AND 16-bit Masks".
+//
+// Mnemonic : KANDW
+// Supported forms : (1 form)
+//
+// * KANDW k, k, k [AVX512F]
+//
+func (self *Program) KANDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KANDW", 3, Operands { v0, v1, v2 })
+ // KANDW k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, 0, nil, hlcode(v[1]))
+ m.emit(0x41)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KANDW")
+ }
+ return p
+}
+
+// KMOVB performs "Move 8-bit Mask".
+//
+// Mnemonic : KMOVB
+// Supported forms : (5 forms)
+//
+// * KMOVB k, k [AVX512DQ]
+// * KMOVB r32, k [AVX512DQ]
+// * KMOVB m8, k [AVX512DQ]
+// * KMOVB k, r32 [AVX512DQ]
+// * KMOVB k, m8 [AVX512DQ]
+//
+func (self *Program) KMOVB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("KMOVB", 2, Operands { v0, v1 })
+ // KMOVB k, k
+ if isK(v0) && isK(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, 0, nil, 0)
+ m.emit(0x90)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // KMOVB r32, k
+ if isReg32(v0) && isK(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, 0, v[0], 0)
+ m.emit(0x92)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // KMOVB m8, k
+ if isM8(v0) && isK(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, 0, addr(v[0]), 0)
+ m.emit(0x90)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // KMOVB k, r32
+ if isK(v0) && isReg32(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), nil, 0)
+ m.emit(0x93)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // KMOVB k, m8
+ if isK(v0) && isM8(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, 0, addr(v[1]), 0)
+ m.emit(0x91)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KMOVB")
+ }
+ return p
+}
+
+// KMOVD performs "Move 32-bit Mask".
+//
+// Mnemonic : KMOVD
+// Supported forms : (5 forms)
+//
+// * KMOVD k, k [AVX512BW]
+// * KMOVD r32, k [AVX512BW]
+// * KMOVD m32, k [AVX512BW]
+// * KMOVD k, r32 [AVX512BW]
+// * KMOVD k, m32 [AVX512BW]
+//
+func (self *Program) KMOVD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("KMOVD", 2, Operands { v0, v1 })
+ // KMOVD k, k
+ if isK(v0) && isK(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1)
+ m.emit(0xf9)
+ m.emit(0x90)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // KMOVD r32, k
+ if isReg32(v0) && isK(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, 0, v[0], 0)
+ m.emit(0x92)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // KMOVD m32, k
+ if isM32(v0) && isK(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b1, 0x81, 0, addr(v[0]), 0)
+ m.emit(0x90)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // KMOVD k, r32
+ if isK(v0) && isReg32(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[1]), nil, 0)
+ m.emit(0x93)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // KMOVD k, m32
+ if isK(v0) && isM32(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b1, 0x81, 0, addr(v[1]), 0)
+ m.emit(0x91)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KMOVD")
+ }
+ return p
+}
+
+// KMOVQ performs "Move 64-bit Mask".
+//
+// Mnemonic : KMOVQ
+// Supported forms : (5 forms)
+//
+// * KMOVQ k, k [AVX512BW]
+// * KMOVQ r64, k [AVX512BW]
+// * KMOVQ m64, k [AVX512BW]
+// * KMOVQ k, r64 [AVX512BW]
+// * KMOVQ k, m64 [AVX512BW]
+//
+func (self *Program) KMOVQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("KMOVQ", 2, Operands { v0, v1 })
+ // KMOVQ k, k
+ if isK(v0) && isK(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1)
+ m.emit(0xf8)
+ m.emit(0x90)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // KMOVQ r64, k
+ if isReg64(v0) && isK(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1 ^ (hcode(v[0]) << 5))
+ m.emit(0xfb)
+ m.emit(0x92)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // KMOVQ m64, k
+ if isM64(v0) && isK(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b1, 0x80, 0, addr(v[0]), 0)
+ m.emit(0x90)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // KMOVQ k, r64
+ if isK(v0) && isReg64(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1 ^ (hcode(v[1]) << 7))
+ m.emit(0xfb)
+ m.emit(0x93)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // KMOVQ k, m64
+ if isK(v0) && isM64(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b1, 0x80, 0, addr(v[1]), 0)
+ m.emit(0x91)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KMOVQ")
+ }
+ return p
+}
+
+// KMOVW performs "Move 16-bit Mask".
+//
+// Mnemonic : KMOVW
+// Supported forms : (5 forms)
+//
+// * KMOVW k, k [AVX512F]
+// * KMOVW r32, k [AVX512F]
+// * KMOVW m16, k [AVX512F]
+// * KMOVW k, r32 [AVX512F]
+// * KMOVW k, m16 [AVX512F]
+//
+func (self *Program) KMOVW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("KMOVW", 2, Operands { v0, v1 })
+ // KMOVW k, k
+ if isK(v0) && isK(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, 0, nil, 0)
+ m.emit(0x90)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // KMOVW r32, k
+ if isReg32(v0) && isK(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, 0, v[0], 0)
+ m.emit(0x92)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // KMOVW m16, k
+ if isM16(v0) && isK(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, 0, addr(v[0]), 0)
+ m.emit(0x90)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // KMOVW k, r32
+ if isK(v0) && isReg32(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[1]), nil, 0)
+ m.emit(0x93)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // KMOVW k, m16
+ if isK(v0) && isM16(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, 0, addr(v[1]), 0)
+ m.emit(0x91)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KMOVW")
+ }
+ return p
+}
+
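+// Editorial sketch, not mkasm_amd64.py output: the KMOV* helpers above move
+// masks between k registers, general-purpose registers and memory, which is
+// the usual way to get ad-hoc mask values in and out of the AVX-512 mask
+// file. The gpr and k parameters stand for any operands accepted by isReg32
+// and isK respectively; concrete register values come from elsewhere in the
+// package.
+func exampleLoadMask(p *Program, gpr, k interface{}) *Instruction {
+    // KMOVW r32, k copies the low 16 bits of the GPR into the mask register.
+    return p.KMOVW(gpr, k)
+}
+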
+// KNOTB performs "NOT 8-bit Mask Register".
+//
+// Mnemonic : KNOTB
+// Supported forms : (1 form)
+//
+// * KNOTB k, k [AVX512DQ]
+//
+func (self *Program) KNOTB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("KNOTB", 2, Operands { v0, v1 })
+ // KNOTB k, k
+ if isK(v0) && isK(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, 0, nil, 0)
+ m.emit(0x44)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KNOTB")
+ }
+ return p
+}
+
+// KNOTD performs "NOT 32-bit Mask Register".
+//
+// Mnemonic : KNOTD
+// Supported forms : (1 form)
+//
+// * KNOTD k, k [AVX512BW]
+//
+func (self *Program) KNOTD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("KNOTD", 2, Operands { v0, v1 })
+ // KNOTD k, k
+ if isK(v0) && isK(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1)
+ m.emit(0xf9)
+ m.emit(0x44)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KNOTD")
+ }
+ return p
+}
+
+// KNOTQ performs "NOT 64-bit Mask Register".
+//
+// Mnemonic : KNOTQ
+// Supported forms : (1 form)
+//
+// * KNOTQ k, k [AVX512BW]
+//
+func (self *Program) KNOTQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("KNOTQ", 2, Operands { v0, v1 })
+ // KNOTQ k, k
+ if isK(v0) && isK(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1)
+ m.emit(0xf8)
+ m.emit(0x44)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KNOTQ")
+ }
+ return p
+}
+
+// KNOTW performs "NOT 16-bit Mask Register".
+//
+// Mnemonic : KNOTW
+// Supported forms : (1 form)
+//
+// * KNOTW k, k [AVX512F]
+//
+func (self *Program) KNOTW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("KNOTW", 2, Operands { v0, v1 })
+ // KNOTW k, k
+ if isK(v0) && isK(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, 0, nil, 0)
+ m.emit(0x44)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KNOTW")
+ }
+ return p
+}
+
+// KORB performs "Bitwise Logical OR 8-bit Masks".
+//
+// Mnemonic : KORB
+// Supported forms : (1 form)
+//
+// * KORB k, k, k [AVX512DQ]
+//
+func (self *Program) KORB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KORB", 3, Operands { v0, v1, v2 })
+ // KORB k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, 0, nil, hlcode(v[1]))
+ m.emit(0x45)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KORB")
+ }
+ return p
+}
+
+// KORD performs "Bitwise Logical OR 32-bit Masks".
+//
+// Mnemonic : KORD
+// Supported forms : (1 form)
+//
+// * KORD k, k, k [AVX512BW]
+//
+func (self *Program) KORD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KORD", 3, Operands { v0, v1, v2 })
+ // KORD k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1)
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0x45)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KORD")
+ }
+ return p
+}
+
+// KORQ performs "Bitwise Logical OR 64-bit Masks".
+//
+// Mnemonic : KORQ
+// Supported forms : (1 form)
+//
+// * KORQ k, k, k [AVX512BW]
+//
+func (self *Program) KORQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KORQ", 3, Operands { v0, v1, v2 })
+ // KORQ k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1)
+ m.emit(0xfc ^ (hlcode(v[1]) << 3))
+ m.emit(0x45)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KORQ")
+ }
+ return p
+}
+
+// KORTESTB performs "OR 8-bit Masks and Set Flags".
+//
+// Mnemonic : KORTESTB
+// Supported forms : (1 form)
+//
+// * KORTESTB k, k [AVX512DQ]
+//
+func (self *Program) KORTESTB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("KORTESTB", 2, Operands { v0, v1 })
+ // KORTESTB k, k
+ if isK(v0) && isK(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, 0, nil, 0)
+ m.emit(0x98)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KORTESTB")
+ }
+ return p
+}
+
+// KORTESTD performs "OR 32-bit Masks and Set Flags".
+//
+// Mnemonic : KORTESTD
+// Supported forms : (1 form)
+//
+// * KORTESTD k, k [AVX512BW]
+//
+func (self *Program) KORTESTD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("KORTESTD", 2, Operands { v0, v1 })
+ // KORTESTD k, k
+ if isK(v0) && isK(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1)
+ m.emit(0xf9)
+ m.emit(0x98)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KORTESTD")
+ }
+ return p
+}
+
+// KORTESTQ performs "OR 64-bit Masks and Set Flags".
+//
+// Mnemonic : KORTESTQ
+// Supported forms : (1 form)
+//
+// * KORTESTQ k, k [AVX512BW]
+//
+func (self *Program) KORTESTQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("KORTESTQ", 2, Operands { v0, v1 })
+ // KORTESTQ k, k
+ if isK(v0) && isK(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1)
+ m.emit(0xf8)
+ m.emit(0x98)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KORTESTQ")
+ }
+ return p
+}
+
+// KORTESTW performs "OR 16-bit Masks and Set Flags".
+//
+// Mnemonic : KORTESTW
+// Supported forms : (1 form)
+//
+// * KORTESTW k, k [AVX512F]
+//
+func (self *Program) KORTESTW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("KORTESTW", 2, Operands { v0, v1 })
+ // KORTESTW k, k
+ if isK(v0) && isK(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, 0, nil, 0)
+ m.emit(0x98)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KORTESTW")
+ }
+ return p
+}
+
+// KORW performs "Bitwise Logical OR 16-bit Masks".
+//
+// Mnemonic : KORW
+// Supported forms : (1 form)
+//
+// * KORW k, k, k [AVX512F]
+//
+func (self *Program) KORW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KORW", 3, Operands { v0, v1, v2 })
+ // KORW k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, 0, nil, hlcode(v[1]))
+ m.emit(0x45)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KORW")
+ }
+ return p
+}
+
+// KSHIFTLB performs "Shift Left 8-bit Masks".
+//
+// Mnemonic : KSHIFTLB
+// Supported forms : (1 form)
+//
+// * KSHIFTLB imm8, k, k [AVX512DQ]
+//
+func (self *Program) KSHIFTLB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KSHIFTLB", 3, Operands { v0, v1, v2 })
+ // KSHIFTLB imm8, k, k
+ if isImm8(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3)
+ m.emit(0x79)
+ m.emit(0x32)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KSHIFTLB")
+ }
+ return p
+}
+
+// KSHIFTLD performs "Shift Left 32-bit Masks".
+//
+// Mnemonic : KSHIFTLD
+// Supported forms : (1 form)
+//
+// * KSHIFTLD imm8, k, k [AVX512BW]
+//
+func (self *Program) KSHIFTLD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KSHIFTLD", 3, Operands { v0, v1, v2 })
+ // KSHIFTLD imm8, k, k
+ if isImm8(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3)
+ m.emit(0x79)
+ m.emit(0x33)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KSHIFTLD")
+ }
+ return p
+}
+
+// KSHIFTLQ performs "Shift Left 64-bit Masks".
+//
+// Mnemonic : KSHIFTLQ
+// Supported forms : (1 form)
+//
+// * KSHIFTLQ imm8, k, k [AVX512BW]
+//
+func (self *Program) KSHIFTLQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KSHIFTLQ", 3, Operands { v0, v1, v2 })
+ // KSHIFTLQ imm8, k, k
+ if isImm8(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3)
+ m.emit(0xf9)
+ m.emit(0x33)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KSHIFTLQ")
+ }
+ return p
+}
+
+// KSHIFTLW performs "Shift Left 16-bit Masks".
+//
+// Mnemonic : KSHIFTLW
+// Supported forms : (1 form)
+//
+// * KSHIFTLW imm8, k, k [AVX512F]
+//
+func (self *Program) KSHIFTLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KSHIFTLW", 3, Operands { v0, v1, v2 })
+ // KSHIFTLW imm8, k, k
+ if isImm8(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3)
+ m.emit(0xf9)
+ m.emit(0x32)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KSHIFTLW")
+ }
+ return p
+}
+
+// KSHIFTRB performs "Shift Right 8-bit Masks".
+//
+// Mnemonic : KSHIFTRB
+// Supported forms : (1 form)
+//
+// * KSHIFTRB imm8, k, k [AVX512DQ]
+//
+func (self *Program) KSHIFTRB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KSHIFTRB", 3, Operands { v0, v1, v2 })
+ // KSHIFTRB imm8, k, k
+ if isImm8(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3)
+ m.emit(0x79)
+ m.emit(0x30)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KSHIFTRB")
+ }
+ return p
+}
+
+// KSHIFTRD performs "Shift Right 32-bit Masks".
+//
+// Mnemonic : KSHIFTRD
+// Supported forms : (1 form)
+//
+// * KSHIFTRD imm8, k, k [AVX512BW]
+//
+func (self *Program) KSHIFTRD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KSHIFTRD", 3, Operands { v0, v1, v2 })
+ // KSHIFTRD imm8, k, k
+ if isImm8(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3)
+ m.emit(0x79)
+ m.emit(0x31)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KSHIFTRD")
+ }
+ return p
+}
+
+// KSHIFTRQ performs "Shift Right 64-bit Masks".
+//
+// Mnemonic : KSHIFTRQ
+// Supported forms : (1 form)
+//
+// * KSHIFTRQ imm8, k, k [AVX512BW]
+//
+func (self *Program) KSHIFTRQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KSHIFTRQ", 3, Operands { v0, v1, v2 })
+ // KSHIFTRQ imm8, k, k
+ if isImm8(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3)
+ m.emit(0xf9)
+ m.emit(0x31)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KSHIFTRQ")
+ }
+ return p
+}
+
+// KSHIFTRW performs "Shift Right 16-bit Masks".
+//
+// Mnemonic : KSHIFTRW
+// Supported forms : (1 form)
+//
+// * KSHIFTRW imm8, k, k [AVX512F]
+//
+func (self *Program) KSHIFTRW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KSHIFTRW", 3, Operands { v0, v1, v2 })
+ // KSHIFTRW imm8, k, k
+ if isImm8(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3)
+ m.emit(0xf9)
+ m.emit(0x30)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KSHIFTRW")
+ }
+ return p
+}
+
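+// Editorial sketch, not mkasm_amd64.py output: the KSHIFT* helpers above
+// take the shift count as an imm8 first operand, then the source and
+// destination masks. Passing a plain Go integer for the count assumes
+// isImm8/toImmAny accept untyped integers, as their use above suggests.
+func exampleMaskShift(p *Program, src, dst interface{}) *Instruction {
+    // dst = src >> 4 over a 16-bit mask; counts of 16 or more zero the result.
+    return p.KSHIFTRW(4, src, dst)
+}
+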
+// KTESTB performs "Bit Test 8-bit Masks and Set Flags".
+//
+// Mnemonic : KTESTB
+// Supported forms : (1 form)
+//
+// * KTESTB k, k [AVX512DQ]
+//
+func (self *Program) KTESTB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("KTESTB", 2, Operands { v0, v1 })
+ // KTESTB k, k
+ if isK(v0) && isK(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, 0, nil, 0)
+ m.emit(0x99)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KTESTB")
+ }
+ return p
+}
+
+// KTESTD performs "Bit Test 32-bit Masks and Set Flags".
+//
+// Mnemonic : KTESTD
+// Supported forms : (1 form)
+//
+// * KTESTD k, k [AVX512BW]
+//
+func (self *Program) KTESTD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("KTESTD", 2, Operands { v0, v1 })
+ // KTESTD k, k
+ if isK(v0) && isK(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1)
+ m.emit(0xf9)
+ m.emit(0x99)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KTESTD")
+ }
+ return p
+}
+
+// KTESTQ performs "Bit Test 64-bit Masks and Set Flags".
+//
+// Mnemonic : KTESTQ
+// Supported forms : (1 form)
+//
+// * KTESTQ k, k [AVX512BW]
+//
+func (self *Program) KTESTQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("KTESTQ", 2, Operands { v0, v1 })
+ // KTESTQ k, k
+ if isK(v0) && isK(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1)
+ m.emit(0xf8)
+ m.emit(0x99)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KTESTQ")
+ }
+ return p
+}
+
+// KTESTW performs "Bit Test 16-bit Masks and Set Flags".
+//
+// Mnemonic : KTESTW
+// Supported forms : (1 form)
+//
+// * KTESTW k, k [AVX512DQ]
+//
+func (self *Program) KTESTW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("KTESTW", 2, Operands { v0, v1 })
+ // KTESTW k, k
+ if isK(v0) && isK(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, 0, nil, 0)
+ m.emit(0x99)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KTESTW")
+ }
+ return p
+}
+
+// KUNPCKBW performs "Unpack and Interleave 8-bit Masks".
+//
+// Mnemonic : KUNPCKBW
+// Supported forms : (1 form)
+//
+// * KUNPCKBW k, k, k [AVX512F]
+//
+func (self *Program) KUNPCKBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KUNPCKBW", 3, Operands { v0, v1, v2 })
+ // KUNPCKBW k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, 0, nil, hlcode(v[1]))
+ m.emit(0x4b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KUNPCKBW")
+ }
+ return p
+}
+
+// KUNPCKDQ performs "Unpack and Interleave 32-bit Masks".
+//
+// Mnemonic : KUNPCKDQ
+// Supported forms : (1 form)
+//
+// * KUNPCKDQ k, k, k [AVX512BW]
+//
+func (self *Program) KUNPCKDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KUNPCKDQ", 3, Operands { v0, v1, v2 })
+ // KUNPCKDQ k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1)
+ m.emit(0xfc ^ (hlcode(v[1]) << 3))
+ m.emit(0x4b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KUNPCKDQ")
+ }
+ return p
+}
+
+// KUNPCKWD performs "Unpack and Interleave 16-bit Masks".
+//
+// Mnemonic : KUNPCKWD
+// Supported forms : (1 form)
+//
+// * KUNPCKWD k, k, k [AVX512BW]
+//
+func (self *Program) KUNPCKWD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KUNPCKWD", 3, Operands { v0, v1, v2 })
+ // KUNPCKWD k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, 0, nil, hlcode(v[1]))
+ m.emit(0x4b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KUNPCKWD")
+ }
+ return p
+}
+
+// KXNORB performs "Bitwise Logical XNOR 8-bit Masks".
+//
+// Mnemonic : KXNORB
+// Supported forms : (1 form)
+//
+// * KXNORB k, k, k [AVX512DQ]
+//
+func (self *Program) KXNORB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KXNORB", 3, Operands { v0, v1, v2 })
+ // KXNORB k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, 0, nil, hlcode(v[1]))
+ m.emit(0x46)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KXNORB")
+ }
+ return p
+}
+
+// KXNORD performs "Bitwise Logical XNOR 32-bit Masks".
+//
+// Mnemonic : KXNORD
+// Supported forms : (1 form)
+//
+// * KXNORD k, k, k [AVX512BW]
+//
+func (self *Program) KXNORD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KXNORD", 3, Operands { v0, v1, v2 })
+ // KXNORD k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1)
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0x46)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KXNORD")
+ }
+ return p
+}
+
+// KXNORQ performs "Bitwise Logical XNOR 64-bit Masks".
+//
+// Mnemonic : KXNORQ
+// Supported forms : (1 form)
+//
+// * KXNORQ k, k, k [AVX512BW]
+//
+func (self *Program) KXNORQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KXNORQ", 3, Operands { v0, v1, v2 })
+ // KXNORQ k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1)
+ m.emit(0xfc ^ (hlcode(v[1]) << 3))
+ m.emit(0x46)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KXNORQ")
+ }
+ return p
+}
+
+// KXNORW performs "Bitwise Logical XNOR 16-bit Masks".
+//
+// Mnemonic : KXNORW
+// Supported forms : (1 form)
+//
+// * KXNORW k, k, k [AVX512F]
+//
+func (self *Program) KXNORW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KXNORW", 3, Operands { v0, v1, v2 })
+ // KXNORW k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, 0, nil, hlcode(v[1]))
+ m.emit(0x46)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KXNORW")
+ }
+ return p
+}
+
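+// Editorial note, not mkasm_amd64.py output: KXNOR with the same register in
+// every position is the standard idiom for filling a mask with all ones,
+// since k XNOR k is identically 1 -- the mask-register analogue of
+// XOR-zeroing a GPR.
+func exampleAllOnesMask16(p *Program, k interface{}) *Instruction {
+    return p.KXNORW(k, k, k)
+}
+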
+// KXORB performs "Bitwise Logical XOR 8-bit Masks".
+//
+// Mnemonic : KXORB
+// Supported forms : (1 form)
+//
+// * KXORB k, k, k [AVX512DQ]
+//
+func (self *Program) KXORB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KXORB", 3, Operands { v0, v1, v2 })
+ // KXORB k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, 0, nil, hlcode(v[1]))
+ m.emit(0x47)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KXORB")
+ }
+ return p
+}
+
+// KXORD performs "Bitwise Logical XOR 32-bit Masks".
+//
+// Mnemonic : KXORD
+// Supported forms : (1 form)
+//
+// * KXORD k, k, k [AVX512BW]
+//
+func (self *Program) KXORD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KXORD", 3, Operands { v0, v1, v2 })
+ // KXORD k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1)
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0x47)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KXORD")
+ }
+ return p
+}
+
+// KXORQ performs "Bitwise Logical XOR 64-bit Masks".
+//
+// Mnemonic : KXORQ
+// Supported forms : (1 form)
+//
+// * KXORQ k, k, k [AVX512BW]
+//
+func (self *Program) KXORQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KXORQ", 3, Operands { v0, v1, v2 })
+ // KXORQ k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1)
+ m.emit(0xfc ^ (hlcode(v[1]) << 3))
+ m.emit(0x47)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KXORQ")
+ }
+ return p
+}
+
+// KXORW performs "Bitwise Logical XOR 16-bit Masks".
+//
+// Mnemonic : KXORW
+// Supported forms : (1 form)
+//
+// * KXORW k, k, k [AVX512F]
+//
+func (self *Program) KXORW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("KXORW", 3, Operands { v0, v1, v2 })
+ // KXORW k, k, k
+ if isK(v0) && isK(v1) && isK(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainMask
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, 0, nil, hlcode(v[1]))
+ m.emit(0x47)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for KXORW")
+ }
+ return p
+}
+
+// LDDQU performs "Load Unaligned Integer 128 Bits".
+//
+// Mnemonic : LDDQU
+// Supported forms : (1 form)
+//
+// * LDDQU m128, xmm [SSE3]
+//
+func (self *Program) LDDQU(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("LDDQU", 2, Operands { v0, v1 })
+ // LDDQU m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xf0)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for LDDQU")
+ }
+ return p
+}
+
+// LDMXCSR performs "Load MXCSR Register".
+//
+// Mnemonic : LDMXCSR
+// Supported forms : (1 form)
+//
+// * LDMXCSR m32 [SSE]
+//
+func (self *Program) LDMXCSR(v0 interface{}) *Instruction {
+ p := self.alloc("LDMXCSR", 1, Operands { v0 })
+ // LDMXCSR m32
+ if isM32(v0) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xae)
+ m.mrsd(2, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for LDMXCSR")
+ }
+ return p
+}
+
+// LEAL performs "Load Effective Address".
+//
+// Mnemonic : LEA
+// Supported forms : (1 form)
+//
+// * LEAL m, r32
+//
+func (self *Program) LEAL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("LEAL", 2, Operands { v0, v1 })
+ // LEAL m, r32
+ if isM(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x8d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for LEAL")
+ }
+ return p
+}
+
+// LEAQ performs "Load Effective Address".
+//
+// Mnemonic : LEA
+// Supported forms : (1 form)
+//
+// * LEAQ m, r64
+//
+func (self *Program) LEAQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("LEAQ", 2, Operands { v0, v1 })
+ // LEAQ m, r64
+ if isM(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x8d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for LEAQ")
+ }
+ return p
+}
+
+// LEAW performs "Load Effective Address".
+//
+// Mnemonic : LEA
+// Supported forms : (1 form)
+//
+// * LEAW m, r16
+//
+func (self *Program) LEAW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("LEAW", 2, Operands { v0, v1 })
+ // LEAW m, r16
+ if isM(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x8d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for LEAW")
+ }
+ return p
+}
+
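+// Editorial sketch, not mkasm_amd64.py output: because LEA only evaluates
+// its addressing expression and never touches memory, LEAQ doubles as a
+// three-input add/scale over GPRs. The mem parameter is any memory operand
+// accepted by isM (built with the package's memory-operand constructors,
+// defined elsewhere) and dst any register accepted by isReg64.
+func exampleAddressArith(p *Program, mem, dst interface{}) *Instruction {
+    // dst = base + index*scale + disp, computed without a memory access.
+    return p.LEAQ(mem, dst)
+}
+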
+// LFENCE performs "Load Fence".
+//
+// Mnemonic : LFENCE
+// Supported forms : (1 form)
+//
+// * LFENCE [SSE2]
+//
+func (self *Program) LFENCE() *Instruction {
+ p := self.alloc("LFENCE", 0, Operands { })
+ // LFENCE
+ self.require(ISA_SSE2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0xae)
+ m.emit(0xe8)
+ })
+ return p
+}
+
+// LZCNTL performs "Count the Number of Leading Zero Bits".
+//
+// Mnemonic : LZCNT
+// Supported forms : (2 forms)
+//
+// * LZCNTL r32, r32 [LZCNT]
+// * LZCNTL m32, r32 [LZCNT]
+//
+func (self *Program) LZCNTL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("LZCNTL", 2, Operands { v0, v1 })
+ // LZCNTL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_LZCNT)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xbd)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // LZCNTL m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_LZCNT)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xbd)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for LZCNTL")
+ }
+ return p
+}
+
+// LZCNTQ performs "Count the Number of Leading Zero Bits".
+//
+// Mnemonic : LZCNT
+// Supported forms : (2 forms)
+//
+// * LZCNTQ r64, r64 [LZCNT]
+// * LZCNTQ m64, r64 [LZCNT]
+//
+func (self *Program) LZCNTQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("LZCNTQ", 2, Operands { v0, v1 })
+ // LZCNTQ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_LZCNT)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0xbd)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // LZCNTQ m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_LZCNT)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0xbd)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for LZCNTQ")
+ }
+ return p
+}
+
+// LZCNTW performs "Count the Number of Leading Zero Bits".
+//
+// Mnemonic : LZCNT
+// Supported forms : (2 forms)
+//
+// * LZCNTW r16, r16 [LZCNT]
+// * LZCNTW m16, r16 [LZCNT]
+//
+func (self *Program) LZCNTW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("LZCNTW", 2, Operands { v0, v1 })
+ // LZCNTW r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_LZCNT)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xbd)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // LZCNTW m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_LZCNT)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xbd)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for LZCNTW")
+ }
+ return p
+}
+
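+// Editorial note, not mkasm_amd64.py output: self.require(ISA_LZCNT) above
+// enforces the feature flag against the Program's configured architecture;
+// it does not probe the host CPU. On hardware without LZCNT the same
+// F3 0F BD byte sequence decodes as BSR with an ignored REP prefix, which
+// returns the index of the highest set bit rather than a leading-zero
+// count, so callers should gate on CPUID before emitting these forms.
+func exampleLeadingZeros(p *Program, src, dst interface{}) *Instruction {
+    // dst = number of leading zero bits in the 64-bit source.
+    return p.LZCNTQ(src, dst)
+}
+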
+// MASKMOVDQU performs "Store Selected Bytes of Double Quadword".
+//
+// Mnemonic : MASKMOVDQU
+// Supported forms : (1 form)
+//
+// * MASKMOVDQU xmm, xmm [SSE2]
+//
+func (self *Program) MASKMOVDQU(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MASKMOVDQU", 2, Operands { v0, v1 })
+ // MASKMOVDQU xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xf7)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MASKMOVDQU")
+ }
+ return p
+}
+
+// MASKMOVQ performs "Store Selected Bytes of Quadword".
+//
+// Mnemonic : MASKMOVQ
+// Supported forms : (1 form)
+//
+// * MASKMOVQ mm, mm [MMX+]
+//
+func (self *Program) MASKMOVQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MASKMOVQ", 2, Operands { v0, v1 })
+ // MASKMOVQ mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xf7)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MASKMOVQ")
+ }
+ return p
+}
+
+// MAXPD performs "Return Maximum Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : MAXPD
+// Supported forms : (2 forms)
+//
+// * MAXPD xmm, xmm [SSE2]
+// * MAXPD m128, xmm [SSE2]
+//
+func (self *Program) MAXPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MAXPD", 2, Operands { v0, v1 })
+ // MAXPD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MAXPD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x5f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MAXPD")
+ }
+ return p
+}
+
+// MAXPS performs "Return Maximum Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : MAXPS
+// Supported forms : (2 forms)
+//
+// * MAXPS xmm, xmm [SSE]
+// * MAXPS m128, xmm [SSE]
+//
+func (self *Program) MAXPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MAXPS", 2, Operands { v0, v1 })
+ // MAXPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MAXPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x5f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MAXPS")
+ }
+ return p
+}
+
+// MAXSD performs "Return Maximum Scalar Double-Precision Floating-Point Value".
+//
+// Mnemonic : MAXSD
+// Supported forms : (2 forms)
+//
+// * MAXSD xmm, xmm [SSE2]
+// * MAXSD m64, xmm [SSE2]
+//
+func (self *Program) MAXSD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MAXSD", 2, Operands { v0, v1 })
+ // MAXSD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MAXSD m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x5f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MAXSD")
+ }
+ return p
+}
+
+// MAXSS performs "Return Maximum Scalar Single-Precision Floating-Point Value".
+//
+// Mnemonic : MAXSS
+// Supported forms : (2 forms)
+//
+// * MAXSS xmm, xmm [SSE]
+// * MAXSS m32, xmm [SSE]
+//
+func (self *Program) MAXSS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MAXSS", 2, Operands { v0, v1 })
+ // MAXSS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MAXSS m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x5f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MAXSS")
+ }
+ return p
+}
+
+// MFENCE performs "Memory Fence".
+//
+// Mnemonic : MFENCE
+// Supported forms : (1 form)
+//
+// * MFENCE [SSE2]
+//
+func (self *Program) MFENCE() *Instruction {
+ p := self.alloc("MFENCE", 0, Operands { })
+ // MFENCE
+ self.require(ISA_SSE2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0xae)
+ m.emit(0xf0)
+ })
+ return p
+}
+
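+// Editorial note, not mkasm_amd64.py output: LFENCE and MFENCE above are
+// both encoded as 0F AE with a fixed ModRM byte -- E8 (/5) and F0 (/6)
+// respectively. MFENCE orders all prior loads and stores against all later
+// ones, while LFENCE orders only loads (plus its instruction-serializing
+// behavior), so MFENCE is the conservative choice between unrelated memory
+// operations.
+func exampleFullBarrier(p *Program) *Instruction {
+    return p.MFENCE()
+}
+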
+// MINPD performs "Return Minimum Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : MINPD
+// Supported forms : (2 forms)
+//
+// * MINPD xmm, xmm [SSE2]
+// * MINPD m128, xmm [SSE2]
+//
+func (self *Program) MINPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MINPD", 2, Operands { v0, v1 })
+ // MINPD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MINPD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x5d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MINPD")
+ }
+ return p
+}
+
+// MINPS performs "Return Minimum Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : MINPS
+// Supported forms : (2 forms)
+//
+// * MINPS xmm, xmm [SSE]
+// * MINPS m128, xmm [SSE]
+//
+func (self *Program) MINPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MINPS", 2, Operands { v0, v1 })
+ // MINPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MINPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x5d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MINPS")
+ }
+ return p
+}
+
+// MINSD performs "Return Minimum Scalar Double-Precision Floating-Point Value".
+//
+// Mnemonic : MINSD
+// Supported forms : (2 forms)
+//
+// * MINSD xmm, xmm [SSE2]
+// * MINSD m64, xmm [SSE2]
+//
+func (self *Program) MINSD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MINSD", 2, Operands { v0, v1 })
+ // MINSD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MINSD m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x5d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MINSD")
+ }
+ return p
+}
+
+// MINSS performs "Return Minimum Scalar Single-Precision Floating-Point Value".
+//
+// Mnemonic : MINSS
+// Supported forms : (2 forms)
+//
+// * MINSS xmm, xmm [SSE]
+// * MINSS m32, xmm [SSE]
+//
+func (self *Program) MINSS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MINSS", 2, Operands { v0, v1 })
+ // MINSS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MINSS m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x5d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MINSS")
+ }
+ return p
+}
+
+// MONITOR performs "Monitor a Linear Address Range".
+//
+// Mnemonic : MONITOR
+// Supported forms : (1 form)
+//
+// * MONITOR [MONITOR]
+//
+func (self *Program) MONITOR() *Instruction {
+ p := self.alloc("MONITOR", 0, Operands { })
+ // MONITOR
+ self.require(ISA_MONITOR)
+ p.domain = DomainMisc
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x01)
+ m.emit(0xc8)
+ })
+ return p
+}
+
+// MONITORX performs "Monitor a Linear Address Range with Timeout".
+//
+// Mnemonic : MONITORX
+// Supported forms : (1 form)
+//
+// * MONITORX [MONITORX]
+//
+func (self *Program) MONITORX() *Instruction {
+ p := self.alloc("MONITORX", 0, Operands { })
+ // MONITORX
+ self.require(ISA_MONITORX)
+ p.domain = DomainMisc
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x01)
+ m.emit(0xfa)
+ })
+ return p
+}
+
+// MOVAPD performs "Move Aligned Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : MOVAPD
+// Supported forms : (3 forms)
+//
+// * MOVAPD xmm, xmm [SSE2]
+// * MOVAPD m128, xmm [SSE2]
+// * MOVAPD xmm, m128 [SSE2]
+//
+func (self *Program) MOVAPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVAPD", 2, Operands { v0, v1 })
+ // MOVAPD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x28)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // MOVAPD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x28)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // MOVAPD xmm, m128
+ if isXMM(v0) && isM128(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x29)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVAPD")
+ }
+ return p
+}
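+
+// Usage sketch: the aligned moves (MOVAPD, and MOVAPS/MOVDQA below) fault
+// with #GP when a memory operand is not 16-byte aligned, while the MOVU*
+// variants accept any alignment. Assuming a *Program value p and a 16-byte
+// aligned memory operand m built with this package's addressing helpers
+// (construction elided):
+//
+//     p.MOVAPD(m, XMM0)    // aligned 128-bit load; faults if m is misaligned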
+
+// MOVAPS performs "Move Aligned Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : MOVAPS
+// Supported forms : (3 forms)
+//
+// * MOVAPS xmm, xmm [SSE]
+// * MOVAPS m128, xmm [SSE]
+// * MOVAPS xmm, m128 [SSE]
+//
+func (self *Program) MOVAPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVAPS", 2, Operands { v0, v1 })
+ // MOVAPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x28)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // MOVAPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x28)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // MOVAPS xmm, m128
+ if isXMM(v0) && isM128(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x29)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVAPS")
+ }
+ return p
+}
+
+// MOVB performs "Move".
+//
+// Mnemonic : MOV
+// Supported forms : (5 forms)
+//
+// * MOVB imm8, r8
+// * MOVB r8, r8
+// * MOVB m8, r8
+// * MOVB imm8, m8
+// * MOVB r8, m8
+//
+func (self *Program) MOVB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVB", 2, Operands { v0, v1 })
+ // MOVB imm8, r8
+ if isImm8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xc6)
+ m.emit(0xc0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xb0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // MOVB r8, r8
+ if isReg8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
+ m.emit(0x88)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1]))
+ m.emit(0x8a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MOVB m8, r8
+ if isM8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1]))
+ m.emit(0x8a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // MOVB imm8, m8
+ if isImm8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc6)
+ m.mrsd(0, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // MOVB r8, m8
+ if isReg8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
+ m.emit(0x88)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVB")
+ }
+ return p
+}
+
+// MOVBEL performs "Move Data After Swapping Bytes".
+//
+// Mnemonic : MOVBE
+// Supported forms : (2 forms)
+//
+// * MOVBEL m32, r32 [MOVBE]
+// * MOVBEL r32, m32 [MOVBE]
+//
+func (self *Program) MOVBEL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVBEL", 2, Operands { v0, v1 })
+ // MOVBEL m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_MOVBE)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xf0)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // MOVBEL r32, m32
+ if isReg32(v0) && isM32(v1) {
+ self.require(ISA_MOVBE)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xf1)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVBEL")
+ }
+ return p
+}
+
+// MOVBEQ performs "Move Data After Swapping Bytes".
+//
+// Mnemonic : MOVBE
+// Supported forms : (2 forms)
+//
+// * MOVBEQ m64, r64 [MOVBE]
+// * MOVBEQ r64, m64 [MOVBE]
+//
+func (self *Program) MOVBEQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVBEQ", 2, Operands { v0, v1 })
+ // MOVBEQ m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_MOVBE)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xf0)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // MOVBEQ r64, m64
+ if isReg64(v0) && isM64(v1) {
+ self.require(ISA_MOVBE)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[0]), addr(v[1]))
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xf1)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVBEQ")
+ }
+ return p
+}
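+
+// Usage sketch: MOVBE swaps byte order in flight, so a big-endian 64-bit
+// field can be loaded without a separate BSWAP. Assuming a *Program value p
+// and a memory operand m addressing the field:
+//
+//     p.MOVBEQ(m, RAX)    // RAX = 64-bit load from m with bytes reversed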
+
+// MOVBEW performs "Move Data After Swapping Bytes".
+//
+// Mnemonic : MOVBE
+// Supported forms : (2 forms)
+//
+// * MOVBEW m16, r16 [MOVBE]
+// * MOVBEW r16, m16 [MOVBE]
+//
+func (self *Program) MOVBEW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVBEW", 2, Operands { v0, v1 })
+ // MOVBEW m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_MOVBE)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xf0)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // MOVBEW r16, m16
+ if isReg16(v0) && isM16(v1) {
+ self.require(ISA_MOVBE)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xf1)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVBEW")
+ }
+ return p
+}
+
+// MOVD performs "Move Doubleword".
+//
+// Mnemonic : MOVD
+// Supported forms : (8 forms)
+//
+// * MOVD mm, r32 [MMX]
+// * MOVD r32, mm [MMX]
+// * MOVD m32, mm [MMX]
+// * MOVD mm, m32 [MMX]
+// * MOVD xmm, r32 [SSE2]
+// * MOVD r32, xmm [SSE2]
+// * MOVD m32, xmm [SSE2]
+// * MOVD xmm, m32 [SSE2]
+//
+func (self *Program) MOVD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVD", 2, Operands { v0, v1 })
+ // MOVD mm, r32
+ if isMM(v0) && isReg32(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x7e)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // MOVD r32, mm
+ if isReg32(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x6e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MOVD m32, mm
+ if isM32(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x6e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // MOVD mm, m32
+ if isMM(v0) && isM32(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x7e)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // MOVD xmm, r32
+ if isXMM(v0) && isReg32(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x7e)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // MOVD r32, xmm
+ if isReg32(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x6e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MOVD m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x6e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // MOVD xmm, m32
+ if isXMM(v0) && isM32(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x7e)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVD")
+ }
+ return p
+}
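+
+// Usage sketch (assuming a *Program value p and this package's register
+// constants): MOVD moves 32 bits between a general-purpose register and an
+// MMX/XMM register; the r32 -> xmm form zeroes the destination above bit 31.
+//
+//     p.MOVD(EAX, XMM0)    // XMM0[31:0] = EAX, upper bits cleared
+//     p.MOVD(XMM0, EAX)    // EAX = XMM0[31:0]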
+
+// MOVDDUP performs "Move One Double-FP and Duplicate".
+//
+// Mnemonic : MOVDDUP
+// Supported forms : (2 forms)
+//
+// * MOVDDUP xmm, xmm [SSE3]
+// * MOVDDUP m64, xmm [SSE3]
+//
+func (self *Program) MOVDDUP(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVDDUP", 2, Operands { v0, v1 })
+ // MOVDDUP xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x12)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MOVDDUP m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x12)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVDDUP")
+ }
+ return p
+}
+
+// MOVDQ2Q performs "Move Quadword from XMM to MMX Technology Register".
+//
+// Mnemonic : MOVDQ2Q
+// Supported forms : (1 form)
+//
+// * MOVDQ2Q xmm, mm [SSE2]
+//
+func (self *Program) MOVDQ2Q(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVDQ2Q", 2, Operands { v0, v1 })
+ // MOVDQ2Q xmm, mm
+ if isXMM(v0) && isMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xd6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVDQ2Q")
+ }
+ return p
+}
+
+// MOVDQA performs "Move Aligned Double Quadword".
+//
+// Mnemonic : MOVDQA
+// Supported forms : (3 forms)
+//
+// * MOVDQA xmm, xmm [SSE2]
+// * MOVDQA m128, xmm [SSE2]
+// * MOVDQA xmm, m128 [SSE2]
+//
+func (self *Program) MOVDQA(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVDQA", 2, Operands { v0, v1 })
+ // MOVDQA xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // MOVDQA m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x6f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // MOVDQA xmm, m128
+ if isXMM(v0) && isM128(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x7f)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVDQA")
+ }
+ return p
+}
+
+// MOVDQU performs "Move Unaligned Double Quadword".
+//
+// Mnemonic : MOVDQU
+// Supported forms : (3 forms)
+//
+// * MOVDQU xmm, xmm [SSE2]
+// * MOVDQU m128, xmm [SSE2]
+// * MOVDQU xmm, m128 [SSE2]
+//
+func (self *Program) MOVDQU(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVDQU", 2, Operands { v0, v1 })
+ // MOVDQU xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // MOVDQU m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x6f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // MOVDQU xmm, m128
+ if isXMM(v0) && isM128(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x7f)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVDQU")
+ }
+ return p
+}
+
+// MOVHLPS performs "Move Packed Single-Precision Floating-Point Values High to Low".
+//
+// Mnemonic : MOVHLPS
+// Supported forms : (1 form)
+//
+// * MOVHLPS xmm, xmm [SSE]
+//
+func (self *Program) MOVHLPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVHLPS", 2, Operands { v0, v1 })
+ // MOVHLPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x12)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVHLPS")
+ }
+ return p
+}
+
+// MOVHPD performs "Move High Packed Double-Precision Floating-Point Value".
+//
+// Mnemonic : MOVHPD
+// Supported forms : (2 forms)
+//
+// * MOVHPD m64, xmm [SSE2]
+// * MOVHPD xmm, m64 [SSE2]
+//
+func (self *Program) MOVHPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVHPD", 2, Operands { v0, v1 })
+ // MOVHPD m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x16)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // MOVHPD xmm, m64
+ if isXMM(v0) && isM64(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x17)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVHPD")
+ }
+ return p
+}
+
+// MOVHPS performs "Move High Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : MOVHPS
+// Supported forms : (2 forms)
+//
+// * MOVHPS m64, xmm [SSE]
+// * MOVHPS xmm, m64 [SSE]
+//
+func (self *Program) MOVHPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVHPS", 2, Operands { v0, v1 })
+ // MOVHPS m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x16)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // MOVHPS xmm, m64
+ if isXMM(v0) && isM64(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x17)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVHPS")
+ }
+ return p
+}
+
+// MOVL performs "Move".
+//
+// Mnemonic : MOV
+// Supported forms : (5 forms)
+//
+// * MOVL imm32, r32
+// * MOVL r32, r32
+// * MOVL m32, r32
+// * MOVL imm32, m32
+// * MOVL r32, m32
+//
+func (self *Program) MOVL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVL", 2, Operands { v0, v1 })
+ // MOVL imm32, r32
+ if isImm32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xc7)
+ m.emit(0xc0 | lcode(v[1]))
+ m.imm4(toImmAny(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xb8 | lcode(v[1]))
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // MOVL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x89)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x8b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MOVL m32, r32
+ if isM32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x8b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // MOVL imm32, m32
+ if isImm32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc7)
+ m.mrsd(0, addr(v[1]), 1)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // MOVL r32, m32
+ if isReg32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x89)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVL")
+ }
+ return p
+}
+
+// MOVLHPS performs "Move Packed Single-Precision Floating-Point Values Low to High".
+//
+// Mnemonic : MOVLHPS
+// Supported forms : (1 form)
+//
+// * MOVLHPS xmm, xmm [SSE]
+//
+func (self *Program) MOVLHPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVLHPS", 2, Operands { v0, v1 })
+ // MOVLHPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x16)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVLHPS")
+ }
+ return p
+}
+
+// MOVLPD performs "Move Low Packed Double-Precision Floating-Point Value".
+//
+// Mnemonic : MOVLPD
+// Supported forms : (2 forms)
+//
+// * MOVLPD m64, xmm [SSE2]
+// * MOVLPD xmm, m64 [SSE2]
+//
+func (self *Program) MOVLPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVLPD", 2, Operands { v0, v1 })
+ // MOVLPD m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x12)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // MOVLPD xmm, m64
+ if isXMM(v0) && isM64(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x13)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVLPD")
+ }
+ return p
+}
+
+// MOVLPS performs "Move Low Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : MOVLPS
+// Supported forms : (2 forms)
+//
+// * MOVLPS m64, xmm [SSE]
+// * MOVLPS xmm, m64 [SSE]
+//
+func (self *Program) MOVLPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVLPS", 2, Operands { v0, v1 })
+ // MOVLPS m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x12)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // MOVLPS xmm, m64
+ if isXMM(v0) && isM64(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x13)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVLPS")
+ }
+ return p
+}
+
+// MOVMSKPD performs "Extract Packed Double-Precision Floating-Point Sign Mask".
+//
+// Mnemonic : MOVMSKPD
+// Supported forms : (1 form)
+//
+// * MOVMSKPD xmm, r32 [SSE2]
+//
+func (self *Program) MOVMSKPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVMSKPD", 2, Operands { v0, v1 })
+ // MOVMSKPD xmm, r32
+ if isXMM(v0) && isReg32(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x50)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVMSKPD")
+ }
+ return p
+}
+
+// MOVMSKPS performs "Extract Packed Single-Precision Floating-Point Sign Mask".
+//
+// Mnemonic : MOVMSKPS
+// Supported forms : (1 form)
+//
+// * MOVMSKPS xmm, r32 [SSE]
+//
+func (self *Program) MOVMSKPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVMSKPS", 2, Operands { v0, v1 })
+ // MOVMSKPS xmm, r32
+ if isXMM(v0) && isReg32(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x50)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVMSKPS")
+ }
+ return p
+}
+
+// MOVNTDQ performs "Store Double Quadword Using Non-Temporal Hint".
+//
+// Mnemonic : MOVNTDQ
+// Supported forms : (1 form)
+//
+// * MOVNTDQ xmm, m128 [SSE2]
+//
+func (self *Program) MOVNTDQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVNTDQ", 2, Operands { v0, v1 })
+ // MOVNTDQ xmm, m128
+ if isXMM(v0) && isM128(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xe7)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVNTDQ")
+ }
+ return p
+}
+
+// MOVNTDQA performs "Load Double Quadword Non-Temporal Aligned Hint".
+//
+// Mnemonic : MOVNTDQA
+// Supported forms : (1 form)
+//
+// * MOVNTDQA m128, xmm [SSE4.1]
+//
+func (self *Program) MOVNTDQA(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVNTDQA", 2, Operands { v0, v1 })
+ // MOVNTDQA m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x2a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVNTDQA")
+ }
+ return p
+}
+
+// MOVNTIL performs "Store Doubleword Using Non-Temporal Hint".
+//
+// Mnemonic : MOVNTI
+// Supported forms : (1 form)
+//
+// * MOVNTIL r32, m32 [SSE2]
+//
+func (self *Program) MOVNTIL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVNTIL", 2, Operands { v0, v1 })
+ // MOVNTIL r32, m32
+ if isReg32(v0) && isM32(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xc3)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVNTIL")
+ }
+ return p
+}
+
+// MOVNTIQ performs "Store Quadword Using Non-Temporal Hint".
+//
+// Mnemonic : MOVNTI
+// Supported forms : (1 form)
+//
+// * MOVNTIQ r64, m64 [SSE2]
+//
+func (self *Program) MOVNTIQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVNTIQ", 2, Operands { v0, v1 })
+ // MOVNTIQ r64, m64
+ if isReg64(v0) && isM64(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[0]), addr(v[1]))
+ m.emit(0x0f)
+ m.emit(0xc3)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVNTIQ")
+ }
+ return p
+}
+
+// MOVNTPD performs "Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint".
+//
+// Mnemonic : MOVNTPD
+// Supported forms : (1 form)
+//
+// * MOVNTPD xmm, m128 [SSE2]
+//
+func (self *Program) MOVNTPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVNTPD", 2, Operands { v0, v1 })
+ // MOVNTPD xmm, m128
+ if isXMM(v0) && isM128(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x2b)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVNTPD")
+ }
+ return p
+}
+
+// MOVNTPS performs "Store Packed Single-Precision Floating-Point Values Using Non-Temporal Hint".
+//
+// Mnemonic : MOVNTPS
+// Supported forms : (1 form)
+//
+// * MOVNTPS xmm, m128 [SSE]
+//
+func (self *Program) MOVNTPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVNTPS", 2, Operands { v0, v1 })
+ // MOVNTPS xmm, m128
+ if isXMM(v0) && isM128(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x2b)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVNTPS")
+ }
+ return p
+}
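+
+// Usage sketch: the non-temporal stores in this family (MOVNTDQ, MOVNTI*,
+// MOVNTPD, MOVNTPS, and MOVNTQ/MOVNTS* below) bypass the cache hierarchy and
+// are only weakly ordered, so code handing the written buffer to another
+// agent typically follows them with SFENCE or MFENCE. Assuming a *Program
+// value p and a 16-byte aligned memory operand m:
+//
+//     p.MOVNTPS(XMM0, m)    // streaming 128-bit store, cache bypass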
+
+// MOVNTQ performs "Store of Quadword Using Non-Temporal Hint".
+//
+// Mnemonic : MOVNTQ
+// Supported forms : (1 form)
+//
+// * MOVNTQ mm, m64 [MMX+]
+//
+func (self *Program) MOVNTQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVNTQ", 2, Operands { v0, v1 })
+ // MOVNTQ mm, m64
+ if isMM(v0) && isM64(v1) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xe7)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVNTQ")
+ }
+ return p
+}
+
+// MOVNTSD performs "Store Scalar Double-Precision Floating-Point Value Using Non-Temporal Hint".
+//
+// Mnemonic : MOVNTSD
+// Supported forms : (1 form)
+//
+// * MOVNTSD xmm, m64 [SSE4A]
+//
+func (self *Program) MOVNTSD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVNTSD", 2, Operands { v0, v1 })
+ // MOVNTSD xmm, m64
+ if isXMM(v0) && isM64(v1) {
+ self.require(ISA_SSE4A)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x2b)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVNTSD")
+ }
+ return p
+}
+
+// MOVNTSS performs "Store Scalar Single-Precision Floating-Point Value Using Non-Temporal Hint".
+//
+// Mnemonic : MOVNTSS
+// Supported forms : (1 form)
+//
+// * MOVNTSS xmm, m32 [SSE4A]
+//
+func (self *Program) MOVNTSS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVNTSS", 2, Operands { v0, v1 })
+ // MOVNTSS xmm, m32
+ if isXMM(v0) && isM32(v1) {
+ self.require(ISA_SSE4A)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x2b)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVNTSS")
+ }
+ return p
+}
+
+// MOVQ performs "Move".
+//
+// Mnemonic : MOV
+// Supported forms : (16 forms)
+//
+// * MOVQ imm32, r64
+// * MOVQ imm64, r64
+// * MOVQ r64, r64
+// * MOVQ m64, r64
+// * MOVQ imm32, m64
+// * MOVQ r64, m64
+// * MOVQ mm, r64 [MMX]
+// * MOVQ r64, mm [MMX]
+// * MOVQ mm, mm [MMX]
+// * MOVQ m64, mm [MMX]
+// * MOVQ mm, m64 [MMX]
+// * MOVQ xmm, r64 [SSE2]
+// * MOVQ r64, xmm [SSE2]
+// * MOVQ xmm, xmm [SSE2]
+// * MOVQ m64, xmm [SSE2]
+// * MOVQ xmm, m64 [SSE2]
+//
+func (self *Program) MOVQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVQ", 2, Operands { v0, v1 })
+ // MOVQ imm32, r64
+ if isImm32Ext(v0, 8) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xc7)
+ m.emit(0xc0 | lcode(v[1]))
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // MOVQ imm64, r64
+ if isImm64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xb8 | lcode(v[1]))
+ m.imm8(toImmAny(v[0]))
+ })
+ }
+ // MOVQ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
+ m.emit(0x89)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x8b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MOVQ m64, r64
+ if isM64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x8b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // MOVQ imm32, m64
+ if isImm32Ext(v0, 8) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xc7)
+ m.mrsd(0, addr(v[1]), 1)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // MOVQ r64, m64
+ if isReg64(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[0]), addr(v[1]))
+ m.emit(0x89)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // MOVQ mm, r64
+ if isMM(v0) && isReg64(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
+ m.emit(0x0f)
+ m.emit(0x7e)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // MOVQ r64, mm
+ if isReg64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x6e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MOVQ mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // MOVQ m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x6f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x6e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // MOVQ mm, m64
+ if isMM(v0) && isM64(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x7f)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[0]), addr(v[1]))
+ m.emit(0x0f)
+ m.emit(0x7e)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // MOVQ xmm, r64
+ if isXMM(v0) && isReg64(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
+ m.emit(0x0f)
+ m.emit(0x7e)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // MOVQ r64, xmm
+ if isReg64(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0x6e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MOVQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x7e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0xd6)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // MOVQ m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x7e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0x6e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // MOVQ xmm, m64
+ if isXMM(v0) && isM64(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xd6)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexm(1, hcode(v[0]), addr(v[1]))
+ m.emit(0x0f)
+ m.emit(0x7e)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVQ")
+ }
+ return p
+}
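+
+// Usage sketch (assuming a *Program value p; immediates are plain Go
+// integers, as with the other methods in this file): a constant that fits in
+// a sign-extended imm32 matches the shorter REX.W C7 form, while a full
+// 64-bit constant can only use the ten-byte B8+r imm64 form, which is why
+// both encodings are generated.
+//
+//     p.MOVQ(1, RAX)                      // fits imm32, short form available
+//     p.MOVQ(0x1122334455667788, RAX)     // needs the imm64 form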
+
+// MOVQ2DQ performs "Move Quadword from MMX Technology to XMM Register".
+//
+// Mnemonic : MOVQ2DQ
+// Supported forms : (1 form)
+//
+// * MOVQ2DQ mm, xmm [SSE2]
+//
+func (self *Program) MOVQ2DQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVQ2DQ", 2, Operands { v0, v1 })
+ // MOVQ2DQ mm, xmm
+ if isMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xd6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVQ2DQ")
+ }
+ return p
+}
+
+// MOVSBL performs "Move with Sign-Extension".
+//
+// Mnemonic : MOVSX
+// Supported forms : (2 forms)
+//
+// * MOVSBL r8, r32
+// * MOVSBL m8, r32
+//
+func (self *Program) MOVSBL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVSBL", 2, Operands { v0, v1 })
+ // MOVSBL r8, r32
+ if isReg8(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0xbe)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MOVSBL m8, r32
+ if isM8(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xbe)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVSBL")
+ }
+ return p
+}
+
+// MOVSBQ performs "Move with Sign-Extension".
+//
+// Mnemonic : MOVSX
+// Supported forms : (2 forms)
+//
+// * MOVSBQ r8, r64
+// * MOVSBQ m8, r64
+//
+func (self *Program) MOVSBQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVSBQ", 2, Operands { v0, v1 })
+ // MOVSBQ r8, r64
+ if isReg8(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0xbe)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MOVSBQ m8, r64
+ if isM8(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0xbe)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVSBQ")
+ }
+ return p
+}
+
+// MOVSBW performs "Move with Sign-Extension".
+//
+// Mnemonic : MOVSX
+// Supported forms : (2 forms)
+//
+// * MOVSBW r8, r16
+// * MOVSBW m8, r16
+//
+func (self *Program) MOVSBW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVSBW", 2, Operands { v0, v1 })
+ // MOVSBW r8, r16
+ if isReg8(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0xbe)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MOVSBW m8, r16
+ if isM8(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xbe)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVSBW")
+ }
+ return p
+}
+
+// MOVSD performs "Move Scalar Double-Precision Floating-Point Value".
+//
+// Mnemonic : MOVSD
+// Supported forms : (3 forms)
+//
+// * MOVSD xmm, xmm [SSE2]
+// * MOVSD m64, xmm [SSE2]
+// * MOVSD xmm, m64 [SSE2]
+//
+func (self *Program) MOVSD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVSD", 2, Operands { v0, v1 })
+ // MOVSD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // MOVSD m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x10)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // MOVSD xmm, m64
+ if isXMM(v0) && isM64(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x11)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVSD")
+ }
+ return p
+}
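+
+// Usage sketch: the register-to-register MOVSD form merges, writing only the
+// low 64 bits of the destination, while the m64 -> xmm load form zeroes the
+// destination's upper 64 bits; the two behave differently when the high lane
+// is live. Assuming a *Program value p:
+//
+//     p.MOVSD(XMM1, XMM0)    // XMM0[63:0] = XMM1[63:0]; XMM0[127:64] kept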
+
+// MOVSHDUP performs "Move Packed Single-FP High and Duplicate".
+//
+// Mnemonic : MOVSHDUP
+// Supported forms : (2 forms)
+//
+// * MOVSHDUP xmm, xmm [SSE3]
+// * MOVSHDUP m128, xmm [SSE3]
+//
+func (self *Program) MOVSHDUP(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVSHDUP", 2, Operands { v0, v1 })
+ // MOVSHDUP xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x16)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MOVSHDUP m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x16)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVSHDUP")
+ }
+ return p
+}
+
+// MOVSLDUP performs "Move Packed Single-FP Low and Duplicate".
+//
+// Mnemonic : MOVSLDUP
+// Supported forms : (2 forms)
+//
+// * MOVSLDUP xmm, xmm [SSE3]
+// * MOVSLDUP m128, xmm [SSE3]
+//
+func (self *Program) MOVSLDUP(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVSLDUP", 2, Operands { v0, v1 })
+ // MOVSLDUP xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x12)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MOVSLDUP m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x12)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVSLDUP")
+ }
+ return p
+}
+
+// MOVSLQ performs "Move Doubleword to Quadword with Sign-Extension".
+//
+// Mnemonic : MOVSXD
+// Supported forms : (2 forms)
+//
+// * MOVSLQ r32, r64
+// * MOVSLQ m32, r64
+//
+func (self *Program) MOVSLQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVSLQ", 2, Operands { v0, v1 })
+ // MOVSLQ r32, r64
+ if isReg32(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x63)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MOVSLQ m32, r64
+ if isM32(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x63)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVSLQ")
+ }
+ return p
+}
+
+// MOVSS performs "Move Scalar Single-Precision Floating-Point Value".
+//
+// Mnemonic : MOVSS
+// Supported forms : (3 forms)
+//
+// * MOVSS xmm, xmm [SSE]
+// * MOVSS m32, xmm [SSE]
+// * MOVSS xmm, m32 [SSE]
+//
+func (self *Program) MOVSS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVSS", 2, Operands { v0, v1 })
+ // MOVSS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // MOVSS m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x10)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // MOVSS xmm, m32
+ if isXMM(v0) && isM32(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x11)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVSS")
+ }
+ return p
+}
+
+// MOVSWL performs "Move with Sign-Extension".
+//
+// Mnemonic : MOVSX
+// Supported forms : (2 forms)
+//
+// * MOVSWL r16, r32
+// * MOVSWL m16, r32
+//
+func (self *Program) MOVSWL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVSWL", 2, Operands { v0, v1 })
+ // MOVSWL r16, r32
+ if isReg16(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xbf)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MOVSWL m16, r32
+ if isM16(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xbf)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVSWL")
+ }
+ return p
+}
+
+// MOVSWQ performs "Move with Sign-Extension".
+//
+// Mnemonic : MOVSX
+// Supported forms : (2 forms)
+//
+// * MOVSWQ r16, r64
+// * MOVSWQ m16, r64
+//
+func (self *Program) MOVSWQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVSWQ", 2, Operands { v0, v1 })
+ // MOVSWQ r16, r64
+ if isReg16(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0xbf)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MOVSWQ m16, r64
+ if isM16(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0xbf)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVSWQ")
+ }
+ return p
+}
+
+// MOVUPD performs "Move Unaligned Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : MOVUPD
+// Supported forms : (3 forms)
+//
+// * MOVUPD xmm, xmm [SSE2]
+// * MOVUPD m128, xmm [SSE2]
+// * MOVUPD xmm, m128 [SSE2]
+//
+func (self *Program) MOVUPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVUPD", 2, Operands { v0, v1 })
+ // MOVUPD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // MOVUPD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x10)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // MOVUPD xmm, m128
+ if isXMM(v0) && isM128(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x11)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVUPD")
+ }
+ return p
+}
+
+// MOVUPS performs "Move Unaligned Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : MOVUPS
+// Supported forms : (3 forms)
+//
+// * MOVUPS xmm, xmm [SSE]
+// * MOVUPS m128, xmm [SSE]
+// * MOVUPS xmm, m128 [SSE]
+//
+func (self *Program) MOVUPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVUPS", 2, Operands { v0, v1 })
+ // MOVUPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // MOVUPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x10)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // MOVUPS xmm, m128
+ if isXMM(v0) && isM128(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x11)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVUPS")
+ }
+ return p
+}
+
+// MOVW performs "Move".
+//
+// Mnemonic : MOV
+// Supported forms : (5 forms)
+//
+// * MOVW imm16, r16
+// * MOVW r16, r16
+// * MOVW m16, r16
+// * MOVW imm16, m16
+// * MOVW r16, m16
+//
+func (self *Program) MOVW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVW", 2, Operands { v0, v1 })
+ // MOVW imm16, r16
+ if isImm16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xc7)
+ m.emit(0xc0 | lcode(v[1]))
+ m.imm2(toImmAny(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xb8 | lcode(v[1]))
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // MOVW r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x89)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x8b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MOVW m16, r16
+ if isM16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x8b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // MOVW imm16, m16
+ if isImm16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc7)
+ m.mrsd(0, addr(v[1]), 1)
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // MOVW r16, m16
+ if isReg16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x89)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVW")
+ }
+ return p
+}
+
+// MOVZBL performs "Move with Zero-Extend".
+//
+// Mnemonic : MOVZX
+// Supported forms : (2 forms)
+//
+// * MOVZBL r8, r32
+// * MOVZBL m8, r32
+//
+func (self *Program) MOVZBL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVZBL", 2, Operands { v0, v1 })
+ // MOVZBL r8, r32
+ if isReg8(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0xb6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MOVZBL m8, r32
+ if isM8(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xb6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVZBL")
+ }
+ return p
+}
+
+// MOVZBQ performs "Move with Zero-Extend".
+//
+// Mnemonic : MOVZX
+// Supported forms : (2 forms)
+//
+// * MOVZBQ r8, r64
+// * MOVZBQ m8, r64
+//
+func (self *Program) MOVZBQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVZBQ", 2, Operands { v0, v1 })
+ // MOVZBQ r8, r64
+ if isReg8(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0xb6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MOVZBQ m8, r64
+ if isM8(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0xb6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVZBQ")
+ }
+ return p
+}
+
+// MOVZBW performs "Move with Zero-Extend".
+//
+// Mnemonic : MOVZX
+// Supported forms : (2 forms)
+//
+// * MOVZBW r8, r16
+// * MOVZBW m8, r16
+//
+func (self *Program) MOVZBW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVZBW", 2, Operands { v0, v1 })
+ // MOVZBW r8, r16
+ if isReg8(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0xb6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MOVZBW m8, r16
+ if isM8(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xb6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVZBW")
+ }
+ return p
+}
+
+// MOVZWL performs "Move with Zero-Extend".
+//
+// Mnemonic : MOVZX
+// Supported forms : (2 forms)
+//
+// * MOVZWL r16, r32
+// * MOVZWL m16, r32
+//
+func (self *Program) MOVZWL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVZWL", 2, Operands { v0, v1 })
+ // MOVZWL r16, r32
+ if isReg16(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xb7)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MOVZWL m16, r32
+ if isM16(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xb7)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVZWL")
+ }
+ return p
+}
+
+// MOVZWQ performs "Move with Zero-Extend".
+//
+// Mnemonic : MOVZX
+// Supported forms : (2 forms)
+//
+// * MOVZWQ r16, r64
+// * MOVZWQ m16, r64
+//
+func (self *Program) MOVZWQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MOVZWQ", 2, Operands { v0, v1 })
+ // MOVZWQ r16, r64
+ if isReg16(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0xb7)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MOVZWQ m16, r64
+ if isM16(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0xb7)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MOVZWQ")
+ }
+ return p
+}
+
+// MPSADBW performs "Compute Multiple Packed Sums of Absolute Difference".
+//
+// Mnemonic : MPSADBW
+// Supported forms : (2 forms)
+//
+// * MPSADBW imm8, xmm, xmm [SSE4.1]
+// * MPSADBW imm8, m128, xmm [SSE4.1]
+//
+func (self *Program) MPSADBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("MPSADBW", 3, Operands { v0, v1, v2 })
+ // MPSADBW imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // MPSADBW imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x42)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MPSADBW")
+ }
+ return p
+}
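+
+// For the three-operand immediate forms in this file the Go argument order
+// puts the immediate first and the destination last, while the immediate
+// byte itself is emitted last (m.imm1 runs after the ModRM/SIB bytes),
+// matching the hardware encoding 66 0F 3A 42 /r ib. A sketch, assuming the
+// package exports XMM0/XMM1 register constants:
+//
+//     p.MPSADBW(1, XMM1, XMM0) // 66 0F 3A 42 C1 01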
+
+// MULB performs "Unsigned Multiply".
+//
+// Mnemonic : MUL
+// Supported forms : (2 forms)
+//
+// * MULB r8
+// * MULB m8
+//
+func (self *Program) MULB(v0 interface{}) *Instruction {
+ p := self.alloc("MULB", 1, Operands { v0 })
+ // MULB r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0xf6)
+ m.emit(0xe0 | lcode(v[0]))
+ })
+ }
+ // MULB m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xf6)
+ m.mrsd(4, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MULB")
+ }
+ return p
+}
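+
+// MUL sits in the F6/F7 opcode group, where the ModRM reg field is an
+// opcode extension rather than a register: the memory form passes the /4
+// digit to m.mrsd, and the register form folds the same digit into the
+// ModRM byte (0xe0 == 0xc0 | 4<<3). For example (sketch):
+//
+//     p.MULB(AL) // F6 E0: AX = AL * AL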
+
+// MULL performs "Unsigned Multiply".
+//
+// Mnemonic : MUL
+// Supported forms : (2 forms)
+//
+// * MULL r32
+// * MULL m32
+//
+func (self *Program) MULL(v0 interface{}) *Instruction {
+ p := self.alloc("MULL", 1, Operands { v0 })
+ // MULL r32
+ if isReg32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], false)
+ m.emit(0xf7)
+ m.emit(0xe0 | lcode(v[0]))
+ })
+ }
+ // MULL m32
+ if isM32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xf7)
+ m.mrsd(4, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MULL")
+ }
+ return p
+}
+
+// MULPD performs "Multiply Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : MULPD
+// Supported forms : (2 forms)
+//
+// * MULPD xmm, xmm [SSE2]
+// * MULPD m128, xmm [SSE2]
+//
+func (self *Program) MULPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MULPD", 2, Operands { v0, v1 })
+ // MULPD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MULPD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x59)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MULPD")
+ }
+ return p
+}
+
+// MULPS performs "Multiply Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : MULPS
+// Supported forms : (2 forms)
+//
+// * MULPS xmm, xmm [SSE]
+// * MULPS m128, xmm [SSE]
+//
+func (self *Program) MULPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MULPS", 2, Operands { v0, v1 })
+ // MULPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MULPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x59)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MULPS")
+ }
+ return p
+}
+
+// MULQ performs "Unsigned Multiply".
+//
+// Mnemonic : MUL
+// Supported forms : (2 forms)
+//
+// * MULQ r64
+// * MULQ m64
+//
+func (self *Program) MULQ(v0 interface{}) *Instruction {
+ p := self.alloc("MULQ", 1, Operands { v0 })
+ // MULQ r64
+ if isReg64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]))
+ m.emit(0xf7)
+ m.emit(0xe0 | lcode(v[0]))
+ })
+ }
+ // MULQ m64
+ if isM64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[0]))
+ m.emit(0xf7)
+ m.mrsd(4, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MULQ")
+ }
+ return p
+}
+
+// MULSD performs "Multiply Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : MULSD
+// Supported forms : (2 forms)
+//
+// * MULSD xmm, xmm [SSE2]
+// * MULSD m64, xmm [SSE2]
+//
+func (self *Program) MULSD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MULSD", 2, Operands { v0, v1 })
+ // MULSD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MULSD m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x59)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MULSD")
+ }
+ return p
+}
+
+// MULSS performs "Multiply Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : MULSS
+// Supported forms : (2 forms)
+//
+// * MULSS xmm, xmm [SSE]
+// * MULSS m32, xmm [SSE]
+//
+func (self *Program) MULSS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("MULSS", 2, Operands { v0, v1 })
+ // MULSS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // MULSS m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x59)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MULSS")
+ }
+ return p
+}
+
+// MULW performs "Unsigned Multiply".
+//
+// Mnemonic : MUL
+// Supported forms : (2 forms)
+//
+// * MULW r16
+// * MULW m16
+//
+func (self *Program) MULW(v0 interface{}) *Instruction {
+ p := self.alloc("MULW", 1, Operands { v0 })
+ // MULW r16
+ if isReg16(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[0], false)
+ m.emit(0xf7)
+ m.emit(0xe0 | lcode(v[0]))
+ })
+ }
+ // MULW m16
+ if isM16(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xf7)
+ m.mrsd(4, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MULW")
+ }
+ return p
+}
+
+// MULXL performs "Unsigned Multiply Without Affecting Flags".
+//
+// Mnemonic : MULX
+// Supported forms : (2 forms)
+//
+// * MULXL r32, r32, r32 [BMI2]
+// * MULXL m32, r32, r32 [BMI2]
+//
+func (self *Program) MULXL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("MULXL", 3, Operands { v0, v1, v2 })
+ // MULXL r32, r32, r32
+ if isReg32(v0) && isReg32(v1) && isReg32(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7b ^ (hlcode(v[1]) << 3))
+ m.emit(0xf6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // MULXL m32, r32, r32
+ if isM32(v0) && isReg32(v1) && isReg32(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x03, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xf6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MULXL")
+ }
+ return p
+}
+
+// MULXQ performs "Unsigned Multiply Without Affecting Flags".
+//
+// Mnemonic : MULX
+// Supported forms : (2 forms)
+//
+// * MULXQ r64, r64, r64 [BMI2]
+// * MULXQ m64, r64, r64 [BMI2]
+//
+func (self *Program) MULXQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("MULXQ", 3, Operands { v0, v1, v2 })
+ // MULXQ r64, r64, r64
+ if isReg64(v0) && isReg64(v1) && isReg64(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfb ^ (hlcode(v[1]) << 3))
+ m.emit(0xf6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // MULXQ m64, r64, r64
+ if isM64(v0) && isReg64(v1) && isReg64(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x83, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xf6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for MULXQ")
+ }
+ return p
+}
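+
+// The BMI2 MULX forms are VEX-encoded. The register form builds the 3-byte
+// VEX prefix by hand: 0xc4, then 0xe2 XORed with the R and B register bits
+// (VEX stores them inverted; the low bits select the 0F38 opcode map), then
+// 0xfb carrying W=1, the inverted vvvv field for v[1] (hlcode(v[1])<<3) and
+// pp=0b11 for the implied F2 prefix. The memory form hands the same fields
+// to the m.vex3 helper. Operand roles: v[0] is the explicit r/m multiplier,
+// v[1] lands in VEX.vvvv, v[2] in ModRM.reg, and RDX is the implicit second
+// multiplicand.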
+
+// MWAIT performs "Monitor Wait".
+//
+// Mnemonic : MWAIT
+// Supported forms : (1 form)
+//
+// * MWAIT [MONITOR]
+//
+func (self *Program) MWAIT() *Instruction {
+ p := self.alloc("MWAIT", 0, Operands { })
+ // MWAIT
+ self.require(ISA_MONITOR)
+ p.domain = DomainMisc
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x01)
+ m.emit(0xc9)
+ })
+ return p
+}
+
+// MWAITX performs "Monitor Wait with Timeout".
+//
+// Mnemonic : MWAITX
+// Supported forms : (1 form)
+//
+// * MWAITX [MONITORX]
+//
+func (self *Program) MWAITX() *Instruction {
+ p := self.alloc("MWAITX", 0, Operands { })
+ // MWAITX
+ self.require(ISA_MONITORX)
+ p.domain = DomainMisc
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x01)
+ m.emit(0xfb)
+ })
+ return p
+}
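+
+// MWAIT and MWAITX take no explicit operands, so each method registers a
+// single unconditional encoder for its fixed byte sequence (0F 01 C9 and
+// 0F 01 FB respectively) and only gates on the ISA via self.require. Their
+// hint and extension arguments travel implicitly in EAX/ECX (plus EBX for
+// the MWAITX timeout), which is why the Operands list is empty.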
+
+// NEGB performs "Two's Complement Negation".
+//
+// Mnemonic : NEG
+// Supported forms : (2 forms)
+//
+// * NEGB r8
+// * NEGB m8
+//
+func (self *Program) NEGB(v0 interface{}) *Instruction {
+ p := self.alloc("NEGB", 1, Operands { v0 })
+ // NEGB r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0xf6)
+ m.emit(0xd8 | lcode(v[0]))
+ })
+ }
+ // NEGB m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xf6)
+ m.mrsd(3, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for NEGB")
+ }
+ return p
+}
+
+// NEGL performs "Two's Complement Negation".
+//
+// Mnemonic : NEG
+// Supported forms : (2 forms)
+//
+// * NEGL r32
+// * NEGL m32
+//
+func (self *Program) NEGL(v0 interface{}) *Instruction {
+ p := self.alloc("NEGL", 1, Operands { v0 })
+ // NEGL r32
+ if isReg32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], false)
+ m.emit(0xf7)
+ m.emit(0xd8 | lcode(v[0]))
+ })
+ }
+ // NEGL m32
+ if isM32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xf7)
+ m.mrsd(3, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for NEGL")
+ }
+ return p
+}
+
+// NEGQ performs "Two's Complement Negation".
+//
+// Mnemonic : NEG
+// Supported forms : (2 forms)
+//
+// * NEGQ r64
+// * NEGQ m64
+//
+func (self *Program) NEGQ(v0 interface{}) *Instruction {
+ p := self.alloc("NEGQ", 1, Operands { v0 })
+ // NEGQ r64
+ if isReg64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]))
+ m.emit(0xf7)
+ m.emit(0xd8 | lcode(v[0]))
+ })
+ }
+ // NEGQ m64
+ if isM64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[0]))
+ m.emit(0xf7)
+ m.mrsd(3, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for NEGQ")
+ }
+ return p
+}
+
+// NEGW performs "Two's Complement Negation".
+//
+// Mnemonic : NEG
+// Supported forms : (2 forms)
+//
+// * NEGW r16
+// * NEGW m16
+//
+func (self *Program) NEGW(v0 interface{}) *Instruction {
+ p := self.alloc("NEGW", 1, Operands { v0 })
+ // NEGW r16
+ if isReg16(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[0], false)
+ m.emit(0xf7)
+ m.emit(0xd8 | lcode(v[0]))
+ })
+ }
+ // NEGW m16
+ if isM16(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xf7)
+ m.mrsd(3, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for NEGW")
+ }
+ return p
+}
+
+// NOP performs "No Operation".
+//
+// Mnemonic : NOP
+// Supported forms : (1 form)
+//
+// * NOP
+//
+func (self *Program) NOP() *Instruction {
+ p := self.alloc("NOP", 0, Operands { })
+ // NOP
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x90)
+ })
+ return p
+}
+
+// NOTB performs "One's Complement Negation".
+//
+// Mnemonic : NOT
+// Supported forms : (2 forms)
+//
+// * NOTB r8
+// * NOTB m8
+//
+func (self *Program) NOTB(v0 interface{}) *Instruction {
+ p := self.alloc("NOTB", 1, Operands { v0 })
+ // NOTB r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0xf6)
+ m.emit(0xd0 | lcode(v[0]))
+ })
+ }
+ // NOTB m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xf6)
+ m.mrsd(2, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for NOTB")
+ }
+ return p
+}
+
+// NOTL performs "One's Complement Negation".
+//
+// Mnemonic : NOT
+// Supported forms : (2 forms)
+//
+// * NOTL r32
+// * NOTL m32
+//
+func (self *Program) NOTL(v0 interface{}) *Instruction {
+ p := self.alloc("NOTL", 1, Operands { v0 })
+ // NOTL r32
+ if isReg32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], false)
+ m.emit(0xf7)
+ m.emit(0xd0 | lcode(v[0]))
+ })
+ }
+ // NOTL m32
+ if isM32(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xf7)
+ m.mrsd(2, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for NOTL")
+ }
+ return p
+}
+
+// NOTQ performs "One's Complement Negation".
+//
+// Mnemonic : NOT
+// Supported forms : (2 forms)
+//
+// * NOTQ r64
+// * NOTQ m64
+//
+func (self *Program) NOTQ(v0 interface{}) *Instruction {
+ p := self.alloc("NOTQ", 1, Operands { v0 })
+ // NOTQ r64
+ if isReg64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]))
+ m.emit(0xf7)
+ m.emit(0xd0 | lcode(v[0]))
+ })
+ }
+ // NOTQ m64
+ if isM64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[0]))
+ m.emit(0xf7)
+ m.mrsd(2, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for NOTQ")
+ }
+ return p
+}
+
+// NOTW performs "One's Complement Negation".
+//
+// Mnemonic : NOT
+// Supported forms : (2 forms)
+//
+// * NOTW r16
+// * NOTW m16
+//
+func (self *Program) NOTW(v0 interface{}) *Instruction {
+ p := self.alloc("NOTW", 1, Operands { v0 })
+ // NOTW r16
+ if isReg16(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[0], false)
+ m.emit(0xf7)
+ m.emit(0xd0 | lcode(v[0]))
+ })
+ }
+ // NOTW m16
+ if isM16(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xf7)
+ m.mrsd(2, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for NOTW")
+ }
+ return p
+}
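+
+// NOT (/2) and NEG (/3) live in the same F6/F7 opcode group as MUL (/4)
+// earlier in this file: byte variants use opcode F6 and the wider ones F7,
+// with a 0x66 prefix selecting 16-bit operands and REX.W (0x48 | ... or
+// m.rexm(1, ...)) selecting 64-bit ones. Worked example (sketch):
+//
+//     p.NOTQ(RAX) // 48 F7 D0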
+
+// ORB performs "Logical Inclusive OR".
+//
+// Mnemonic : OR
+// Supported forms : (6 forms)
+//
+// * ORB imm8, al
+// * ORB imm8, r8
+// * ORB r8, r8
+// * ORB m8, r8
+// * ORB imm8, m8
+// * ORB r8, m8
+//
+func (self *Program) ORB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ORB", 2, Operands { v0, v1 })
+ // ORB imm8, al
+ if isImm8(v0) && v1 == AL {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0c)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ORB imm8, r8
+ if isImm8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0x80)
+ m.emit(0xc8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ORB r8, r8
+ if isReg8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
+ m.emit(0x08)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1]))
+ m.emit(0x0a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ORB m8, r8
+ if isM8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1]))
+ m.emit(0x0a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // ORB imm8, m8
+ if isImm8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x80)
+ m.mrsd(1, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ORB r8, m8
+ if isReg8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
+ m.emit(0x08)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ORB")
+ }
+ return p
+}
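+
+// The register-register form above registers two equivalent encoders:
+// 08 /r (store direction, reg field = source) and 0A /r (load direction,
+// reg field = destination). Both are the same length, so the duplication
+// simply records every legal encoding of the form; which one is emitted is
+// presumably decided by the library's later encoding pass.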
+
+// ORL performs "Logical Inclusive OR".
+//
+// Mnemonic : OR
+// Supported forms : (8 forms)
+//
+// * ORL imm32, eax
+// * ORL imm8, r32
+// * ORL imm32, r32
+// * ORL r32, r32
+// * ORL m32, r32
+// * ORL imm8, m32
+// * ORL imm32, m32
+// * ORL r32, m32
+//
+func (self *Program) ORL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ORL", 2, Operands { v0, v1 })
+ // ORL imm32, eax
+ if isImm32(v0) && v1 == EAX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0d)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // ORL imm8, r32
+ if isImm8Ext(v0, 4) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x83)
+ m.emit(0xc8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ORL imm32, r32
+ if isImm32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x81)
+ m.emit(0xc8 | lcode(v[1]))
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // ORL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x09)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ORL m32, r32
+ if isM32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // ORL imm8, m32
+ if isImm8Ext(v0, 4) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x83)
+ m.mrsd(1, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ORL imm32, m32
+ if isImm32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x81)
+ m.mrsd(1, addr(v[1]), 1)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // ORL r32, m32
+ if isReg32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x09)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ORL")
+ }
+ return p
+}
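+
+// The immediate forms are width-sensitive: isImm8Ext(v0, 4) admits the
+// sign-extended 83 /1 ib encoding only when the value fits in a signed
+// byte, isImm32 admits the full 81 /1 id encoding, and EAX additionally
+// gets the short accumulator form 0D id. Candidates for a small immediate
+// (sketch):
+//
+//     p.ORL(1, EAX) // 0D 01 00 00 00     (5 bytes, eax short form)
+//                   // 83 C8 01           (3 bytes, sign-extended imm8)
+//                   // 81 C8 01 00 00 00  (6 bytes, full imm32)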
+
+// ORPD performs "Bitwise Logical OR of Double-Precision Floating-Point Values".
+//
+// Mnemonic : ORPD
+// Supported forms : (2 forms)
+//
+// * ORPD xmm, xmm [SSE2]
+// * ORPD m128, xmm [SSE2]
+//
+func (self *Program) ORPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ORPD", 2, Operands { v0, v1 })
+ // ORPD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x56)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ORPD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x56)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ORPD")
+ }
+ return p
+}
+
+// ORPS performs "Bitwise Logical OR of Single-Precision Floating-Point Values".
+//
+// Mnemonic : ORPS
+// Supported forms : (2 forms)
+//
+// * ORPS xmm, xmm [SSE]
+// * ORPS m128, xmm [SSE]
+//
+func (self *Program) ORPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ORPS", 2, Operands { v0, v1 })
+ // ORPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x56)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ORPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x56)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ORPS")
+ }
+ return p
+}
+
+// ORQ performs "Logical Inclusive OR".
+//
+// Mnemonic : OR
+// Supported forms : (8 forms)
+//
+// * ORQ imm32, rax
+// * ORQ imm8, r64
+// * ORQ imm32, r64
+// * ORQ r64, r64
+// * ORQ m64, r64
+// * ORQ imm8, m64
+// * ORQ imm32, m64
+// * ORQ r64, m64
+//
+func (self *Program) ORQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ORQ", 2, Operands { v0, v1 })
+ // ORQ imm32, rax
+ if isImm32(v0) && v1 == RAX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48)
+ m.emit(0x0d)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // ORQ imm8, r64
+ if isImm8Ext(v0, 8) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0x83)
+ m.emit(0xc8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ORQ imm32, r64
+ if isImm32Ext(v0, 8) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0x81)
+ m.emit(0xc8 | lcode(v[1]))
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // ORQ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
+ m.emit(0x09)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ORQ m64, r64
+ if isM64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // ORQ imm8, m64
+ if isImm8Ext(v0, 8) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0x83)
+ m.mrsd(1, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ORQ imm32, m64
+ if isImm32Ext(v0, 8) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0x81)
+ m.mrsd(1, addr(v[1]), 1)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // ORQ r64, m64
+ if isReg64(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[0]), addr(v[1]))
+ m.emit(0x09)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ORQ")
+ }
+ return p
+}
+
+// ORW performs "Logical Inclusive OR".
+//
+// Mnemonic : OR
+// Supported forms : (8 forms)
+//
+// * ORW imm16, ax
+// * ORW imm8, r16
+// * ORW imm16, r16
+// * ORW r16, r16
+// * ORW m16, r16
+// * ORW imm8, m16
+// * ORW imm16, m16
+// * ORW r16, m16
+//
+func (self *Program) ORW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ORW", 2, Operands { v0, v1 })
+ // ORW imm16, ax
+ if isImm16(v0) && v1 == AX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.emit(0x0d)
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // ORW imm8, r16
+ if isImm8Ext(v0, 2) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x83)
+ m.emit(0xc8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ORW imm16, r16
+ if isImm16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x81)
+ m.emit(0xc8 | lcode(v[1]))
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // ORW r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x09)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // ORW m16, r16
+ if isM16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // ORW imm8, m16
+ if isImm8Ext(v0, 2) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x83)
+ m.mrsd(1, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ORW imm16, m16
+ if isImm16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x81)
+ m.mrsd(1, addr(v[1]), 1)
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // ORW r16, m16
+ if isReg16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x09)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ORW")
+ }
+ return p
+}
+
+// PABSB performs "Packed Absolute Value of Byte Integers".
+//
+// Mnemonic : PABSB
+// Supported forms : (4 forms)
+//
+// * PABSB mm, mm [SSSE3]
+// * PABSB m64, mm [SSSE3]
+// * PABSB xmm, xmm [SSSE3]
+// * PABSB m128, xmm [SSSE3]
+//
+func (self *Program) PABSB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PABSB", 2, Operands { v0, v1 })
+ // PABSB mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x1c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PABSB m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x1c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PABSB xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x1c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PABSB m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x1c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PABSB")
+ }
+ return p
+}
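+
+// The four-form pattern used by PABSB and by the PACK*/PADD* family below
+// encodes the MMX-register forms with the bare escape sequence (here
+// 0F 38 1C) and the XMM forms with a leading 0x66 operand-size prefix,
+// which is how SSE selects the 128-bit variant of a legacy MMX opcode.
+// Sketch, again assuming exported XMM constants:
+//
+//     p.PABSB(XMM1, XMM0) // 66 0F 38 1C C1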
+
+// PABSD performs "Packed Absolute Value of Doubleword Integers".
+//
+// Mnemonic : PABSD
+// Supported forms : (4 forms)
+//
+// * PABSD mm, mm [SSSE3]
+// * PABSD m64, mm [SSSE3]
+// * PABSD xmm, xmm [SSSE3]
+// * PABSD m128, xmm [SSSE3]
+//
+func (self *Program) PABSD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PABSD", 2, Operands { v0, v1 })
+ // PABSD mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x1e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PABSD m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x1e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PABSD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x1e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PABSD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x1e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PABSD")
+ }
+ return p
+}
+
+// PABSW performs "Packed Absolute Value of Word Integers".
+//
+// Mnemonic : PABSW
+// Supported forms : (4 forms)
+//
+// * PABSW mm, mm [SSSE3]
+// * PABSW m64, mm [SSSE3]
+// * PABSW xmm, xmm [SSSE3]
+// * PABSW m128, xmm [SSSE3]
+//
+func (self *Program) PABSW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PABSW", 2, Operands { v0, v1 })
+ // PABSW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x1d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PABSW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x1d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PABSW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x1d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PABSW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x1d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PABSW")
+ }
+ return p
+}
+
+// PACKSSDW performs "Pack Doublewords into Words with Signed Saturation".
+//
+// Mnemonic : PACKSSDW
+// Supported forms : (4 forms)
+//
+// * PACKSSDW mm, mm [MMX]
+// * PACKSSDW m64, mm [MMX]
+// * PACKSSDW xmm, xmm [SSE2]
+// * PACKSSDW m128, xmm [SSE2]
+//
+func (self *Program) PACKSSDW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PACKSSDW", 2, Operands { v0, v1 })
+ // PACKSSDW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x6b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PACKSSDW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x6b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PACKSSDW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x6b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PACKSSDW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x6b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PACKSSDW")
+ }
+ return p
+}
+
+// PACKSSWB performs "Pack Words into Bytes with Signed Saturation".
+//
+// Mnemonic : PACKSSWB
+// Supported forms : (4 forms)
+//
+// * PACKSSWB mm, mm [MMX]
+// * PACKSSWB m64, mm [MMX]
+// * PACKSSWB xmm, xmm [SSE2]
+// * PACKSSWB m128, xmm [SSE2]
+//
+func (self *Program) PACKSSWB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PACKSSWB", 2, Operands { v0, v1 })
+ // PACKSSWB mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x63)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PACKSSWB m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x63)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PACKSSWB xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x63)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PACKSSWB m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x63)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PACKSSWB")
+ }
+ return p
+}
+
+// PACKUSDW performs "Pack Doublewords into Words with Unsigned Saturation".
+//
+// Mnemonic : PACKUSDW
+// Supported forms : (2 forms)
+//
+// * PACKUSDW xmm, xmm [SSE4.1]
+// * PACKUSDW m128, xmm [SSE4.1]
+//
+func (self *Program) PACKUSDW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PACKUSDW", 2, Operands { v0, v1 })
+ // PACKUSDW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x2b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PACKUSDW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x2b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PACKUSDW")
+ }
+ return p
+}
+
+// PACKUSWB performs "Pack Words into Bytes with Unsigned Saturation".
+//
+// Mnemonic : PACKUSWB
+// Supported forms : (4 forms)
+//
+// * PACKUSWB mm, mm [MMX]
+// * PACKUSWB m64, mm [MMX]
+// * PACKUSWB xmm, xmm [SSE2]
+// * PACKUSWB m128, xmm [SSE2]
+//
+func (self *Program) PACKUSWB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PACKUSWB", 2, Operands { v0, v1 })
+ // PACKUSWB mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x67)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PACKUSWB m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x67)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PACKUSWB xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x67)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PACKUSWB m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x67)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PACKUSWB")
+ }
+ return p
+}
+
+// PADDB performs "Add Packed Byte Integers".
+//
+// Mnemonic : PADDB
+// Supported forms : (4 forms)
+//
+// * PADDB mm, mm [MMX]
+// * PADDB m64, mm [MMX]
+// * PADDB xmm, xmm [SSE2]
+// * PADDB m128, xmm [SSE2]
+//
+func (self *Program) PADDB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PADDB", 2, Operands { v0, v1 })
+ // PADDB mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xfc)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PADDB m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xfc)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PADDB xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xfc)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PADDB m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xfc)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PADDB")
+ }
+ return p
+}
+
+// PADDD performs "Add Packed Doubleword Integers".
+//
+// Mnemonic : PADDD
+// Supported forms : (4 forms)
+//
+// * PADDD mm, mm [MMX]
+// * PADDD m64, mm [MMX]
+// * PADDD xmm, xmm [SSE2]
+// * PADDD m128, xmm [SSE2]
+//
+func (self *Program) PADDD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PADDD", 2, Operands { v0, v1 })
+ // PADDD mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xfe)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PADDD m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xfe)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PADDD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xfe)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PADDD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xfe)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PADDD")
+ }
+ return p
+}
+
+// PADDQ performs "Add Packed Quadword Integers".
+//
+// Mnemonic : PADDQ
+// Supported forms : (4 forms)
+//
+// * PADDQ mm, mm [SSE2]
+// * PADDQ m64, mm [SSE2]
+// * PADDQ xmm, xmm [SSE2]
+// * PADDQ m128, xmm [SSE2]
+//
+func (self *Program) PADDQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PADDQ", 2, Operands { v0, v1 })
+ // PADDQ mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xd4)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PADDQ m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xd4)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PADDQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xd4)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PADDQ m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xd4)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PADDQ")
+ }
+ return p
+}
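+
+// PADDQ is the odd one out in the PADD family: quadword adds arrived with
+// SSE2, so even its mm-register forms require ISA_SSE2 here, whereas
+// PADDB/PADDW/PADDD accept plain ISA_MMX for theirs.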
+
+// PADDSB performs "Add Packed Signed Byte Integers with Signed Saturation".
+//
+// Mnemonic : PADDSB
+// Supported forms : (4 forms)
+//
+// * PADDSB mm, mm [MMX]
+// * PADDSB m64, mm [MMX]
+// * PADDSB xmm, xmm [SSE2]
+// * PADDSB m128, xmm [SSE2]
+//
+func (self *Program) PADDSB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PADDSB", 2, Operands { v0, v1 })
+ // PADDSB mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xec)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PADDSB m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xec)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PADDSB xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xec)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PADDSB m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xec)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PADDSB")
+ }
+ return p
+}
+
+// PADDSW performs "Add Packed Signed Word Integers with Signed Saturation".
+//
+// Mnemonic : PADDSW
+// Supported forms : (4 forms)
+//
+// * PADDSW mm, mm [MMX]
+// * PADDSW m64, mm [MMX]
+// * PADDSW xmm, xmm [SSE2]
+// * PADDSW m128, xmm [SSE2]
+//
+func (self *Program) PADDSW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PADDSW", 2, Operands { v0, v1 })
+ // PADDSW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xed)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PADDSW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xed)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PADDSW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xed)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PADDSW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xed)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PADDSW")
+ }
+ return p
+}
+
+// PADDUSB performs "Add Packed Unsigned Byte Integers with Unsigned Saturation".
+//
+// Mnemonic : PADDUSB
+// Supported forms : (4 forms)
+//
+// * PADDUSB mm, mm [MMX]
+// * PADDUSB m64, mm [MMX]
+// * PADDUSB xmm, xmm [SSE2]
+// * PADDUSB m128, xmm [SSE2]
+//
+func (self *Program) PADDUSB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PADDUSB", 2, Operands { v0, v1 })
+ // PADDUSB mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xdc)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PADDUSB m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xdc)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PADDUSB xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xdc)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PADDUSB m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xdc)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PADDUSB")
+ }
+ return p
+}
+
+// PADDUSW performs "Add Packed Unsigned Word Integers with Unsigned Saturation".
+//
+// Mnemonic : PADDUSW
+// Supported forms : (4 forms)
+//
+// * PADDUSW mm, mm [MMX]
+// * PADDUSW m64, mm [MMX]
+// * PADDUSW xmm, xmm [SSE2]
+// * PADDUSW m128, xmm [SSE2]
+//
+func (self *Program) PADDUSW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PADDUSW", 2, Operands { v0, v1 })
+ // PADDUSW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xdd)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PADDUSW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xdd)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PADDUSW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xdd)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PADDUSW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xdd)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PADDUSW")
+ }
+ return p
+}
+
+// PADDW performs "Add Packed Word Integers".
+//
+// Mnemonic : PADDW
+// Supported forms : (4 forms)
+//
+// * PADDW mm, mm [MMX]
+// * PADDW m64, mm [MMX]
+// * PADDW xmm, xmm [SSE2]
+// * PADDW m128, xmm [SSE2]
+//
+func (self *Program) PADDW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PADDW", 2, Operands { v0, v1 })
+ // PADDW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xfd)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PADDW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xfd)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PADDW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xfd)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PADDW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xfd)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PADDW")
+ }
+ return p
+}
+
+// PALIGNR performs "Packed Align Right".
+//
+// Mnemonic : PALIGNR
+// Supported forms : (4 forms)
+//
+// * PALIGNR imm8, mm, mm [SSSE3]
+// * PALIGNR imm8, m64, mm [SSSE3]
+// * PALIGNR imm8, xmm, xmm [SSSE3]
+// * PALIGNR imm8, m128, xmm [SSSE3]
+//
+func (self *Program) PALIGNR(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("PALIGNR", 3, Operands { v0, v1, v2 })
+ // PALIGNR imm8, mm, mm
+ if isImm8(v0) && isMM(v1) && isMM(v2) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PALIGNR imm8, m64, mm
+ if isImm8(v0) && isM64(v1) && isMM(v2) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PALIGNR imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PALIGNR imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PALIGNR")
+ }
+ return p
+}
+
+// PAND performs "Packed Bitwise Logical AND".
+//
+// Mnemonic : PAND
+// Supported forms : (4 forms)
+//
+// * PAND mm, mm [MMX]
+// * PAND m64, mm [MMX]
+// * PAND xmm, xmm [SSE2]
+// * PAND m128, xmm [SSE2]
+//
+func (self *Program) PAND(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PAND", 2, Operands { v0, v1 })
+ // PAND mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xdb)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PAND m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xdb)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PAND xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xdb)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PAND m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xdb)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PAND")
+ }
+ return p
+}
+
+// PANDN performs "Packed Bitwise Logical AND NOT".
+//
+// Mnemonic : PANDN
+// Supported forms : (4 forms)
+//
+// * PANDN mm, mm [MMX]
+// * PANDN m64, mm [MMX]
+// * PANDN xmm, xmm [SSE2]
+// * PANDN m128, xmm [SSE2]
+//
+func (self *Program) PANDN(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PANDN", 2, Operands { v0, v1 })
+ // PANDN mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xdf)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PANDN m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xdf)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PANDN xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xdf)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PANDN m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xdf)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PANDN")
+ }
+ return p
+}
+
+// PAUSE performs "Spin Loop Hint".
+//
+// Mnemonic : PAUSE
+// Supported forms : (1 form)
+//
+// * PAUSE
+//
+func (self *Program) PAUSE() *Instruction {
+ p := self.alloc("PAUSE", 0, Operands { })
+ // PAUSE
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.emit(0x90)
+ })
+ return p
+}
+
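+// Usage sketch: PAUSE takes no operands, so the call is simply
+//
+//     p.PAUSE()               // emits F3 90; hints a spin-wait loop to the CPU
+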
+// PAVGB performs "Average Packed Byte Integers".
+//
+// Mnemonic : PAVGB
+// Supported forms : (4 forms)
+//
+// * PAVGB mm, mm [MMX+]
+// * PAVGB m64, mm [MMX+]
+// * PAVGB xmm, xmm [SSE2]
+// * PAVGB m128, xmm [SSE2]
+//
+func (self *Program) PAVGB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PAVGB", 2, Operands { v0, v1 })
+ // PAVGB mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xe0)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PAVGB m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xe0)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PAVGB xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xe0)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PAVGB m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xe0)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PAVGB")
+ }
+ return p
+}
+
+// PAVGUSB performs "Average Packed Byte Integers".
+//
+// Mnemonic : PAVGUSB
+// Supported forms : (2 forms)
+//
+// * PAVGUSB mm, mm [3dnow!]
+// * PAVGUSB m64, mm [3dnow!]
+//
+func (self *Program) PAVGUSB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PAVGUSB", 2, Operands { v0, v1 })
+ // PAVGUSB mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ m.emit(0xbf)
+ })
+ }
+ // PAVGUSB m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ m.emit(0xbf)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PAVGUSB")
+ }
+ return p
+}
+
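+// Usage sketch: 3DNow! instructions such as PAVGUSB share the 0F 0F opcode
+// and are distinguished by a suffix byte (0xBF here) emitted after the
+// ModRM/SIB bytes, which is why the encoders above emit it last:
+//
+//     p.PAVGUSB(MM1, MM0)     // mm0 = rounded unsigned byte average of mm0 and mm1
+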
+// PAVGW performs "Average Packed Word Integers".
+//
+// Mnemonic : PAVGW
+// Supported forms : (4 forms)
+//
+// * PAVGW mm, mm [MMX+]
+// * PAVGW m64, mm [MMX+]
+// * PAVGW xmm, xmm [SSE2]
+// * PAVGW m128, xmm [SSE2]
+//
+func (self *Program) PAVGW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PAVGW", 2, Operands { v0, v1 })
+ // PAVGW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xe3)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PAVGW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xe3)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PAVGW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xe3)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PAVGW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xe3)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PAVGW")
+ }
+ return p
+}
+
+// PBLENDVB performs "Variable Blend Packed Bytes".
+//
+// Mnemonic : PBLENDVB
+// Supported forms : (2 forms)
+//
+// * PBLENDVB xmm0, xmm, xmm [SSE4.1]
+// * PBLENDVB xmm0, m128, xmm [SSE4.1]
+//
+func (self *Program) PBLENDVB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("PBLENDVB", 3, Operands { v0, v1, v2 })
+ // PBLENDVB xmm0, xmm, xmm
+ if v0 == XMM0 && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // PBLENDVB xmm0, m128, xmm
+ if v0 == XMM0 && isM128(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x10)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PBLENDVB")
+ }
+ return p
+}
+
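+// Usage sketch: the first operand must be the literal XMM0 register, because
+// the SSE4.1 encoding hardwires XMM0 as the blend mask:
+//
+//     p.PBLENDVB(XMM0, XMM2, XMM1)    // xmm1 bytes replaced by xmm2 where xmm0's byte MSB is set
+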
+// PBLENDW performs "Blend Packed Words".
+//
+// Mnemonic : PBLENDW
+// Supported forms : (2 forms)
+//
+// * PBLENDW imm8, xmm, xmm [SSE4.1]
+// * PBLENDW imm8, m128, xmm [SSE4.1]
+//
+func (self *Program) PBLENDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("PBLENDW", 3, Operands { v0, v1, v2 })
+ // PBLENDW imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x0e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PBLENDW imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x0e)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PBLENDW")
+ }
+ return p
+}
+
+// PCLMULQDQ performs "Carry-Less Quadword Multiplication".
+//
+// Mnemonic : PCLMULQDQ
+// Supported forms : (2 forms)
+//
+// * PCLMULQDQ imm8, xmm, xmm [PCLMULQDQ]
+// * PCLMULQDQ imm8, m128, xmm [PCLMULQDQ]
+//
+func (self *Program) PCLMULQDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("PCLMULQDQ", 3, Operands { v0, v1, v2 })
+ // PCLMULQDQ imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_PCLMULQDQ)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x44)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PCLMULQDQ imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_PCLMULQDQ)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x44)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PCLMULQDQ")
+ }
+ return p
+}
+
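+// Usage sketch: the imm8 selects which 64-bit half of each operand enters the
+// carry-less multiply (per the SDM, bit 0 picks the destination register's
+// quadword and bit 4 the source operand's), so 0x00 multiplies the two low
+// quadwords:
+//
+//     p.PCLMULQDQ(0x00, XMM1, XMM0)   // xmm0 = clmul(xmm0.lo64, xmm1.lo64)
+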
+// PCMPEQB performs "Compare Packed Byte Data for Equality".
+//
+// Mnemonic : PCMPEQB
+// Supported forms : (4 forms)
+//
+// * PCMPEQB mm, mm [MMX]
+// * PCMPEQB m64, mm [MMX]
+// * PCMPEQB xmm, xmm [SSE2]
+// * PCMPEQB m128, xmm [SSE2]
+//
+func (self *Program) PCMPEQB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PCMPEQB", 2, Operands { v0, v1 })
+ // PCMPEQB mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x74)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PCMPEQB m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x74)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PCMPEQB xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x74)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PCMPEQB m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x74)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PCMPEQB")
+ }
+ return p
+}
+
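+// Usage sketch: each byte lane of the destination becomes 0xFF where the
+// operands are equal and 0x00 where they differ, the usual building block
+// for memchr-style scans:
+//
+//     p.PCMPEQB(XMM1, XMM0)   // xmm0[i] = (xmm0[i] == xmm1[i]) ? 0xFF : 0x00
+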
+// PCMPEQD performs "Compare Packed Doubleword Data for Equality".
+//
+// Mnemonic : PCMPEQD
+// Supported forms : (4 forms)
+//
+// * PCMPEQD mm, mm [MMX]
+// * PCMPEQD m64, mm [MMX]
+// * PCMPEQD xmm, xmm [SSE2]
+// * PCMPEQD m128, xmm [SSE2]
+//
+func (self *Program) PCMPEQD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PCMPEQD", 2, Operands { v0, v1 })
+ // PCMPEQD mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x76)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PCMPEQD m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x76)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PCMPEQD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x76)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PCMPEQD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x76)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PCMPEQD")
+ }
+ return p
+}
+
+// PCMPEQQ performs "Compare Packed Quadword Data for Equality".
+//
+// Mnemonic : PCMPEQQ
+// Supported forms : (2 forms)
+//
+// * PCMPEQQ xmm, xmm [SSE4.1]
+// * PCMPEQQ m128, xmm [SSE4.1]
+//
+func (self *Program) PCMPEQQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PCMPEQQ", 2, Operands { v0, v1 })
+ // PCMPEQQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PCMPEQQ m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x29)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PCMPEQQ")
+ }
+ return p
+}
+
+// PCMPEQW performs "Compare Packed Word Data for Equality".
+//
+// Mnemonic : PCMPEQW
+// Supported forms : (4 forms)
+//
+// * PCMPEQW mm, mm [MMX]
+// * PCMPEQW m64, mm [MMX]
+// * PCMPEQW xmm, xmm [SSE2]
+// * PCMPEQW m128, xmm [SSE2]
+//
+func (self *Program) PCMPEQW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PCMPEQW", 2, Operands { v0, v1 })
+ // PCMPEQW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x75)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PCMPEQW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x75)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PCMPEQW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x75)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PCMPEQW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x75)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PCMPEQW")
+ }
+ return p
+}
+
+// PCMPESTRI performs "Packed Compare Explicit Length Strings, Return Index".
+//
+// Mnemonic : PCMPESTRI
+// Supported forms : (2 forms)
+//
+// * PCMPESTRI imm8, xmm, xmm [SSE4.2]
+// * PCMPESTRI imm8, m128, xmm [SSE4.2]
+//
+func (self *Program) PCMPESTRI(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("PCMPESTRI", 3, Operands { v0, v1, v2 })
+ // PCMPESTRI imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x61)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PCMPESTRI imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x61)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PCMPESTRI")
+ }
+ return p
+}
+
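+// Usage sketch: the explicit-length string compares take the operand lengths
+// implicitly in EAX/RAX and EDX/RDX and return the match index in ECX (the
+// *STRM variants below return a mask in XMM0 instead); the imm8 selects
+// element format, aggregation and polarity, e.g. 0x0C for an equal-ordered
+// (substring) search over unsigned bytes:
+//
+//     p.PCMPESTRI(0x0c, XMM1, XMM0)   // ecx = index of first match
+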
+// PCMPESTRM performs "Packed Compare Explicit Length Strings, Return Mask".
+//
+// Mnemonic : PCMPESTRM
+// Supported forms : (2 forms)
+//
+// * PCMPESTRM imm8, xmm, xmm [SSE4.2]
+// * PCMPESTRM imm8, m128, xmm [SSE4.2]
+//
+func (self *Program) PCMPESTRM(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("PCMPESTRM", 3, Operands { v0, v1, v2 })
+ // PCMPESTRM imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x60)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PCMPESTRM imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x60)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PCMPESTRM")
+ }
+ return p
+}
+
+// PCMPGTB performs "Compare Packed Signed Byte Integers for Greater Than".
+//
+// Mnemonic : PCMPGTB
+// Supported forms : (4 forms)
+//
+// * PCMPGTB mm, mm [MMX]
+// * PCMPGTB m64, mm [MMX]
+// * PCMPGTB xmm, xmm [SSE2]
+// * PCMPGTB m128, xmm [SSE2]
+//
+func (self *Program) PCMPGTB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PCMPGTB", 2, Operands { v0, v1 })
+ // PCMPGTB mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x64)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PCMPGTB m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x64)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PCMPGTB xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x64)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PCMPGTB m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x64)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PCMPGTB")
+ }
+ return p
+}
+
+// PCMPGTD performs "Compare Packed Signed Doubleword Integers for Greater Than".
+//
+// Mnemonic : PCMPGTD
+// Supported forms : (4 forms)
+//
+// * PCMPGTD mm, mm [MMX]
+// * PCMPGTD m64, mm [MMX]
+// * PCMPGTD xmm, xmm [SSE2]
+// * PCMPGTD m128, xmm [SSE2]
+//
+func (self *Program) PCMPGTD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PCMPGTD", 2, Operands { v0, v1 })
+ // PCMPGTD mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x66)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PCMPGTD m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x66)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PCMPGTD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x66)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PCMPGTD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x66)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PCMPGTD")
+ }
+ return p
+}
+
+// PCMPGTQ performs "Compare Packed Data for Greater Than".
+//
+// Mnemonic : PCMPGTQ
+// Supported forms : (2 forms)
+//
+// * PCMPGTQ xmm, xmm [SSE4.2]
+// * PCMPGTQ m128, xmm [SSE4.2]
+//
+func (self *Program) PCMPGTQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PCMPGTQ", 2, Operands { v0, v1 })
+ // PCMPGTQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x37)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PCMPGTQ m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x37)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PCMPGTQ")
+ }
+ return p
+}
+
+// PCMPGTW performs "Compare Packed Signed Word Integers for Greater Than".
+//
+// Mnemonic : PCMPGTW
+// Supported forms : (4 forms)
+//
+// * PCMPGTW mm, mm [MMX]
+// * PCMPGTW m64, mm [MMX]
+// * PCMPGTW xmm, xmm [SSE2]
+// * PCMPGTW m128, xmm [SSE2]
+//
+func (self *Program) PCMPGTW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PCMPGTW", 2, Operands { v0, v1 })
+ // PCMPGTW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x65)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PCMPGTW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x65)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PCMPGTW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x65)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PCMPGTW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x65)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PCMPGTW")
+ }
+ return p
+}
+
+// PCMPISTRI performs "Packed Compare Implicit Length Strings, Return Index".
+//
+// Mnemonic : PCMPISTRI
+// Supported forms : (2 forms)
+//
+// * PCMPISTRI imm8, xmm, xmm [SSE4.2]
+// * PCMPISTRI imm8, m128, xmm [SSE4.2]
+//
+func (self *Program) PCMPISTRI(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("PCMPISTRI", 3, Operands { v0, v1, v2 })
+ // PCMPISTRI imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x63)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PCMPISTRI imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x63)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PCMPISTRI")
+ }
+ return p
+}
+
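+// Usage sketch: the implicit-length variants take no length registers and
+// instead stop each string at its first NUL element; the result index is
+// still returned in ECX:
+//
+//     p.PCMPISTRI(0x0c, XMM1, XMM0)   // ecx = index of first match
+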
+// PCMPISTRM performs "Packed Compare Implicit Length Strings, Return Mask".
+//
+// Mnemonic : PCMPISTRM
+// Supported forms : (2 forms)
+//
+// * PCMPISTRM imm8, xmm, xmm [SSE4.2]
+// * PCMPISTRM imm8, m128, xmm [SSE4.2]
+//
+func (self *Program) PCMPISTRM(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("PCMPISTRM", 3, Operands { v0, v1, v2 })
+ // PCMPISTRM imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x62)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PCMPISTRM imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x62)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PCMPISTRM")
+ }
+ return p
+}
+
+// PDEP performs "Parallel Bits Deposit".
+//
+// Mnemonic : PDEP
+// Supported forms : (4 forms)
+//
+// * PDEP r32, r32, r32 [BMI2]
+// * PDEP m32, r32, r32 [BMI2]
+// * PDEP r64, r64, r64 [BMI2]
+// * PDEP m64, r64, r64 [BMI2]
+//
+func (self *Program) PDEP(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("PDEP", 3, Operands { v0, v1, v2 })
+ // PDEP r32, r32, r32
+ if isReg32(v0) && isReg32(v1) && isReg32(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7b ^ (hlcode(v[1]) << 3))
+ m.emit(0xf5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // PDEP m32, r32, r32
+ if isM32(v0) && isReg32(v1) && isReg32(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x03, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xf5)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // PDEP r64, r64, r64
+ if isReg64(v0) && isReg64(v1) && isReg64(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfb ^ (hlcode(v[1]) << 3))
+ m.emit(0xf5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // PDEP m64, r64, r64
+ if isM64(v0) && isReg64(v1) && isReg64(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x83, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xf5)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PDEP")
+ }
+ return p
+}
+
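+// Usage sketch: PDEP scatters the low-order bits of its second argument into
+// the bit positions set in its first (mask) argument; depositing value 0b11
+// into mask 0b1010 yields 0b1010, for example:
+//
+//     p.PDEP(RCX, RAX, RDX)   // rdx = deposit(value: rax, mask: rcx)
+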
+// PEXT performs "Parallel Bits Extract".
+//
+// Mnemonic : PEXT
+// Supported forms : (4 forms)
+//
+// * PEXT r32, r32, r32 [BMI2]
+// * PEXT m32, r32, r32 [BMI2]
+// * PEXT r64, r64, r64 [BMI2]
+// * PEXT m64, r64, r64 [BMI2]
+//
+func (self *Program) PEXT(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("PEXT", 3, Operands { v0, v1, v2 })
+ // PEXT r32, r32, r32
+ if isReg32(v0) && isReg32(v1) && isReg32(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7a ^ (hlcode(v[1]) << 3))
+ m.emit(0xf5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // PEXT m32, r32, r32
+ if isM32(v0) && isReg32(v1) && isReg32(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x02, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xf5)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // PEXT r64, r64, r64
+ if isReg64(v0) && isReg64(v1) && isReg64(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfa ^ (hlcode(v[1]) << 3))
+ m.emit(0xf5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // PEXT m64, r64, r64
+ if isM64(v0) && isReg64(v1) && isReg64(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x82, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xf5)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PEXT")
+ }
+ return p
+}
+
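+// Usage sketch: PEXT is the inverse gather: the bits of the second argument
+// selected by the first (mask) argument are packed into the low-order bits
+// of the destination, so extracting mask 0b1010 from value 0b1010 yields 0b11:
+//
+//     p.PEXT(RCX, RAX, RDX)   // rdx = extract(value: rax, mask: rcx)
+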
+// PEXTRB performs "Extract Byte".
+//
+// Mnemonic : PEXTRB
+// Supported forms : (2 forms)
+//
+// * PEXTRB imm8, xmm, r32 [SSE4.1]
+// * PEXTRB imm8, xmm, m8 [SSE4.1]
+//
+func (self *Program) PEXTRB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("PEXTRB", 3, Operands { v0, v1, v2 })
+ // PEXTRB imm8, xmm, r32
+ if isImm8(v0) && isXMM(v1) && isReg32(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[2], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x14)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PEXTRB imm8, xmm, m8
+ if isImm8(v0) && isXMM(v1) && isM8(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[2]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x14)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PEXTRB")
+ }
+ return p
+}
+
+// PEXTRD performs "Extract Doubleword".
+//
+// Mnemonic : PEXTRD
+// Supported forms : (2 forms)
+//
+// * PEXTRD imm8, xmm, r32 [SSE4.1]
+// * PEXTRD imm8, xmm, m32 [SSE4.1]
+//
+func (self *Program) PEXTRD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("PEXTRD", 3, Operands { v0, v1, v2 })
+ // PEXTRD imm8, xmm, r32
+ if isImm8(v0) && isXMM(v1) && isReg32(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[2], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x16)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PEXTRD imm8, xmm, m32
+ if isImm8(v0) && isXMM(v1) && isM32(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[2]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x16)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PEXTRD")
+ }
+ return p
+}
+
+// PEXTRQ performs "Extract Quadword".
+//
+// Mnemonic : PEXTRQ
+// Supported forms : (2 forms)
+//
+// * PEXTRQ imm8, xmm, r64 [SSE4.1]
+// * PEXTRQ imm8, xmm, m64 [SSE4.1]
+//
+func (self *Program) PEXTRQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("PEXTRQ", 3, Operands { v0, v1, v2 })
+ // PEXTRQ imm8, xmm, r64
+ if isImm8(v0) && isXMM(v1) && isReg64(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[2]))
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x16)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PEXTRQ imm8, xmm, m64
+ if isImm8(v0) && isXMM(v1) && isM64(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexm(1, hcode(v[1]), addr(v[2]))
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x16)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PEXTRQ")
+ }
+ return p
+}
+
+// PEXTRW performs "Extract Word".
+//
+// Mnemonic : PEXTRW
+// Supported forms : (3 forms)
+//
+// * PEXTRW imm8, mm, r32 [MMX+]
+// * PEXTRW imm8, xmm, r32 [SSE4.1]
+// * PEXTRW imm8, xmm, m16 [SSE4.1]
+//
+func (self *Program) PEXTRW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("PEXTRW", 3, Operands { v0, v1, v2 })
+ // PEXTRW imm8, mm, r32
+ if isImm8(v0) && isMM(v1) && isReg32(v2) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0xc5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PEXTRW imm8, xmm, r32
+ if isImm8(v0) && isXMM(v1) && isReg32(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[2], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x15)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0xc5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PEXTRW imm8, xmm, m16
+ if isImm8(v0) && isXMM(v1) && isM16(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[2]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x15)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PEXTRW")
+ }
+ return p
+}
+
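+// Usage sketch: the register-destination form has two equivalent encodings
+// (0F C5 and 0F 3A 15), so both candidates are registered above and the
+// assembler may use either:
+//
+//     p.PEXTRW(3, XMM1, EAX)  // eax = zero-extended word 3 of xmm1
+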
+// PF2ID performs "Packed Floating-Point to Integer Doubleword Conversion".
+//
+// Mnemonic : PF2ID
+// Supported forms : (2 forms)
+//
+// * PF2ID mm, mm [3dnow!]
+// * PF2ID m64, mm [3dnow!]
+//
+func (self *Program) PF2ID(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PF2ID", 2, Operands { v0, v1 })
+ // PF2ID mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ m.emit(0x1d)
+ })
+ }
+ // PF2ID m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ m.emit(0x1d)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PF2ID")
+ }
+ return p
+}
+
+// PF2IW performs "Packed Floating-Point to Integer Word Conversion".
+//
+// Mnemonic : PF2IW
+// Supported forms : (2 forms)
+//
+// * PF2IW mm, mm [3dnow!+]
+// * PF2IW m64, mm [3dnow!+]
+//
+func (self *Program) PF2IW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PF2IW", 2, Operands { v0, v1 })
+ // PF2IW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_3DNOW_PLUS)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ m.emit(0x1c)
+ })
+ }
+ // PF2IW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_3DNOW_PLUS)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ m.emit(0x1c)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PF2IW")
+ }
+ return p
+}
+
+// PFACC performs "Packed Floating-Point Accumulate".
+//
+// Mnemonic : PFACC
+// Supported forms : (2 forms)
+//
+// * PFACC mm, mm [3dnow!]
+// * PFACC m64, mm [3dnow!]
+//
+func (self *Program) PFACC(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PFACC", 2, Operands { v0, v1 })
+ // PFACC mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ m.emit(0xae)
+ })
+ }
+ // PFACC m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ m.emit(0xae)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PFACC")
+ }
+ return p
+}
+
+// PFADD performs "Packed Floating-Point Add".
+//
+// Mnemonic : PFADD
+// Supported forms : (2 forms)
+//
+// * PFADD mm, mm [3dnow!]
+// * PFADD m64, mm [3dnow!]
+//
+func (self *Program) PFADD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PFADD", 2, Operands { v0, v1 })
+ // PFADD mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ m.emit(0x9e)
+ })
+ }
+ // PFADD m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ m.emit(0x9e)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PFADD")
+ }
+ return p
+}
+
+// PFCMPEQ performs "Packed Floating-Point Compare for Equal".
+//
+// Mnemonic : PFCMPEQ
+// Supported forms : (2 forms)
+//
+// * PFCMPEQ mm, mm [3dnow!]
+// * PFCMPEQ m64, mm [3dnow!]
+//
+func (self *Program) PFCMPEQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PFCMPEQ", 2, Operands { v0, v1 })
+ // PFCMPEQ mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ m.emit(0xb0)
+ })
+ }
+ // PFCMPEQ m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ m.emit(0xb0)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PFCMPEQ")
+ }
+ return p
+}
+
+// PFCMPGE performs "Packed Floating-Point Compare for Greater or Equal".
+//
+// Mnemonic : PFCMPGE
+// Supported forms : (2 forms)
+//
+// * PFCMPGE mm, mm [3dnow!]
+// * PFCMPGE m64, mm [3dnow!]
+//
+func (self *Program) PFCMPGE(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PFCMPGE", 2, Operands { v0, v1 })
+ // PFCMPGE mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ m.emit(0x90)
+ })
+ }
+ // PFCMPGE m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ m.emit(0x90)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PFCMPGE")
+ }
+ return p
+}
+
+// PFCMPGT performs "Packed Floating-Point Compare for Greater Than".
+//
+// Mnemonic : PFCMPGT
+// Supported forms : (2 forms)
+//
+// * PFCMPGT mm, mm [3dnow!]
+// * PFCMPGT m64, mm [3dnow!]
+//
+func (self *Program) PFCMPGT(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PFCMPGT", 2, Operands { v0, v1 })
+ // PFCMPGT mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ m.emit(0xa0)
+ })
+ }
+ // PFCMPGT m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ m.emit(0xa0)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PFCMPGT")
+ }
+ return p
+}
+
+// PFMAX performs "Packed Floating-Point Maximum".
+//
+// Mnemonic : PFMAX
+// Supported forms : (2 forms)
+//
+// * PFMAX mm, mm [3dnow!]
+// * PFMAX m64, mm [3dnow!]
+//
+func (self *Program) PFMAX(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PFMAX", 2, Operands { v0, v1 })
+ // PFMAX mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ m.emit(0xa4)
+ })
+ }
+ // PFMAX m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ m.emit(0xa4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PFMAX")
+ }
+ return p
+}
+
+// PFMIN performs "Packed Floating-Point Minimum".
+//
+// Mnemonic : PFMIN
+// Supported forms : (2 forms)
+//
+// * PFMIN mm, mm [3dnow!]
+// * PFMIN m64, mm [3dnow!]
+//
+func (self *Program) PFMIN(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PFMIN", 2, Operands { v0, v1 })
+ // PFMIN mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ m.emit(0x94)
+ })
+ }
+ // PFMIN m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ m.emit(0x94)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PFMIN")
+ }
+ return p
+}
+
+// PFMUL performs "Packed Floating-Point Multiply".
+//
+// Mnemonic : PFMUL
+// Supported forms : (2 forms)
+//
+// * PFMUL mm, mm [3dnow!]
+// * PFMUL m64, mm [3dnow!]
+//
+func (self *Program) PFMUL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PFMUL", 2, Operands { v0, v1 })
+ // PFMUL mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ m.emit(0xb4)
+ })
+ }
+ // PFMUL m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ m.emit(0xb4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PFMUL")
+ }
+ return p
+}
+
+// PFNACC performs "Packed Floating-Point Negative Accumulate".
+//
+// Mnemonic : PFNACC
+// Supported forms : (2 forms)
+//
+// * PFNACC mm, mm [3dnow!+]
+// * PFNACC m64, mm [3dnow!+]
+//
+func (self *Program) PFNACC(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PFNACC", 2, Operands { v0, v1 })
+ // PFNACC mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_3DNOW_PLUS)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ m.emit(0x8a)
+ })
+ }
+ // PFNACC m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_3DNOW_PLUS)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ m.emit(0x8a)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PFNACC")
+ }
+ return p
+}
+
+// PFPNACC performs "Packed Floating-Point Positive-Negative Accumulate".
+//
+// Mnemonic : PFPNACC
+// Supported forms : (2 forms)
+//
+// * PFPNACC mm, mm [3dnow!+]
+// * PFPNACC m64, mm [3dnow!+]
+//
+func (self *Program) PFPNACC(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PFPNACC", 2, Operands { v0, v1 })
+ // PFPNACC mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_3DNOW_PLUS)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ m.emit(0x8e)
+ })
+ }
+ // PFPNACC m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_3DNOW_PLUS)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ m.emit(0x8e)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PFPNACC")
+ }
+ return p
+}
+
+// PFRCP performs "Packed Floating-Point Reciprocal Approximation".
+//
+// Mnemonic : PFRCP
+// Supported forms : (2 forms)
+//
+// * PFRCP mm, mm [3dnow!]
+// * PFRCP m64, mm [3dnow!]
+//
+func (self *Program) PFRCP(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PFRCP", 2, Operands { v0, v1 })
+ // PFRCP mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ m.emit(0x96)
+ })
+ }
+ // PFRCP m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ m.emit(0x96)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PFRCP")
+ }
+ return p
+}
+
+// PFRCPIT1 performs "Packed Floating-Point Reciprocal Iteration 1".
+//
+// Mnemonic : PFRCPIT1
+// Supported forms : (2 forms)
+//
+// * PFRCPIT1 mm, mm [3dnow!]
+// * PFRCPIT1 m64, mm [3dnow!]
+//
+func (self *Program) PFRCPIT1(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PFRCPIT1", 2, Operands { v0, v1 })
+ // PFRCPIT1 mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ m.emit(0xa6)
+ })
+ }
+ // PFRCPIT1 m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ m.emit(0xa6)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PFRCPIT1")
+ }
+ return p
+}
+
+// PFRCPIT2 performs "Packed Floating-Point Reciprocal Iteration 2".
+//
+// Mnemonic : PFRCPIT2
+// Supported forms : (2 forms)
+//
+// * PFRCPIT2 mm, mm [3dnow!]
+// * PFRCPIT2 m64, mm [3dnow!]
+//
+func (self *Program) PFRCPIT2(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PFRCPIT2", 2, Operands { v0, v1 })
+ // PFRCPIT2 mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ m.emit(0xb6)
+ })
+ }
+ // PFRCPIT2 m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ m.emit(0xb6)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PFRCPIT2")
+ }
+ return p
+}
+
+// PFRSQIT1 performs "Packed Floating-Point Reciprocal Square Root Iteration 1".
+//
+// Mnemonic : PFRSQIT1
+// Supported forms : (2 forms)
+//
+// * PFRSQIT1 mm, mm [3dnow!]
+// * PFRSQIT1 m64, mm [3dnow!]
+//
+func (self *Program) PFRSQIT1(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PFRSQIT1", 2, Operands { v0, v1 })
+ // PFRSQIT1 mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ m.emit(0xa7)
+ })
+ }
+ // PFRSQIT1 m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ m.emit(0xa7)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PFRSQIT1")
+ }
+ return p
+}
+
+// PFRSQRT performs "Packed Floating-Point Reciprocal Square Root Approximation".
+//
+// Mnemonic : PFRSQRT
+// Supported forms : (2 forms)
+//
+// * PFRSQRT mm, mm [3dnow!]
+// * PFRSQRT m64, mm [3dnow!]
+//
+func (self *Program) PFRSQRT(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PFRSQRT", 2, Operands { v0, v1 })
+ // PFRSQRT mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ m.emit(0x97)
+ })
+ }
+ // PFRSQRT m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ m.emit(0x97)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PFRSQRT")
+ }
+ return p
+}
+
+// PFSUB performs "Packed Floating-Point Subtract".
+//
+// Mnemonic : PFSUB
+// Supported forms : (2 forms)
+//
+// * PFSUB mm, mm [3dnow!]
+// * PFSUB m64, mm [3dnow!]
+//
+func (self *Program) PFSUB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PFSUB", 2, Operands { v0, v1 })
+ // PFSUB mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ m.emit(0x9a)
+ })
+ }
+ // PFSUB m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ m.emit(0x9a)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PFSUB")
+ }
+ return p
+}
+
+// PFSUBR performs "Packed Floating-Point Subtract Reverse".
+//
+// Mnemonic : PFSUBR
+// Supported forms : (2 forms)
+//
+// * PFSUBR mm, mm [3dnow!]
+// * PFSUBR m64, mm [3dnow!]
+//
+func (self *Program) PFSUBR(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PFSUBR", 2, Operands { v0, v1 })
+ // PFSUBR mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ m.emit(0xaa)
+ })
+ }
+ // PFSUBR m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ m.emit(0xaa)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PFSUBR")
+ }
+ return p
+}
+
+// PHADDD performs "Packed Horizontal Add Doubleword Integers".
+//
+// Mnemonic : PHADDD
+// Supported forms : (4 forms)
+//
+// * PHADDD mm, mm [SSSE3]
+// * PHADDD m64, mm [SSSE3]
+// * PHADDD xmm, xmm [SSSE3]
+// * PHADDD m128, xmm [SSSE3]
+//
+func (self *Program) PHADDD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PHADDD", 2, Operands { v0, v1 })
+ // PHADDD mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x02)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PHADDD m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x02)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PHADDD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x02)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PHADDD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x02)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PHADDD")
+ }
+ return p
+}
+
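+// Usage sketch: horizontal adds sum adjacent lane pairs, with the
+// destination's pair sums landing in the low half of the result and the
+// source's in the high half:
+//
+//     p.PHADDD(XMM1, XMM0)    // xmm0 = {a1+a0, a3+a2, b1+b0, b3+b2}, a = old xmm0, b = xmm1
+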
+// PHADDSW performs "Packed Horizontal Add Signed Word Integers with Signed Saturation".
+//
+// Mnemonic : PHADDSW
+// Supported forms : (4 forms)
+//
+// * PHADDSW mm, mm [SSSE3]
+// * PHADDSW m64, mm [SSSE3]
+// * PHADDSW xmm, xmm [SSSE3]
+// * PHADDSW m128, xmm [SSSE3]
+//
+func (self *Program) PHADDSW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PHADDSW", 2, Operands { v0, v1 })
+ // PHADDSW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x03)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PHADDSW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x03)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PHADDSW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x03)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PHADDSW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x03)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PHADDSW")
+ }
+ return p
+}
+
+// PHADDW performs "Packed Horizontal Add Word Integers".
+//
+// Mnemonic : PHADDW
+// Supported forms : (4 forms)
+//
+// * PHADDW mm, mm [SSSE3]
+// * PHADDW m64, mm [SSSE3]
+// * PHADDW xmm, xmm [SSSE3]
+// * PHADDW m128, xmm [SSSE3]
+//
+func (self *Program) PHADDW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PHADDW", 2, Operands { v0, v1 })
+ // PHADDW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x01)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PHADDW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x01)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PHADDW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x01)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PHADDW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x01)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PHADDW")
+ }
+ return p
+}
+
+// PHMINPOSUW performs "Packed Horizontal Minimum of Unsigned Word Integers".
+//
+// Mnemonic : PHMINPOSUW
+// Supported forms : (2 forms)
+//
+// * PHMINPOSUW xmm, xmm [SSE4.1]
+// * PHMINPOSUW m128, xmm [SSE4.1]
+//
+func (self *Program) PHMINPOSUW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PHMINPOSUW", 2, Operands { v0, v1 })
+ // PHMINPOSUW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x41)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PHMINPOSUW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x41)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PHMINPOSUW")
+ }
+ return p
+}
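+
+// Editor's sketch, not generated output: PHMINPOSUW is a horizontal scan
+// rather than a lanewise operation. Assuming the package's XMM constants,
+// this writes the minimum unsigned word of XMM2 to bits 15:0 of XMM0 and
+// its lane index to bits 18:16, zeroing the remaining bits.
+func examplePHMINPOSUW(p *Program) *Instruction {
+ return p.PHMINPOSUW(XMM2, XMM0)
+}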
+
+// PHSUBD performs "Packed Horizontal Subtract Doubleword Integers".
+//
+// Mnemonic : PHSUBD
+// Supported forms : (4 forms)
+//
+// * PHSUBD mm, mm [SSSE3]
+// * PHSUBD m64, mm [SSSE3]
+// * PHSUBD xmm, xmm [SSSE3]
+// * PHSUBD m128, xmm [SSSE3]
+//
+func (self *Program) PHSUBD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PHSUBD", 2, Operands { v0, v1 })
+ // PHSUBD mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x06)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PHSUBD m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x06)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PHSUBD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x06)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PHSUBD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x06)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PHSUBD")
+ }
+ return p
+}
+
+// PHSUBSW performs "Packed Horizontal Subtract Signed Word Integers with Signed Saturation".
+//
+// Mnemonic : PHSUBSW
+// Supported forms : (4 forms)
+//
+// * PHSUBSW mm, mm [SSSE3]
+// * PHSUBSW m64, mm [SSSE3]
+// * PHSUBSW xmm, xmm [SSSE3]
+// * PHSUBSW m128, xmm [SSSE3]
+//
+func (self *Program) PHSUBSW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PHSUBSW", 2, Operands { v0, v1 })
+ // PHSUBSW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x07)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PHSUBSW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x07)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PHSUBSW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x07)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PHSUBSW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x07)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PHSUBSW")
+ }
+ return p
+}
+
+// PHSUBW performs "Packed Horizontal Subtract Word Integers".
+//
+// Mnemonic : PHSUBW
+// Supported forms : (4 forms)
+//
+// * PHSUBW mm, mm [SSSE3]
+// * PHSUBW m64, mm [SSSE3]
+// * PHSUBW xmm, xmm [SSSE3]
+// * PHSUBW m128, xmm [SSSE3]
+//
+func (self *Program) PHSUBW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PHSUBW", 2, Operands { v0, v1 })
+ // PHSUBW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x05)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PHSUBW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x05)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PHSUBW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x05)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PHSUBW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x05)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PHSUBW")
+ }
+ return p
+}
+
+// PI2FD performs "Packed Integer to Floating-Point Doubleword Conversion".
+//
+// Mnemonic : PI2FD
+// Supported forms : (2 forms)
+//
+// * PI2FD mm, mm [3dnow!]
+// * PI2FD m64, mm [3dnow!]
+//
+func (self *Program) PI2FD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PI2FD", 2, Operands { v0, v1 })
+ // PI2FD mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ m.emit(0x0d)
+ })
+ }
+ // PI2FD m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ m.emit(0x0d)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PI2FD")
+ }
+ return p
+}
+
+// PI2FW performs "Packed Integer to Floating-Point Word Conversion".
+//
+// Mnemonic : PI2FW
+// Supported forms : (2 forms)
+//
+// * PI2FW mm, mm [3dnow!+]
+// * PI2FW m64, mm [3dnow!+]
+//
+func (self *Program) PI2FW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PI2FW", 2, Operands { v0, v1 })
+ // PI2FW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_3DNOW_PLUS)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ m.emit(0x0c)
+ })
+ }
+ // PI2FW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_3DNOW_PLUS)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ m.emit(0x0c)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PI2FW")
+ }
+ return p
+}
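+
+// Editor's sketch, not generated output: the 3DNow! conversions operate on
+// the MMX register file, so this assumes the package's MM register
+// constants. PI2FD turns the two packed 32-bit signed integers in MM1 into
+// single-precision floats in MM0.
+func examplePI2FD(p *Program) *Instruction {
+ return p.PI2FD(MM1, MM0)
+}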
+
+// PINSRB performs "Insert Byte".
+//
+// Mnemonic : PINSRB
+// Supported forms : (2 forms)
+//
+// * PINSRB imm8, r32, xmm [SSE4.1]
+// * PINSRB imm8, m8, xmm [SSE4.1]
+//
+func (self *Program) PINSRB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("PINSRB", 3, Operands { v0, v1, v2 })
+ // PINSRB imm8, r32, xmm
+ if isImm8(v0) && isReg32(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x20)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PINSRB imm8, m8, xmm
+ if isImm8(v0) && isM8(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x20)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PINSRB")
+ }
+ return p
+}
+
+// PINSRD performs "Insert Doubleword".
+//
+// Mnemonic : PINSRD
+// Supported forms : (2 forms)
+//
+// * PINSRD imm8, r32, xmm [SSE4.1]
+// * PINSRD imm8, m32, xmm [SSE4.1]
+//
+func (self *Program) PINSRD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("PINSRD", 3, Operands { v0, v1, v2 })
+ // PINSRD imm8, r32, xmm
+ if isImm8(v0) && isReg32(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x22)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PINSRD imm8, m32, xmm
+ if isImm8(v0) && isM32(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x22)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PINSRD")
+ }
+ return p
+}
+
+// PINSRQ performs "Insert Quadword".
+//
+// Mnemonic : PINSRQ
+// Supported forms : (2 forms)
+//
+// * PINSRQ imm8, r64, xmm [SSE4.1]
+// * PINSRQ imm8, m64, xmm [SSE4.1]
+//
+func (self *Program) PINSRQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("PINSRQ", 3, Operands { v0, v1, v2 })
+ // PINSRQ imm8, r64, xmm
+ if isImm8(v0) && isReg64(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.emit(0x48 | hcode(v[2]) << 2 | hcode(v[1]))
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x22)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PINSRQ imm8, m64, xmm
+ if isImm8(v0) && isM64(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexm(1, hcode(v[2]), addr(v[1]))
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x22)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PINSRQ")
+ }
+ return p
+}
+
+// PINSRW performs "Insert Word".
+//
+// Mnemonic : PINSRW
+// Supported forms : (4 forms)
+//
+// * PINSRW imm8, r32, mm [MMX+]
+// * PINSRW imm8, m16, mm [MMX+]
+// * PINSRW imm8, r32, xmm [SSE2]
+// * PINSRW imm8, m16, xmm [SSE2]
+//
+func (self *Program) PINSRW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("PINSRW", 3, Operands { v0, v1, v2 })
+ // PINSRW imm8, r32, mm
+ if isImm8(v0) && isReg32(v1) && isMM(v2) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0xc4)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PINSRW imm8, m16, mm
+ if isImm8(v0) && isM16(v1) && isMM(v2) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xc4)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PINSRW imm8, r32, xmm
+ if isImm8(v0) && isReg32(v1) && isXMM(v2) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0xc4)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PINSRW imm8, m16, xmm
+ if isImm8(v0) && isM16(v1) && isXMM(v2) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xc4)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PINSRW")
+ }
+ return p
+}
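+
+// Editor's sketch, not generated output: the PINSR* family takes the imm8
+// lane selector as its first operand, matching the source-first ordering of
+// the forms above. This assumes the package's register constants and that a
+// plain Go integer is accepted as an imm8 (the isImm8/toImmAny calls above
+// suggest so); it inserts the low word of ECX into lane 3 of XMM0.
+func examplePINSRW(p *Program) *Instruction {
+ return p.PINSRW(3, ECX, XMM0)
+}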
+
+// PMADDUBSW performs "Multiply and Add Packed Signed and Unsigned Byte Integers".
+//
+// Mnemonic : PMADDUBSW
+// Supported forms : (4 forms)
+//
+// * PMADDUBSW mm, mm [SSSE3]
+// * PMADDUBSW m64, mm [SSSE3]
+// * PMADDUBSW xmm, xmm [SSSE3]
+// * PMADDUBSW m128, xmm [SSSE3]
+//
+func (self *Program) PMADDUBSW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMADDUBSW", 2, Operands { v0, v1 })
+ // PMADDUBSW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x04)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMADDUBSW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x04)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PMADDUBSW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x04)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMADDUBSW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x04)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMADDUBSW")
+ }
+ return p
+}
+
+// PMADDWD performs "Multiply and Add Packed Signed Word Integers".
+//
+// Mnemonic : PMADDWD
+// Supported forms : (4 forms)
+//
+// * PMADDWD mm, mm [MMX]
+// * PMADDWD m64, mm [MMX]
+// * PMADDWD xmm, xmm [SSE2]
+// * PMADDWD m128, xmm [SSE2]
+//
+func (self *Program) PMADDWD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMADDWD", 2, Operands { v0, v1 })
+ // PMADDWD mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xf5)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMADDWD m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xf5)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PMADDWD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xf5)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMADDWD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xf5)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMADDWD")
+ }
+ return p
+}
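+
+// Editor's sketch (assumes the package's XMM constants): PMADDWD is the
+// classic dot-product building block, multiplying corresponding signed
+// words and summing each adjacent pair of products into a doubleword.
+func examplePMADDWD(p *Program) *Instruction {
+ return p.PMADDWD(XMM1, XMM0)
+}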
+
+// PMAXSB performs "Maximum of Packed Signed Byte Integers".
+//
+// Mnemonic : PMAXSB
+// Supported forms : (2 forms)
+//
+// * PMAXSB xmm, xmm [SSE4.1]
+// * PMAXSB m128, xmm [SSE4.1]
+//
+func (self *Program) PMAXSB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMAXSB", 2, Operands { v0, v1 })
+ // PMAXSB xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x3c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMAXSB m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x3c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMAXSB")
+ }
+ return p
+}
+
+// PMAXSD performs "Maximum of Packed Signed Doubleword Integers".
+//
+// Mnemonic : PMAXSD
+// Supported forms : (2 forms)
+//
+// * PMAXSD xmm, xmm [SSE4.1]
+// * PMAXSD m128, xmm [SSE4.1]
+//
+func (self *Program) PMAXSD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMAXSD", 2, Operands { v0, v1 })
+ // PMAXSD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x3d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMAXSD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x3d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMAXSD")
+ }
+ return p
+}
+
+// PMAXSW performs "Maximum of Packed Signed Word Integers".
+//
+// Mnemonic : PMAXSW
+// Supported forms : (4 forms)
+//
+// * PMAXSW mm, mm [MMX+]
+// * PMAXSW m64, mm [MMX+]
+// * PMAXSW xmm, xmm [SSE2]
+// * PMAXSW m128, xmm [SSE2]
+//
+func (self *Program) PMAXSW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMAXSW", 2, Operands { v0, v1 })
+ // PMAXSW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xee)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMAXSW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xee)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PMAXSW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xee)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMAXSW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xee)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMAXSW")
+ }
+ return p
+}
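+
+// Editor's sketch, not generated output: PMAXSW keeps both the legacy MMX+
+// form and the SSE2 form, dispatched purely on operand types. Assuming the
+// package's MM and XMM constants:
+func examplePMAXSW(p *Program) {
+ p.PMAXSW(MM1, MM0)   // MMX+ form: per-word signed maximum into MM0
+ p.PMAXSW(XMM1, XMM0) // SSE2 form: per-word signed maximum into XMM0
+}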
+
+// PMAXUB performs "Maximum of Packed Unsigned Byte Integers".
+//
+// Mnemonic : PMAXUB
+// Supported forms : (4 forms)
+//
+// * PMAXUB mm, mm [MMX+]
+// * PMAXUB m64, mm [MMX+]
+// * PMAXUB xmm, xmm [SSE2]
+// * PMAXUB m128, xmm [SSE2]
+//
+func (self *Program) PMAXUB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMAXUB", 2, Operands { v0, v1 })
+ // PMAXUB mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xde)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMAXUB m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xde)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PMAXUB xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xde)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMAXUB m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xde)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMAXUB")
+ }
+ return p
+}
+
+// PMAXUD performs "Maximum of Packed Unsigned Doubleword Integers".
+//
+// Mnemonic : PMAXUD
+// Supported forms : (2 forms)
+//
+// * PMAXUD xmm, xmm [SSE4.1]
+// * PMAXUD m128, xmm [SSE4.1]
+//
+func (self *Program) PMAXUD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMAXUD", 2, Operands { v0, v1 })
+ // PMAXUD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x3f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMAXUD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x3f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMAXUD")
+ }
+ return p
+}
+
+// PMAXUW performs "Maximum of Packed Unsigned Word Integers".
+//
+// Mnemonic : PMAXUW
+// Supported forms : (2 forms)
+//
+// * PMAXUW xmm, xmm [SSE4.1]
+// * PMAXUW m128, xmm [SSE4.1]
+//
+func (self *Program) PMAXUW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMAXUW", 2, Operands { v0, v1 })
+ // PMAXUW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x3e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMAXUW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x3e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMAXUW")
+ }
+ return p
+}
+
+// PMINSB performs "Minimum of Packed Signed Byte Integers".
+//
+// Mnemonic : PMINSB
+// Supported forms : (2 forms)
+//
+// * PMINSB xmm, xmm [SSE4.1]
+// * PMINSB m128, xmm [SSE4.1]
+//
+func (self *Program) PMINSB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMINSB", 2, Operands { v0, v1 })
+ // PMINSB xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x38)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMINSB m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x38)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMINSB")
+ }
+ return p
+}
+
+// PMINSD performs "Minimum of Packed Signed Doubleword Integers".
+//
+// Mnemonic : PMINSD
+// Supported forms : (2 forms)
+//
+// * PMINSD xmm, xmm [SSE4.1]
+// * PMINSD m128, xmm [SSE4.1]
+//
+func (self *Program) PMINSD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMINSD", 2, Operands { v0, v1 })
+ // PMINSD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x39)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMINSD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x39)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMINSD")
+ }
+ return p
+}
+
+// PMINSW performs "Minimum of Packed Signed Word Integers".
+//
+// Mnemonic : PMINSW
+// Supported forms : (4 forms)
+//
+// * PMINSW mm, mm [MMX+]
+// * PMINSW m64, mm [MMX+]
+// * PMINSW xmm, xmm [SSE2]
+// * PMINSW m128, xmm [SSE2]
+//
+func (self *Program) PMINSW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMINSW", 2, Operands { v0, v1 })
+ // PMINSW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xea)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMINSW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xea)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PMINSW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xea)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMINSW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xea)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMINSW")
+ }
+ return p
+}
+
+// PMINUB performs "Minimum of Packed Unsigned Byte Integers".
+//
+// Mnemonic : PMINUB
+// Supported forms : (4 forms)
+//
+// * PMINUB mm, mm [MMX+]
+// * PMINUB m64, mm [MMX+]
+// * PMINUB xmm, xmm [SSE2]
+// * PMINUB m128, xmm [SSE2]
+//
+func (self *Program) PMINUB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMINUB", 2, Operands { v0, v1 })
+ // PMINUB mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xda)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMINUB m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xda)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PMINUB xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xda)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMINUB m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xda)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMINUB")
+ }
+ return p
+}
+
+// PMINUD performs "Minimum of Packed Unsigned Doubleword Integers".
+//
+// Mnemonic : PMINUD
+// Supported forms : (2 forms)
+//
+// * PMINUD xmm, xmm [SSE4.1]
+// * PMINUD m128, xmm [SSE4.1]
+//
+func (self *Program) PMINUD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMINUD", 2, Operands { v0, v1 })
+ // PMINUD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x3b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMINUD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x3b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMINUD")
+ }
+ return p
+}
+
+// PMINUW performs "Minimum of Packed Unsigned Word Integers".
+//
+// Mnemonic : PMINUW
+// Supported forms : (2 forms)
+//
+// * PMINUW xmm, xmm [SSE4.1]
+// * PMINUW m128, xmm [SSE4.1]
+//
+func (self *Program) PMINUW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMINUW", 2, Operands { v0, v1 })
+ // PMINUW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x3a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMINUW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x3a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMINUW")
+ }
+ return p
+}
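+
+// Editor's sketch (assumes the package's XMM constants): the SSE4.1 min/max
+// variants fill in the signedness and width combinations that MMX+ and SSE2
+// left out; PMINUW is the unsigned-word minimum.
+func examplePMINUW(p *Program) *Instruction {
+ return p.PMINUW(XMM3, XMM0)
+}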
+
+// PMOVMSKB performs "Move Byte Mask".
+//
+// Mnemonic : PMOVMSKB
+// Supported forms : (2 forms)
+//
+// * PMOVMSKB mm, r32 [MMX+]
+// * PMOVMSKB xmm, r32 [SSE2]
+//
+func (self *Program) PMOVMSKB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMOVMSKB", 2, Operands { v0, v1 })
+ // PMOVMSKB mm, r32
+ if isMM(v0) && isReg32(v1) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xd7)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMOVMSKB xmm, r32
+ if isXMM(v0) && isReg32(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xd7)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMOVMSKB")
+ }
+ return p
+}
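+
+// Editor's sketch, not generated output: PMOVMSKB produces a mask rather
+// than packed data, so its destination is a general-purpose register.
+// Assuming the package's EAX constant, this gathers the top bit of each
+// byte of XMM0 into the low 16 bits of EAX, zeroing the rest.
+func examplePMOVMSKB(p *Program) *Instruction {
+ return p.PMOVMSKB(XMM0, EAX)
+}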
+
+// PMOVSXBD performs "Move Packed Byte Integers to Doubleword Integers with Sign Extension".
+//
+// Mnemonic : PMOVSXBD
+// Supported forms : (2 forms)
+//
+// * PMOVSXBD xmm, xmm [SSE4.1]
+// * PMOVSXBD m32, xmm [SSE4.1]
+//
+func (self *Program) PMOVSXBD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMOVSXBD", 2, Operands { v0, v1 })
+ // PMOVSXBD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x21)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMOVSXBD m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x21)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMOVSXBD")
+ }
+ return p
+}
+
+// PMOVSXBQ performs "Move Packed Byte Integers to Quadword Integers with Sign Extension".
+//
+// Mnemonic : PMOVSXBQ
+// Supported forms : (2 forms)
+//
+// * PMOVSXBQ xmm, xmm [SSE4.1]
+// * PMOVSXBQ m16, xmm [SSE4.1]
+//
+func (self *Program) PMOVSXBQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMOVSXBQ", 2, Operands { v0, v1 })
+ // PMOVSXBQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x22)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMOVSXBQ m16, xmm
+ if isM16(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x22)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMOVSXBQ")
+ }
+ return p
+}
+
+// PMOVSXBW performs "Move Packed Byte Integers to Word Integers with Sign Extension".
+//
+// Mnemonic : PMOVSXBW
+// Supported forms : (2 forms)
+//
+// * PMOVSXBW xmm, xmm [SSE4.1]
+// * PMOVSXBW m64, xmm [SSE4.1]
+//
+func (self *Program) PMOVSXBW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMOVSXBW", 2, Operands { v0, v1 })
+ // PMOVSXBW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x20)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMOVSXBW m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x20)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMOVSXBW")
+ }
+ return p
+}
+
+// PMOVSXDQ performs "Move Packed Doubleword Integers to Quadword Integers with Sign Extension".
+//
+// Mnemonic : PMOVSXDQ
+// Supported forms : (2 forms)
+//
+// * PMOVSXDQ xmm, xmm [SSE4.1]
+// * PMOVSXDQ m64, xmm [SSE4.1]
+//
+func (self *Program) PMOVSXDQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMOVSXDQ", 2, Operands { v0, v1 })
+ // PMOVSXDQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x25)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMOVSXDQ m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x25)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMOVSXDQ")
+ }
+ return p
+}
+
+// PMOVSXWD performs "Move Packed Word Integers to Doubleword Integers with Sign Extension".
+//
+// Mnemonic : PMOVSXWD
+// Supported forms : (2 forms)
+//
+// * PMOVSXWD xmm, xmm [SSE4.1]
+// * PMOVSXWD m64, xmm [SSE4.1]
+//
+func (self *Program) PMOVSXWD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMOVSXWD", 2, Operands { v0, v1 })
+ // PMOVSXWD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x23)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMOVSXWD m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x23)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMOVSXWD")
+ }
+ return p
+}
+
+// PMOVSXWQ performs "Move Packed Word Integers to Quadword Integers with Sign Extension".
+//
+// Mnemonic : PMOVSXWQ
+// Supported forms : (2 forms)
+//
+// * PMOVSXWQ xmm, xmm [SSE4.1]
+// * PMOVSXWQ m32, xmm [SSE4.1]
+//
+func (self *Program) PMOVSXWQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMOVSXWQ", 2, Operands { v0, v1 })
+ // PMOVSXWQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x24)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMOVSXWQ m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x24)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMOVSXWQ")
+ }
+ return p
+}
+
+// PMOVZXBD performs "Move Packed Byte Integers to Doubleword Integers with Zero Extension".
+//
+// Mnemonic : PMOVZXBD
+// Supported forms : (2 forms)
+//
+// * PMOVZXBD xmm, xmm [SSE4.1]
+// * PMOVZXBD m32, xmm [SSE4.1]
+//
+func (self *Program) PMOVZXBD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMOVZXBD", 2, Operands { v0, v1 })
+ // PMOVZXBD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x31)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMOVZXBD m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x31)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMOVZXBD")
+ }
+ return p
+}
+
+// PMOVZXBQ performs "Move Packed Byte Integers to Quadword Integers with Zero Extension".
+//
+// Mnemonic : PMOVZXBQ
+// Supported forms : (2 forms)
+//
+// * PMOVZXBQ xmm, xmm [SSE4.1]
+// * PMOVZXBQ m16, xmm [SSE4.1]
+//
+func (self *Program) PMOVZXBQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMOVZXBQ", 2, Operands { v0, v1 })
+ // PMOVZXBQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x32)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMOVZXBQ m16, xmm
+ if isM16(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x32)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMOVZXBQ")
+ }
+ return p
+}
+
+// PMOVZXBW performs "Move Packed Byte Integers to Word Integers with Zero Extension".
+//
+// Mnemonic : PMOVZXBW
+// Supported forms : (2 forms)
+//
+// * PMOVZXBW xmm, xmm [SSE4.1]
+// * PMOVZXBW m64, xmm [SSE4.1]
+//
+func (self *Program) PMOVZXBW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMOVZXBW", 2, Operands { v0, v1 })
+ // PMOVZXBW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x30)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMOVZXBW m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x30)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMOVZXBW")
+ }
+ return p
+}
+
+// PMOVZXDQ performs "Move Packed Doubleword Integers to Quadword Integers with Zero Extension".
+//
+// Mnemonic : PMOVZXDQ
+// Supported forms : (2 forms)
+//
+// * PMOVZXDQ xmm, xmm [SSE4.1]
+// * PMOVZXDQ m64, xmm [SSE4.1]
+//
+func (self *Program) PMOVZXDQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMOVZXDQ", 2, Operands { v0, v1 })
+ // PMOVZXDQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x35)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMOVZXDQ m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x35)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMOVZXDQ")
+ }
+ return p
+}
+
+// PMOVZXWD performs "Move Packed Word Integers to Doubleword Integers with Zero Extension".
+//
+// Mnemonic : PMOVZXWD
+// Supported forms : (2 forms)
+//
+// * PMOVZXWD xmm, xmm [SSE4.1]
+// * PMOVZXWD m64, xmm [SSE4.1]
+//
+func (self *Program) PMOVZXWD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMOVZXWD", 2, Operands { v0, v1 })
+ // PMOVZXWD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x33)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMOVZXWD m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x33)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMOVZXWD")
+ }
+ return p
+}
+
+// PMOVZXWQ performs "Move Packed Word Integers to Quadword Integers with Zero Extension".
+//
+// Mnemonic : PMOVZXWQ
+// Supported forms : (2 forms)
+//
+// * PMOVZXWQ xmm, xmm [SSE4.1]
+// * PMOVZXWQ m32, xmm [SSE4.1]
+//
+func (self *Program) PMOVZXWQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMOVZXWQ", 2, Operands { v0, v1 })
+ // PMOVZXWQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x34)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMOVZXWQ m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x34)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMOVZXWQ")
+ }
+ return p
+}
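+
+// Editor's sketch (assumes the package's XMM constants): the PMOVSX*/PMOVZX*
+// pairs widen the low lanes of the source, differing only in sign versus
+// zero extension.
+func examplePMOVExtend(p *Program) {
+ p.PMOVSXWD(XMM1, XMM0) // sign-extend the low four words to doublewords
+ p.PMOVZXWD(XMM1, XMM2) // zero-extend the low four words to doublewords
+}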
+
+// PMULDQ performs "Multiply Packed Signed Doubleword Integers and Store Quadword Result".
+//
+// Mnemonic : PMULDQ
+// Supported forms : (2 forms)
+//
+// * PMULDQ xmm, xmm [SSE4.1]
+// * PMULDQ m128, xmm [SSE4.1]
+//
+func (self *Program) PMULDQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMULDQ", 2, Operands { v0, v1 })
+ // PMULDQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x28)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMULDQ m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x28)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMULDQ")
+ }
+ return p
+}
+
+// PMULHRSW performs "Packed Multiply Signed Word Integers and Store High Result with Round and Scale".
+//
+// Mnemonic : PMULHRSW
+// Supported forms : (4 forms)
+//
+// * PMULHRSW mm, mm [SSSE3]
+// * PMULHRSW m64, mm [SSSE3]
+// * PMULHRSW xmm, xmm [SSSE3]
+// * PMULHRSW m128, xmm [SSSE3]
+//
+func (self *Program) PMULHRSW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMULHRSW", 2, Operands { v0, v1 })
+ // PMULHRSW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x0b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMULHRSW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x0b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PMULHRSW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x0b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMULHRSW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x0b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMULHRSW")
+ }
+ return p
+}
+
+// PMULHRW performs "Packed Multiply High Rounded Word".
+//
+// Mnemonic : PMULHRW
+// Supported forms : (2 forms)
+//
+// * PMULHRW mm, mm [3dnow!]
+// * PMULHRW m64, mm [3dnow!]
+//
+func (self *Program) PMULHRW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMULHRW", 2, Operands { v0, v1 })
+ // PMULHRW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ m.emit(0xb7)
+ })
+ }
+ // PMULHRW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_3DNOW)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ m.emit(0xb7)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMULHRW")
+ }
+ return p
+}
+
+// PMULHUW performs "Multiply Packed Unsigned Word Integers and Store High Result".
+//
+// Mnemonic : PMULHUW
+// Supported forms : (4 forms)
+//
+// * PMULHUW mm, mm [MMX+]
+// * PMULHUW m64, mm [MMX+]
+// * PMULHUW xmm, xmm [SSE2]
+// * PMULHUW m128, xmm [SSE2]
+//
+func (self *Program) PMULHUW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMULHUW", 2, Operands { v0, v1 })
+ // PMULHUW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xe4)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMULHUW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xe4)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PMULHUW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xe4)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMULHUW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xe4)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMULHUW")
+ }
+ return p
+}
+
+// PMULHW performs "Multiply Packed Signed Word Integers and Store High Result".
+//
+// Mnemonic : PMULHW
+// Supported forms : (4 forms)
+//
+// * PMULHW mm, mm [MMX]
+// * PMULHW m64, mm [MMX]
+// * PMULHW xmm, xmm [SSE2]
+// * PMULHW m128, xmm [SSE2]
+//
+func (self *Program) PMULHW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMULHW", 2, Operands { v0, v1 })
+ // PMULHW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xe5)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMULHW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xe5)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PMULHW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xe5)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMULHW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xe5)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMULHW")
+ }
+ return p
+}
+
+// PMULLD performs "Multiply Packed Signed Doubleword Integers and Store Low Result".
+//
+// Mnemonic : PMULLD
+// Supported forms : (2 forms)
+//
+// * PMULLD xmm, xmm [SSE4.1]
+// * PMULLD m128, xmm [SSE4.1]
+//
+func (self *Program) PMULLD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMULLD", 2, Operands { v0, v1 })
+ // PMULLD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x40)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMULLD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x40)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMULLD")
+ }
+ return p
+}
+
+// PMULLW performs "Multiply Packed Signed Word Integers and Store Low Result".
+//
+// Mnemonic : PMULLW
+// Supported forms : (4 forms)
+//
+// * PMULLW mm, mm [MMX]
+// * PMULLW m64, mm [MMX]
+// * PMULLW xmm, xmm [SSE2]
+// * PMULLW m128, xmm [SSE2]
+//
+func (self *Program) PMULLW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMULLW", 2, Operands { v0, v1 })
+ // PMULLW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xd5)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMULLW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xd5)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PMULLW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xd5)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMULLW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xd5)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMULLW")
+ }
+ return p
+}
+
+// PMULUDQ performs "Multiply Packed Unsigned Doubleword Integers".
+//
+// Mnemonic : PMULUDQ
+// Supported forms : (4 forms)
+//
+// * PMULUDQ mm, mm [SSE2]
+// * PMULUDQ m64, mm [SSE2]
+// * PMULUDQ xmm, xmm [SSE2]
+// * PMULUDQ m128, xmm [SSE2]
+//
+func (self *Program) PMULUDQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PMULUDQ", 2, Operands { v0, v1 })
+ // PMULUDQ mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xf4)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMULUDQ m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xf4)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PMULUDQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xf4)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PMULUDQ m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xf4)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PMULUDQ")
+ }
+ return p
+}
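+
+// Editor's sketch, not generated output: the PMUL* family differs mainly in
+// which part of the product is kept. Assuming the package's XMM constants:
+func examplePMUL(p *Program) {
+ p.PMULLW(XMM1, XMM0)  // low 16 bits of each signed word product
+ p.PMULHW(XMM1, XMM0)  // high 16 bits of each signed word product
+ p.PMULUDQ(XMM1, XMM0) // full 64-bit products of the even unsigned doublewords
+}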
+
+// POPCNTL performs "Count of Number of Bits Set to 1".
+//
+// Mnemonic : POPCNT
+// Supported forms : (2 forms)
+//
+// * POPCNTL r32, r32 [POPCNT]
+// * POPCNTL m32, r32 [POPCNT]
+//
+func (self *Program) POPCNTL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("POPCNTL", 2, Operands { v0, v1 })
+ // POPCNTL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_POPCNT)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xb8)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // POPCNTL m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_POPCNT)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xb8)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for POPCNTL")
+ }
+ return p
+}
+
+// POPCNTQ performs "Count of Number of Bits Set to 1".
+//
+// Mnemonic : POPCNT
+// Supported forms : (2 forms)
+//
+// * POPCNTQ r64, r64 [POPCNT]
+// * POPCNTQ m64, r64 [POPCNT]
+//
+func (self *Program) POPCNTQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("POPCNTQ", 2, Operands { v0, v1 })
+ // POPCNTQ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_POPCNT)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0xb8)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // POPCNTQ m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_POPCNT)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0xb8)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for POPCNTQ")
+ }
+ return p
+}
+
+// POPCNTW performs "Count of Number of Bits Set to 1".
+//
+// Mnemonic : POPCNT
+// Supported forms : (2 forms)
+//
+// * POPCNTW r16, r16 [POPCNT]
+// * POPCNTW m16, r16 [POPCNT]
+//
+func (self *Program) POPCNTW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("POPCNTW", 2, Operands { v0, v1 })
+ // POPCNTW r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_POPCNT)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xb8)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // POPCNTW m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_POPCNT)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xb8)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for POPCNTW")
+ }
+ return p
+}
+
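+// Worked encoding note (illustrative): the three POPCNT builders differ only
+// in the prefixes around the shared F3 0F B8 opcode. POPCNTW adds 0x66, and
+// POPCNTQ emits a REX.W byte built as 0x48 | hcode(dst)<<2 | hcode(src).
+// Assuming the package's 64-bit register constants:
+//
+//     p.POPCNTQ(RCX, RAX)     // f3 48 0f b8 c1; rax = popcount(rcx)
+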
+// POPQ performs "Pop a Value from the Stack".
+//
+// Mnemonic : POP
+// Supported forms : (2 forms)
+//
+// * POPQ r64
+// * POPQ m64
+//
+func (self *Program) POPQ(v0 interface{}) *Instruction {
+ p := self.alloc("POPQ", 1, Operands { v0 })
+ // POPQ r64
+ if isReg64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], false)
+ m.emit(0x58 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], false)
+ m.emit(0x8f)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // POPQ m64
+ if isM64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x8f)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for POPQ")
+ }
+ return p
+}
+
+// POPW performs "Pop a Value from the Stack".
+//
+// Mnemonic : POP
+// Supported forms : (2 forms)
+//
+// * POPW r16
+// * POPW m16
+//
+func (self *Program) POPW(v0 interface{}) *Instruction {
+ p := self.alloc("POPW", 1, Operands { v0 })
+ // POPW r16
+ if isReg16(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[0], false)
+ m.emit(0x58 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[0], false)
+ m.emit(0x8f)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // POPW m16
+ if isM16(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x8f)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for POPW")
+ }
+ return p
+}
+
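+// Selection note (an inference, not stated by the generator): the POPQ/POPW
+// register forms call p.add twice, once for the short 58+rd encoding and once
+// for the legacy 8F /0 encoding, so the encoder has both candidates to choose
+// from (presumably preferring the shorter one):
+//
+//     p.POPQ(RBX)             // short form assembles to the single byte 5b
+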
+// POR performs "Packed Bitwise Logical OR".
+//
+// Mnemonic : POR
+// Supported forms : (4 forms)
+//
+// * POR mm, mm [MMX]
+// * POR m64, mm [MMX]
+// * POR xmm, xmm [SSE2]
+// * POR m128, xmm [SSE2]
+//
+func (self *Program) POR(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("POR", 2, Operands { v0, v1 })
+ // POR mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xeb)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // POR m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xeb)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // POR xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xeb)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // POR m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xeb)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for POR")
+ }
+ return p
+}
+
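+// ISA note (illustrative): for mnemonics with both MMX and SSE2 forms, the
+// operand types double as the ISA switch. MM operands require ISA_MMX and
+// encode with no prefix, while XMM operands require ISA_SSE2 and prepend the
+// 0x66 operand-size prefix to the same 0F-escape opcode.
+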
+// PREFETCH performs "Prefetch Data into Caches".
+//
+// Mnemonic : PREFETCH
+// Supported forms : (1 form)
+//
+// * PREFETCH m8 [PREFETCH]
+//
+func (self *Program) PREFETCH(v0 interface{}) *Instruction {
+ p := self.alloc("PREFETCH", 1, Operands { v0 })
+ // PREFETCH m8
+ if isM8(v0) {
+ self.require(ISA_PREFETCH)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0d)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PREFETCH")
+ }
+ return p
+}
+
+// PREFETCHNTA performs "Prefetch Data Into Caches using NTA Hint".
+//
+// Mnemonic : PREFETCHNTA
+// Supported forms : (1 form)
+//
+// * PREFETCHNTA m8 [MMX+]
+//
+func (self *Program) PREFETCHNTA(v0 interface{}) *Instruction {
+ p := self.alloc("PREFETCHNTA", 1, Operands { v0 })
+ // PREFETCHNTA m8
+ if isM8(v0) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x18)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PREFETCHNTA")
+ }
+ return p
+}
+
+// PREFETCHT0 performs "Prefetch Data Into Caches using T0 Hint".
+//
+// Mnemonic : PREFETCHT0
+// Supported forms : (1 form)
+//
+// * PREFETCHT0 m8 [MMX+]
+//
+func (self *Program) PREFETCHT0(v0 interface{}) *Instruction {
+ p := self.alloc("PREFETCHT0", 1, Operands { v0 })
+ // PREFETCHT0 m8
+ if isM8(v0) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x18)
+ m.mrsd(1, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PREFETCHT0")
+ }
+ return p
+}
+
+// PREFETCHT1 performs "Prefetch Data Into Caches using T1 Hint".
+//
+// Mnemonic : PREFETCHT1
+// Supported forms : (1 form)
+//
+// * PREFETCHT1 m8 [MMX+]
+//
+func (self *Program) PREFETCHT1(v0 interface{}) *Instruction {
+ p := self.alloc("PREFETCHT1", 1, Operands { v0 })
+ // PREFETCHT1 m8
+ if isM8(v0) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x18)
+ m.mrsd(2, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PREFETCHT1")
+ }
+ return p
+}
+
+// PREFETCHT2 performs "Prefetch Data Into Caches using T2 Hint".
+//
+// Mnemonic : PREFETCHT2
+// Supported forms : (1 form)
+//
+// * PREFETCHT2 m8 [MMX+]
+//
+func (self *Program) PREFETCHT2(v0 interface{}) *Instruction {
+ p := self.alloc("PREFETCHT2", 1, Operands { v0 })
+ // PREFETCHT2 m8
+ if isM8(v0) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x18)
+ m.mrsd(3, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PREFETCHT2")
+ }
+ return p
+}
+
+// PREFETCHW performs "Prefetch Data into Caches in Anticipation of a Write".
+//
+// Mnemonic : PREFETCHW
+// Supported forms : (1 form)
+//
+// * PREFETCHW m8 [PREFETCHW]
+//
+func (self *Program) PREFETCHW(v0 interface{}) *Instruction {
+ p := self.alloc("PREFETCHW", 1, Operands { v0 })
+ // PREFETCHW m8
+ if isM8(v0) {
+ self.require(ISA_PREFETCHW)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0d)
+ m.mrsd(1, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PREFETCHW")
+ }
+ return p
+}
+
+// PREFETCHWT1 performs "Prefetch Vector Data Into Caches with Intent to Write and T1 Hint".
+//
+// Mnemonic : PREFETCHWT1
+// Supported forms : (1 form)
+//
+// * PREFETCHWT1 m8 [PREFETCHWT1]
+//
+func (self *Program) PREFETCHWT1(v0 interface{}) *Instruction {
+ p := self.alloc("PREFETCHWT1", 1, Operands { v0 })
+ // PREFETCHWT1 m8
+ if isM8(v0) {
+ self.require(ISA_PREFETCHWT1)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0d)
+ m.mrsd(2, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PREFETCHWT1")
+ }
+ return p
+}
+
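+// Hint note (illustrative): the prefetch family shares two opcodes and picks
+// the locality hint via the ModRM reg field passed to m.mrsd. 0F 18 /0../3
+// encode PREFETCHNTA/T0/T1/T2, and 0F 0D /0../2 encode PREFETCH, PREFETCHW
+// and PREFETCHWT1. Assuming a Ptr-style memory-operand helper (the name is
+// an assumption, not part of this file):
+//
+//     p.PREFETCHT0(Ptr(RSI, 64))   // 0f 18 4e 40 (helper name assumed)
+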
+// PSADBW performs "Compute Sum of Absolute Differences".
+//
+// Mnemonic : PSADBW
+// Supported forms : (4 forms)
+//
+// * PSADBW mm, mm [MMX+]
+// * PSADBW m64, mm [MMX+]
+// * PSADBW xmm, xmm [SSE2]
+// * PSADBW m128, xmm [SSE2]
+//
+func (self *Program) PSADBW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PSADBW", 2, Operands { v0, v1 })
+ // PSADBW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xf6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSADBW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xf6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PSADBW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xf6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSADBW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xf6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSADBW")
+ }
+ return p
+}
+
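+// Semantics note (illustrative): PSADBW sums |a[i]-b[i]| over groups of eight
+// unsigned bytes. The XMM form therefore produces two 16-bit sums, written to
+// bits 15:0 and 79:64 of the destination with the remaining bits zeroed,
+// which makes it a cheap byte-wise L1 distance:
+//
+//     p.PSADBW(XMM1, XMM0)    // 66 0f f6 c1
+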
+// PSHUFB performs "Packed Shuffle Bytes".
+//
+// Mnemonic : PSHUFB
+// Supported forms : (4 forms)
+//
+// * PSHUFB mm, mm [SSSE3]
+// * PSHUFB m64, mm [SSSE3]
+// * PSHUFB xmm, xmm [SSSE3]
+// * PSHUFB m128, xmm [SSSE3]
+//
+func (self *Program) PSHUFB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PSHUFB", 2, Operands { v0, v1 })
+ // PSHUFB mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x00)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSHUFB m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x00)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PSHUFB xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x00)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSHUFB m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x00)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSHUFB")
+ }
+ return p
+}
+
+// PSHUFD performs "Shuffle Packed Doublewords".
+//
+// Mnemonic : PSHUFD
+// Supported forms : (2 forms)
+//
+// * PSHUFD imm8, xmm, xmm [SSE2]
+// * PSHUFD imm8, m128, xmm [SSE2]
+//
+func (self *Program) PSHUFD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("PSHUFD", 3, Operands { v0, v1, v2 })
+ // PSHUFD imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x70)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PSHUFD imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x70)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSHUFD")
+ }
+ return p
+}
+
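+// Immediate note (illustrative): the leading imm8 of PSHUFD is the shuffle
+// control. Bit pairs 1:0, 3:2, 5:4 and 7:6 select the source dword for each
+// destination dword, so 0x1b (0b00011011) reverses the four dwords. Assuming
+// bare Go integers satisfy isImm8, as the checks above suggest:
+//
+//     p.PSHUFD(0x1b, XMM1, XMM0)   // 66 0f 70 c1 1b
+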
+// PSHUFHW performs "Shuffle Packed High Words".
+//
+// Mnemonic : PSHUFHW
+// Supported forms : (2 forms)
+//
+// * PSHUFHW imm8, xmm, xmm [SSE2]
+// * PSHUFHW imm8, m128, xmm [SSE2]
+//
+func (self *Program) PSHUFHW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("PSHUFHW", 3, Operands { v0, v1, v2 })
+ // PSHUFHW imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x70)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PSHUFHW imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x70)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSHUFHW")
+ }
+ return p
+}
+
+// PSHUFLW performs "Shuffle Packed Low Words".
+//
+// Mnemonic : PSHUFLW
+// Supported forms : (2 forms)
+//
+// * PSHUFLW imm8, xmm, xmm [SSE2]
+// * PSHUFLW imm8, m128, xmm [SSE2]
+//
+func (self *Program) PSHUFLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("PSHUFLW", 3, Operands { v0, v1, v2 })
+ // PSHUFLW imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x70)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PSHUFLW imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x70)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSHUFLW")
+ }
+ return p
+}
+
+// PSHUFW performs "Shuffle Packed Words".
+//
+// Mnemonic : PSHUFW
+// Supported forms : (2 forms)
+//
+// * PSHUFW imm8, mm, mm [MMX+]
+// * PSHUFW imm8, m64, mm [MMX+]
+//
+func (self *Program) PSHUFW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("PSHUFW", 3, Operands { v0, v1, v2 })
+ // PSHUFW imm8, mm, mm
+ if isImm8(v0) && isMM(v1) && isMM(v2) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x70)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PSHUFW imm8, m64, mm
+ if isImm8(v0) && isM64(v1) && isMM(v2) {
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x70)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSHUFW")
+ }
+ return p
+}
+
+// PSIGNB performs "Packed Sign of Byte Integers".
+//
+// Mnemonic : PSIGNB
+// Supported forms : (4 forms)
+//
+// * PSIGNB mm, mm [SSSE3]
+// * PSIGNB m64, mm [SSSE3]
+// * PSIGNB xmm, xmm [SSSE3]
+// * PSIGNB m128, xmm [SSSE3]
+//
+func (self *Program) PSIGNB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PSIGNB", 2, Operands { v0, v1 })
+ // PSIGNB mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x08)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSIGNB m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x08)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PSIGNB xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x08)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSIGNB m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x08)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSIGNB")
+ }
+ return p
+}
+
+// PSIGND performs "Packed Sign of Doubleword Integers".
+//
+// Mnemonic : PSIGND
+// Supported forms : (4 forms)
+//
+// * PSIGND mm, mm [SSSE3]
+// * PSIGND m64, mm [SSSE3]
+// * PSIGND xmm, xmm [SSSE3]
+// * PSIGND m128, xmm [SSSE3]
+//
+func (self *Program) PSIGND(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PSIGND", 2, Operands { v0, v1 })
+ // PSIGND mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x0a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSIGND m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x0a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PSIGND xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x0a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSIGND m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x0a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSIGND")
+ }
+ return p
+}
+
+// PSIGNW performs "Packed Sign of Word Integers".
+//
+// Mnemonic : PSIGNW
+// Supported forms : (4 forms)
+//
+// * PSIGNW mm, mm [SSSE3]
+// * PSIGNW m64, mm [SSSE3]
+// * PSIGNW xmm, xmm [SSSE3]
+// * PSIGNW m128, xmm [SSSE3]
+//
+func (self *Program) PSIGNW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PSIGNW", 2, Operands { v0, v1 })
+ // PSIGNW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x09)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSIGNW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x09)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PSIGNW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x09)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSIGNW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSSE3)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x09)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSIGNW")
+ }
+ return p
+}
+
+// PSLLD performs "Shift Packed Doubleword Data Left Logical".
+//
+// Mnemonic : PSLLD
+// Supported forms : (6 forms)
+//
+// * PSLLD imm8, mm [MMX]
+// * PSLLD mm, mm [MMX]
+// * PSLLD m64, mm [MMX]
+// * PSLLD imm8, xmm [SSE2]
+// * PSLLD xmm, xmm [SSE2]
+// * PSLLD m128, xmm [SSE2]
+//
+func (self *Program) PSLLD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PSLLD", 2, Operands { v0, v1 })
+ // PSLLD imm8, mm
+ if isImm8(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0x72)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PSLLD mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xf2)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSLLD m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xf2)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PSLLD imm8, xmm
+ if isImm8(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0x72)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PSLLD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xf2)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSLLD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xf2)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSLLD")
+ }
+ return p
+}
+
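+// Dispatch note (illustrative): each shift builder accepts three kinds of
+// count operand. An imm8 count selects the 0F 72 /6-style opcode with the
+// shifted register in ModRM.rm, while a register or memory count selects the
+// two-operand 0F F2 form:
+//
+//     p.PSLLD(4, XMM0)        // 66 0f 72 f0 04; shift each dword left by 4
+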
+// PSLLDQ performs "Shift Packed Double Quadword Left Logical".
+//
+// Mnemonic : PSLLDQ
+// Supported forms : (1 form)
+//
+// * PSLLDQ imm8, xmm [SSE2]
+//
+func (self *Program) PSLLDQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PSLLDQ", 2, Operands { v0, v1 })
+ // PSLLDQ imm8, xmm
+ if isImm8(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0x73)
+ m.emit(0xf8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSLLDQ")
+ }
+ return p
+}
+
+// PSLLQ performs "Shift Packed Quadword Data Left Logical".
+//
+// Mnemonic : PSLLQ
+// Supported forms : (6 forms)
+//
+// * PSLLQ imm8, mm [MMX]
+// * PSLLQ mm, mm [MMX]
+// * PSLLQ m64, mm [MMX]
+// * PSLLQ imm8, xmm [SSE2]
+// * PSLLQ xmm, xmm [SSE2]
+// * PSLLQ m128, xmm [SSE2]
+//
+func (self *Program) PSLLQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PSLLQ", 2, Operands { v0, v1 })
+ // PSLLQ imm8, mm
+ if isImm8(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0x73)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PSLLQ mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xf3)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSLLQ m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xf3)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PSLLQ imm8, xmm
+ if isImm8(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0x73)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PSLLQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xf3)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSLLQ m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xf3)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSLLQ")
+ }
+ return p
+}
+
+// PSLLW performs "Shift Packed Word Data Left Logical".
+//
+// Mnemonic : PSLLW
+// Supported forms : (6 forms)
+//
+// * PSLLW imm8, mm [MMX]
+// * PSLLW mm, mm [MMX]
+// * PSLLW m64, mm [MMX]
+// * PSLLW imm8, xmm [SSE2]
+// * PSLLW xmm, xmm [SSE2]
+// * PSLLW m128, xmm [SSE2]
+//
+func (self *Program) PSLLW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PSLLW", 2, Operands { v0, v1 })
+ // PSLLW imm8, mm
+ if isImm8(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0x71)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PSLLW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xf1)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSLLW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xf1)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PSLLW imm8, xmm
+ if isImm8(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0x71)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PSLLW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xf1)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSLLW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xf1)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSLLW")
+ }
+ return p
+}
+
+// PSRAD performs "Shift Packed Doubleword Data Right Arithmetic".
+//
+// Mnemonic : PSRAD
+// Supported forms : (6 forms)
+//
+// * PSRAD imm8, mm [MMX]
+// * PSRAD mm, mm [MMX]
+// * PSRAD m64, mm [MMX]
+// * PSRAD imm8, xmm [SSE2]
+// * PSRAD xmm, xmm [SSE2]
+// * PSRAD m128, xmm [SSE2]
+//
+func (self *Program) PSRAD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PSRAD", 2, Operands { v0, v1 })
+ // PSRAD imm8, mm
+ if isImm8(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0x72)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PSRAD mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xe2)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSRAD m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xe2)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PSRAD imm8, xmm
+ if isImm8(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0x72)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PSRAD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xe2)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSRAD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xe2)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSRAD")
+ }
+ return p
+}
+
+// PSRAW performs "Shift Packed Word Data Right Arithmetic".
+//
+// Mnemonic : PSRAW
+// Supported forms : (6 forms)
+//
+// * PSRAW imm8, mm [MMX]
+// * PSRAW mm, mm [MMX]
+// * PSRAW m64, mm [MMX]
+// * PSRAW imm8, xmm [SSE2]
+// * PSRAW xmm, xmm [SSE2]
+// * PSRAW m128, xmm [SSE2]
+//
+func (self *Program) PSRAW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PSRAW", 2, Operands { v0, v1 })
+ // PSRAW imm8, mm
+ if isImm8(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0x71)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PSRAW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xe1)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSRAW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xe1)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PSRAW imm8, xmm
+ if isImm8(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0x71)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PSRAW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xe1)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSRAW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xe1)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSRAW")
+ }
+ return p
+}
+
+// PSRLD performs "Shift Packed Doubleword Data Right Logical".
+//
+// Mnemonic : PSRLD
+// Supported forms : (6 forms)
+//
+// * PSRLD imm8, mm [MMX]
+// * PSRLD mm, mm [MMX]
+// * PSRLD m64, mm [MMX]
+// * PSRLD imm8, xmm [SSE2]
+// * PSRLD xmm, xmm [SSE2]
+// * PSRLD m128, xmm [SSE2]
+//
+func (self *Program) PSRLD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PSRLD", 2, Operands { v0, v1 })
+ // PSRLD imm8, mm
+ if isImm8(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0x72)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PSRLD mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xd2)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSRLD m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xd2)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PSRLD imm8, xmm
+ if isImm8(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0x72)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PSRLD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xd2)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSRLD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xd2)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSRLD")
+ }
+ return p
+}
+
+// PSRLDQ performs "Shift Packed Double Quadword Right Logical".
+//
+// Mnemonic : PSRLDQ
+// Supported forms : (1 form)
+//
+// * PSRLDQ imm8, xmm [SSE2]
+//
+func (self *Program) PSRLDQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PSRLDQ", 2, Operands { v0, v1 })
+ // PSRLDQ imm8, xmm
+ if isImm8(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0x73)
+ m.emit(0xd8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSRLDQ")
+ }
+ return p
+}
+
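+// Byte-shift note (illustrative): unlike the word/dword/qword shifts, PSLLDQ
+// and PSRLDQ shift the whole 128-bit register by imm8 bytes and exist only in
+// the (imm8, xmm) form:
+//
+//     p.PSRLDQ(8, XMM0)       // 66 0f 73 d8 08; moves the high qword to the low qword
+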
+// PSRLQ performs "Shift Packed Quadword Data Right Logical".
+//
+// Mnemonic : PSRLQ
+// Supported forms : (6 forms)
+//
+// * PSRLQ imm8, mm [MMX]
+// * PSRLQ mm, mm [MMX]
+// * PSRLQ m64, mm [MMX]
+// * PSRLQ imm8, xmm [SSE2]
+// * PSRLQ xmm, xmm [SSE2]
+// * PSRLQ m128, xmm [SSE2]
+//
+func (self *Program) PSRLQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PSRLQ", 2, Operands { v0, v1 })
+ // PSRLQ imm8, mm
+ if isImm8(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0x73)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PSRLQ mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xd3)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSRLQ m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xd3)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PSRLQ imm8, xmm
+ if isImm8(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0x73)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PSRLQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xd3)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSRLQ m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xd3)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSRLQ")
+ }
+ return p
+}
+
+// PSRLW performs "Shift Packed Word Data Right Logical".
+//
+// Mnemonic : PSRLW
+// Supported forms : (6 forms)
+//
+// * PSRLW imm8, mm [MMX]
+// * PSRLW mm, mm [MMX]
+// * PSRLW m64, mm [MMX]
+// * PSRLW imm8, xmm [SSE2]
+// * PSRLW xmm, xmm [SSE2]
+// * PSRLW m128, xmm [SSE2]
+//
+func (self *Program) PSRLW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PSRLW", 2, Operands { v0, v1 })
+ // PSRLW imm8, mm
+ if isImm8(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0x71)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PSRLW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xd1)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSRLW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xd1)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PSRLW imm8, xmm
+ if isImm8(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x0f)
+ m.emit(0x71)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PSRLW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xd1)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSRLW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xd1)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSRLW")
+ }
+ return p
+}
+
+// PSUBB performs "Subtract Packed Byte Integers".
+//
+// Mnemonic : PSUBB
+// Supported forms : (4 forms)
+//
+// * PSUBB mm, mm [MMX]
+// * PSUBB m64, mm [MMX]
+// * PSUBB xmm, xmm [SSE2]
+// * PSUBB m128, xmm [SSE2]
+//
+func (self *Program) PSUBB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PSUBB", 2, Operands { v0, v1 })
+ // PSUBB mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xf8)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSUBB m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xf8)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PSUBB xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xf8)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSUBB m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xf8)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSUBB")
+ }
+ return p
+}
+
+// PSUBD performs "Subtract Packed Doubleword Integers".
+//
+// Mnemonic : PSUBD
+// Supported forms : (4 forms)
+//
+// * PSUBD mm, mm [MMX]
+// * PSUBD m64, mm [MMX]
+// * PSUBD xmm, xmm [SSE2]
+// * PSUBD m128, xmm [SSE2]
+//
+func (self *Program) PSUBD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PSUBD", 2, Operands { v0, v1 })
+ // PSUBD mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xfa)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSUBD m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xfa)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PSUBD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xfa)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSUBD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xfa)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSUBD")
+ }
+ return p
+}
+
+// PSUBQ performs "Subtract Packed Quadword Integers".
+//
+// Mnemonic : PSUBQ
+// Supported forms : (4 forms)
+//
+// * PSUBQ mm, mm [SSE2]
+// * PSUBQ m64, mm [SSE2]
+// * PSUBQ xmm, xmm [SSE2]
+// * PSUBQ m128, xmm [SSE2]
+//
+func (self *Program) PSUBQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PSUBQ", 2, Operands { v0, v1 })
+ // PSUBQ mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xfb)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSUBQ m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xfb)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PSUBQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xfb)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSUBQ m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xfb)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSUBQ")
+ }
+ return p
+}
+
+// PSUBSB performs "Subtract Packed Signed Byte Integers with Signed Saturation".
+//
+// Mnemonic : PSUBSB
+// Supported forms : (4 forms)
+//
+// * PSUBSB mm, mm [MMX]
+// * PSUBSB m64, mm [MMX]
+// * PSUBSB xmm, xmm [SSE2]
+// * PSUBSB m128, xmm [SSE2]
+//
+func (self *Program) PSUBSB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PSUBSB", 2, Operands { v0, v1 })
+ // PSUBSB mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xe8)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSUBSB m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xe8)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PSUBSB xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xe8)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSUBSB m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xe8)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSUBSB")
+ }
+ return p
+}
+
+// PSUBSW performs "Subtract Packed Signed Word Integers with Signed Saturation".
+//
+// Mnemonic : PSUBSW
+// Supported forms : (4 forms)
+//
+// * PSUBSW mm, mm [MMX]
+// * PSUBSW m64, mm [MMX]
+// * PSUBSW xmm, xmm [SSE2]
+// * PSUBSW m128, xmm [SSE2]
+//
+func (self *Program) PSUBSW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PSUBSW", 2, Operands { v0, v1 })
+ // PSUBSW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xe9)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSUBSW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xe9)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PSUBSW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xe9)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSUBSW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xe9)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSUBSW")
+ }
+ return p
+}
+
+// PSUBUSB performs "Subtract Packed Unsigned Byte Integers with Unsigned Saturation".
+//
+// Mnemonic : PSUBUSB
+// Supported forms : (4 forms)
+//
+// * PSUBUSB mm, mm [MMX]
+// * PSUBUSB m64, mm [MMX]
+// * PSUBUSB xmm, xmm [SSE2]
+// * PSUBUSB m128, xmm [SSE2]
+//
+func (self *Program) PSUBUSB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PSUBUSB", 2, Operands { v0, v1 })
+ // PSUBUSB mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xd8)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSUBUSB m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xd8)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PSUBUSB xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xd8)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSUBUSB m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xd8)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSUBUSB")
+ }
+ return p
+}
+
+// PSUBUSW performs "Subtract Packed Unsigned Word Integers with Unsigned Saturation".
+//
+// Mnemonic : PSUBUSW
+// Supported forms : (4 forms)
+//
+// * PSUBUSW mm, mm [MMX]
+// * PSUBUSW m64, mm [MMX]
+// * PSUBUSW xmm, xmm [SSE2]
+// * PSUBUSW m128, xmm [SSE2]
+//
+func (self *Program) PSUBUSW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PSUBUSW", 2, Operands { v0, v1 })
+ // PSUBUSW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xd9)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSUBUSW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xd9)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PSUBUSW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xd9)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSUBUSW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xd9)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSUBUSW")
+ }
+ return p
+}
+
+// PSUBW performs "Subtract Packed Word Integers".
+//
+// Mnemonic : PSUBW
+// Supported forms : (4 forms)
+//
+// * PSUBW mm, mm [MMX]
+// * PSUBW m64, mm [MMX]
+// * PSUBW xmm, xmm [SSE2]
+// * PSUBW m128, xmm [SSE2]
+//
+func (self *Program) PSUBW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PSUBW", 2, Operands { v0, v1 })
+ // PSUBW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xf9)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSUBW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xf9)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PSUBW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xf9)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PSUBW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xf9)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSUBW")
+ }
+ return p
+}
+
+// PSWAPD performs "Packed Swap Doubleword".
+//
+// Mnemonic : PSWAPD
+// Supported forms : (2 forms)
+//
+// * PSWAPD mm, mm [3dnow!+]
+// * PSWAPD m64, mm [3dnow!+]
+//
+func (self *Program) PSWAPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PSWAPD", 2, Operands { v0, v1 })
+ // PSWAPD mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_3DNOW_PLUS)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ m.emit(0xbb)
+ })
+ }
+ // PSWAPD m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_3DNOW_PLUS)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ m.emit(0xbb)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PSWAPD")
+ }
+ return p
+}
+
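+// Encoding note (illustrative): 3DNow! instructions such as PSWAPD use the
+// 0F 0F escape and carry the real opcode (0xbb here) as a trailing suffix
+// byte placed after the ModRM/SIB/displacement, hence the final m.emit(0xbb)
+// above rather than an opcode byte before the operands.
+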
+// PTEST performs "Packed Logical Compare".
+//
+// Mnemonic : PTEST
+// Supported forms : (2 forms)
+//
+// * PTEST xmm, xmm [SSE4.1]
+// * PTEST m128, xmm [SSE4.1]
+//
+func (self *Program) PTEST(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PTEST", 2, Operands { v0, v1 })
+ // PTEST xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x17)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PTEST m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0x17)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PTEST")
+ }
+ return p
+}
+
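+// Flags note (illustrative): PTEST writes no register result. It sets ZF when
+// (src AND dst) is all zeroes and CF when (src AND NOT dst) is all zeroes, so
+// it is typically followed by JZ/JNZ or JC/JNC:
+//
+//     p.PTEST(XMM1, XMM0)     // 66 0f 38 17 c1
+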
+// PUNPCKHBW performs "Unpack and Interleave High-Order Bytes into Words".
+//
+// Mnemonic : PUNPCKHBW
+// Supported forms : (4 forms)
+//
+// * PUNPCKHBW mm, mm [MMX]
+// * PUNPCKHBW m64, mm [MMX]
+// * PUNPCKHBW xmm, xmm [SSE2]
+// * PUNPCKHBW m128, xmm [SSE2]
+//
+func (self *Program) PUNPCKHBW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PUNPCKHBW", 2, Operands { v0, v1 })
+ // PUNPCKHBW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x68)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PUNPCKHBW m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x68)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PUNPCKHBW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x68)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PUNPCKHBW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x68)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PUNPCKHBW")
+ }
+ return p
+}
+
+// PUNPCKHDQ performs "Unpack and Interleave High-Order Doublewords into Quadwords".
+//
+// Mnemonic : PUNPCKHDQ
+// Supported forms : (4 forms)
+//
+// * PUNPCKHDQ mm, mm [MMX]
+// * PUNPCKHDQ m64, mm [MMX]
+// * PUNPCKHDQ xmm, xmm [SSE2]
+// * PUNPCKHDQ m128, xmm [SSE2]
+//
+func (self *Program) PUNPCKHDQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PUNPCKHDQ", 2, Operands { v0, v1 })
+ // PUNPCKHDQ mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x6a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PUNPCKHDQ m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x6a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PUNPCKHDQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x6a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PUNPCKHDQ m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x6a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PUNPCKHDQ")
+ }
+ return p
+}
+
+// PUNPCKHQDQ performs "Unpack and Interleave High-Order Quadwords into Double Quadwords".
+//
+// Mnemonic : PUNPCKHQDQ
+// Supported forms : (2 forms)
+//
+// * PUNPCKHQDQ xmm, xmm [SSE2]
+// * PUNPCKHQDQ m128, xmm [SSE2]
+//
+func (self *Program) PUNPCKHQDQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PUNPCKHQDQ", 2, Operands { v0, v1 })
+ // PUNPCKHQDQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x6d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PUNPCKHQDQ m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x6d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PUNPCKHQDQ")
+ }
+ return p
+}
+
+// PUNPCKHWD performs "Unpack and Interleave High-Order Words into Doublewords".
+//
+// Mnemonic : PUNPCKHWD
+// Supported forms : (4 forms)
+//
+// * PUNPCKHWD mm, mm [MMX]
+// * PUNPCKHWD m64, mm [MMX]
+// * PUNPCKHWD xmm, xmm [SSE2]
+// * PUNPCKHWD m128, xmm [SSE2]
+//
+func (self *Program) PUNPCKHWD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PUNPCKHWD", 2, Operands { v0, v1 })
+ // PUNPCKHWD mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x69)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PUNPCKHWD m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x69)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PUNPCKHWD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x69)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PUNPCKHWD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x69)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PUNPCKHWD")
+ }
+ return p
+}
+
+// PUNPCKLBW performs "Unpack and Interleave Low-Order Bytes into Words".
+//
+// Mnemonic : PUNPCKLBW
+// Supported forms : (4 forms)
+//
+// * PUNPCKLBW mm, mm [MMX]
+// * PUNPCKLBW m32, mm [MMX]
+// * PUNPCKLBW xmm, xmm [SSE2]
+// * PUNPCKLBW m128, xmm [SSE2]
+//
+func (self *Program) PUNPCKLBW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PUNPCKLBW", 2, Operands { v0, v1 })
+ // PUNPCKLBW mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x60)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PUNPCKLBW m32, mm
+ if isM32(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x60)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PUNPCKLBW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x60)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PUNPCKLBW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x60)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PUNPCKLBW")
+ }
+ return p
+}
+
+// PUNPCKLDQ performs "Unpack and Interleave Low-Order Doublewords into Quadwords".
+//
+// Mnemonic : PUNPCKLDQ
+// Supported forms : (4 forms)
+//
+// * PUNPCKLDQ mm, mm [MMX]
+// * PUNPCKLDQ m32, mm [MMX]
+// * PUNPCKLDQ xmm, xmm [SSE2]
+// * PUNPCKLDQ m128, xmm [SSE2]
+//
+func (self *Program) PUNPCKLDQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PUNPCKLDQ", 2, Operands { v0, v1 })
+ // PUNPCKLDQ mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x62)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PUNPCKLDQ m32, mm
+ if isM32(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x62)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PUNPCKLDQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x62)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PUNPCKLDQ m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x62)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PUNPCKLDQ")
+ }
+ return p
+}
+
+// PUNPCKLQDQ performs "Unpack and Interleave Low-Order Quadwords into Double Quadwords".
+//
+// Mnemonic : PUNPCKLQDQ
+// Supported forms : (2 forms)
+//
+// * PUNPCKLQDQ xmm, xmm [SSE2]
+// * PUNPCKLQDQ m128, xmm [SSE2]
+//
+func (self *Program) PUNPCKLQDQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PUNPCKLQDQ", 2, Operands { v0, v1 })
+ // PUNPCKLQDQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x6c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PUNPCKLQDQ m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x6c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PUNPCKLQDQ")
+ }
+ return p
+}
+
+// PUNPCKLWD performs "Unpack and Interleave Low-Order Words into Doublewords".
+//
+// Mnemonic : PUNPCKLWD
+// Supported forms : (4 forms)
+//
+// * PUNPCKLWD mm, mm [MMX]
+// * PUNPCKLWD m32, mm [MMX]
+// * PUNPCKLWD xmm, xmm [SSE2]
+// * PUNPCKLWD m128, xmm [SSE2]
+//
+func (self *Program) PUNPCKLWD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PUNPCKLWD", 2, Operands { v0, v1 })
+ // PUNPCKLWD mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x61)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PUNPCKLWD m32, mm
+ if isM32(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x61)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PUNPCKLWD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x61)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PUNPCKLWD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x61)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PUNPCKLWD")
+ }
+ return p
+}
+
+// PUSHQ performs "Push Value Onto the Stack".
+//
+// Mnemonic : PUSH
+// Supported forms : (4 forms)
+//
+// * PUSHQ imm8
+// * PUSHQ imm32
+// * PUSHQ r64
+// * PUSHQ m64
+//
+func (self *Program) PUSHQ(v0 interface{}) *Instruction {
+ p := self.alloc("PUSHQ", 1, Operands { v0 })
+ // PUSHQ imm8
+ if isImm8Ext(v0, 8) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x6a)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // PUSHQ imm32
+ if isImm32Ext(v0, 8) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x68)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // PUSHQ r64
+ if isReg64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], false)
+ m.emit(0x50 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], false)
+ m.emit(0xff)
+ m.emit(0xf0 | lcode(v[0]))
+ })
+ }
+ // PUSHQ m64
+ if isM64(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xff)
+ m.mrsd(6, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PUSHQ")
+ }
+ return p
+}
+
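+// Illustrative sketch (an aside, assuming a previously constructed *Program
+// p and the package's RBP constant):
+//
+//     p.PUSHQ(RBP)    // save the frame pointer
+//     p.PUSHQ(8)      // push a small immediate, sign-extended to 64 bits
+//
+// Note that PUSHQ registers two candidate encodings for r64 (0x50+r and
+// 0xff /6); keeping both presumably lets the encoder pick the shorter form.
+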
+// PUSHW performs "Push Value Onto the Stack".
+//
+// Mnemonic : PUSH
+// Supported forms : (2 forms)
+//
+// * PUSHW r16
+// * PUSHW m16
+//
+func (self *Program) PUSHW(v0 interface{}) *Instruction {
+ p := self.alloc("PUSHW", 1, Operands { v0 })
+ // PUSHW r16
+ if isReg16(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[0], false)
+ m.emit(0x50 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[0], false)
+ m.emit(0xff)
+ m.emit(0xf0 | lcode(v[0]))
+ })
+ }
+ // PUSHW m16
+ if isM16(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0xff)
+ m.mrsd(6, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PUSHW")
+ }
+ return p
+}
+
+// PXOR performs "Packed Bitwise Logical Exclusive OR".
+//
+// Mnemonic : PXOR
+// Supported forms : (4 forms)
+//
+// * PXOR mm, mm [MMX]
+// * PXOR m64, mm [MMX]
+// * PXOR xmm, xmm [SSE2]
+// * PXOR m128, xmm [SSE2]
+//
+func (self *Program) PXOR(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("PXOR", 2, Operands { v0, v1 })
+ // PXOR mm, mm
+ if isMM(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xef)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PXOR m64, mm
+ if isM64(v0) && isMM(v1) {
+ self.require(ISA_MMX)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xef)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // PXOR xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xef)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // PXOR m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xef)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for PXOR")
+ }
+ return p
+}
+
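+// A note on a common idiom: XORing a register with itself clears it, so the
+// sketch below (assuming a *Program p and the package's XMM0 constant)
+// zeroes an XMM register without loading a constant from memory:
+//
+//     p.PXOR(XMM0, XMM0)    // xmm0 = 0
+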
+// RCLB performs "Rotate Left through Carry Flag".
+//
+// Mnemonic : RCL
+// Supported forms : (6 forms)
+//
+// * RCLB 1, r8
+// * RCLB imm8, r8
+// * RCLB cl, r8
+// * RCLB 1, m8
+// * RCLB imm8, m8
+// * RCLB cl, m8
+//
+func (self *Program) RCLB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("RCLB", 2, Operands { v0, v1 })
+ // RCLB 1, r8
+ if isConst1(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xd0)
+ m.emit(0xd0 | lcode(v[1]))
+ })
+ }
+ // RCLB imm8, r8
+ if isImm8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xc0)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RCLB cl, r8
+ if v0 == CL && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xd2)
+ m.emit(0xd0 | lcode(v[1]))
+ })
+ }
+ // RCLB 1, m8
+ if isConst1(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd0)
+ m.mrsd(2, addr(v[1]), 1)
+ })
+ }
+ // RCLB imm8, m8
+ if isImm8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc0)
+ m.mrsd(2, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RCLB cl, m8
+ if v0 == CL && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd2)
+ m.mrsd(2, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for RCLB")
+ }
+ return p
+}
+
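+// Illustrative sketch of the three shift-count forms shared by the rotate
+// and shift methods in this file (assuming a *Program p and the package's
+// AL/CL constants): a count of 1 can use the dedicated one-bit 0xd0/0xd1
+// encodings, other constants use the imm8 forms, and a runtime count is
+// taken from CL.
+//
+//     p.RCLB(1, AL)     // rotate al left through CF by one bit
+//     p.RCLB(3, AL)     // rotate by an 8-bit immediate count
+//     p.RCLB(CL, AL)    // rotate by the count held in cl
+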
+// RCLL performs "Rotate Left through Carry Flag".
+//
+// Mnemonic : RCL
+// Supported forms : (6 forms)
+//
+// * RCLL 1, r32
+// * RCLL imm8, r32
+// * RCLL cl, r32
+// * RCLL 1, m32
+// * RCLL imm8, m32
+// * RCLL cl, m32
+//
+func (self *Program) RCLL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("RCLL", 2, Operands { v0, v1 })
+ // RCLL 1, r32
+ if isConst1(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xd1)
+ m.emit(0xd0 | lcode(v[1]))
+ })
+ }
+ // RCLL imm8, r32
+ if isImm8(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xc1)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RCLL cl, r32
+ if v0 == CL && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xd3)
+ m.emit(0xd0 | lcode(v[1]))
+ })
+ }
+ // RCLL 1, m32
+ if isConst1(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd1)
+ m.mrsd(2, addr(v[1]), 1)
+ })
+ }
+ // RCLL imm8, m32
+ if isImm8(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc1)
+ m.mrsd(2, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RCLL cl, m32
+ if v0 == CL && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd3)
+ m.mrsd(2, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for RCLL")
+ }
+ return p
+}
+
+// RCLQ performs "Rotate Left through Carry Flag".
+//
+// Mnemonic : RCL
+// Supported forms : (6 forms)
+//
+// * RCLQ 1, r64
+// * RCLQ imm8, r64
+// * RCLQ cl, r64
+// * RCLQ 1, m64
+// * RCLQ imm8, m64
+// * RCLQ cl, m64
+//
+func (self *Program) RCLQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("RCLQ", 2, Operands { v0, v1 })
+ // RCLQ 1, r64
+ if isConst1(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xd1)
+ m.emit(0xd0 | lcode(v[1]))
+ })
+ }
+ // RCLQ imm8, r64
+ if isImm8(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xc1)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RCLQ cl, r64
+ if v0 == CL && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xd3)
+ m.emit(0xd0 | lcode(v[1]))
+ })
+ }
+ // RCLQ 1, m64
+ if isConst1(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xd1)
+ m.mrsd(2, addr(v[1]), 1)
+ })
+ }
+ // RCLQ imm8, m64
+ if isImm8(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xc1)
+ m.mrsd(2, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RCLQ cl, m64
+ if v0 == CL && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xd3)
+ m.mrsd(2, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for RCLQ")
+ }
+ return p
+}
+
+// RCLW performs "Rotate Left through Carry Flag".
+//
+// Mnemonic : RCL
+// Supported forms : (6 forms)
+//
+// * RCLW 1, r16
+// * RCLW imm8, r16
+// * RCLW cl, r16
+// * RCLW 1, m16
+// * RCLW imm8, m16
+// * RCLW cl, m16
+//
+func (self *Program) RCLW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("RCLW", 2, Operands { v0, v1 })
+ // RCLW 1, r16
+ if isConst1(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xd1)
+ m.emit(0xd0 | lcode(v[1]))
+ })
+ }
+ // RCLW imm8, r16
+ if isImm8(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xc1)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RCLW cl, r16
+ if v0 == CL && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xd3)
+ m.emit(0xd0 | lcode(v[1]))
+ })
+ }
+ // RCLW 1, m16
+ if isConst1(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd1)
+ m.mrsd(2, addr(v[1]), 1)
+ })
+ }
+ // RCLW imm8, m16
+ if isImm8(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc1)
+ m.mrsd(2, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RCLW cl, m16
+ if v0 == CL && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd3)
+ m.mrsd(2, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for RCLW")
+ }
+ return p
+}
+
+// RCPPS performs "Compute Approximate Reciprocals of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : RCPPS
+// Supported forms : (2 forms)
+//
+// * RCPPS xmm, xmm [SSE]
+// * RCPPS m128, xmm [SSE]
+//
+func (self *Program) RCPPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("RCPPS", 2, Operands { v0, v1 })
+ // RCPPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x53)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // RCPPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x53)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for RCPPS")
+ }
+ return p
+}
+
+// RCPSS performs "Compute Approximate Reciprocal of Scalar Single-Precision Floating-Point Value".
+//
+// Mnemonic : RCPSS
+// Supported forms : (2 forms)
+//
+// * RCPSS xmm, xmm [SSE]
+// * RCPSS m32, xmm [SSE]
+//
+func (self *Program) RCPSS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("RCPSS", 2, Operands { v0, v1 })
+ // RCPSS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x53)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // RCPSS m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x53)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for RCPSS")
+ }
+ return p
+}
+
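+// Accuracy note: per the Intel SDM, RCPPS/RCPSS (and RSQRTPS/RSQRTSS below)
+// produce approximations with a maximum relative error of 1.5 * 2^-12, not
+// correctly rounded results; callers needing full precision typically refine
+// the estimate with a Newton-Raphson step.
+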
+// RCRB performs "Rotate Right through Carry Flag".
+//
+// Mnemonic : RCR
+// Supported forms : (6 forms)
+//
+// * RCRB 1, r8
+// * RCRB imm8, r8
+// * RCRB cl, r8
+// * RCRB 1, m8
+// * RCRB imm8, m8
+// * RCRB cl, m8
+//
+func (self *Program) RCRB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("RCRB", 2, Operands { v0, v1 })
+ // RCRB 1, r8
+ if isConst1(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xd0)
+ m.emit(0xd8 | lcode(v[1]))
+ })
+ }
+ // RCRB imm8, r8
+ if isImm8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xc0)
+ m.emit(0xd8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RCRB cl, r8
+ if v0 == CL && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xd2)
+ m.emit(0xd8 | lcode(v[1]))
+ })
+ }
+ // RCRB 1, m8
+ if isConst1(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd0)
+ m.mrsd(3, addr(v[1]), 1)
+ })
+ }
+ // RCRB imm8, m8
+ if isImm8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc0)
+ m.mrsd(3, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RCRB cl, m8
+ if v0 == CL && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd2)
+ m.mrsd(3, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for RCRB")
+ }
+ return p
+}
+
+// RCRL performs "Rotate Right through Carry Flag".
+//
+// Mnemonic : RCR
+// Supported forms : (6 forms)
+//
+// * RCRL 1, r32
+// * RCRL imm8, r32
+// * RCRL cl, r32
+// * RCRL 1, m32
+// * RCRL imm8, m32
+// * RCRL cl, m32
+//
+func (self *Program) RCRL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("RCRL", 2, Operands { v0, v1 })
+ // RCRL 1, r32
+ if isConst1(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xd1)
+ m.emit(0xd8 | lcode(v[1]))
+ })
+ }
+ // RCRL imm8, r32
+ if isImm8(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xc1)
+ m.emit(0xd8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RCRL cl, r32
+ if v0 == CL && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xd3)
+ m.emit(0xd8 | lcode(v[1]))
+ })
+ }
+ // RCRL 1, m32
+ if isConst1(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd1)
+ m.mrsd(3, addr(v[1]), 1)
+ })
+ }
+ // RCRL imm8, m32
+ if isImm8(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc1)
+ m.mrsd(3, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RCRL cl, m32
+ if v0 == CL && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd3)
+ m.mrsd(3, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for RCRL")
+ }
+ return p
+}
+
+// RCRQ performs "Rotate Right through Carry Flag".
+//
+// Mnemonic : RCR
+// Supported forms : (6 forms)
+//
+// * RCRQ 1, r64
+// * RCRQ imm8, r64
+// * RCRQ cl, r64
+// * RCRQ 1, m64
+// * RCRQ imm8, m64
+// * RCRQ cl, m64
+//
+func (self *Program) RCRQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("RCRQ", 2, Operands { v0, v1 })
+ // RCRQ 1, r64
+ if isConst1(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xd1)
+ m.emit(0xd8 | lcode(v[1]))
+ })
+ }
+ // RCRQ imm8, r64
+ if isImm8(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xc1)
+ m.emit(0xd8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RCRQ cl, r64
+ if v0 == CL && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xd3)
+ m.emit(0xd8 | lcode(v[1]))
+ })
+ }
+ // RCRQ 1, m64
+ if isConst1(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xd1)
+ m.mrsd(3, addr(v[1]), 1)
+ })
+ }
+ // RCRQ imm8, m64
+ if isImm8(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xc1)
+ m.mrsd(3, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RCRQ cl, m64
+ if v0 == CL && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xd3)
+ m.mrsd(3, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for RCRQ")
+ }
+ return p
+}
+
+// RCRW performs "Rotate Right through Carry Flag".
+//
+// Mnemonic : RCR
+// Supported forms : (6 forms)
+//
+// * RCRW 1, r16
+// * RCRW imm8, r16
+// * RCRW cl, r16
+// * RCRW 1, m16
+// * RCRW imm8, m16
+// * RCRW cl, m16
+//
+func (self *Program) RCRW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("RCRW", 2, Operands { v0, v1 })
+ // RCRW 1, r16
+ if isConst1(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xd1)
+ m.emit(0xd8 | lcode(v[1]))
+ })
+ }
+ // RCRW imm8, r16
+ if isImm8(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xc1)
+ m.emit(0xd8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RCRW cl, r16
+ if v0 == CL && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xd3)
+ m.emit(0xd8 | lcode(v[1]))
+ })
+ }
+ // RCRW 1, m16
+ if isConst1(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd1)
+ m.mrsd(3, addr(v[1]), 1)
+ })
+ }
+ // RCRW imm8, m16
+ if isImm8(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc1)
+ m.mrsd(3, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RCRW cl, m16
+ if v0 == CL && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd3)
+ m.mrsd(3, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for RCRW")
+ }
+ return p
+}
+
+// RDRAND performs "Read Random Number".
+//
+// Mnemonic : RDRAND
+// Supported forms : (3 forms)
+//
+// * RDRAND r16 [RDRAND]
+// * RDRAND r32 [RDRAND]
+// * RDRAND r64 [RDRAND]
+//
+func (self *Program) RDRAND(v0 interface{}) *Instruction {
+ p := self.alloc("RDRAND", 1, Operands { v0 })
+ // RDRAND r16
+ if isReg16(v0) {
+ self.require(ISA_RDRAND)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[0], false)
+ m.emit(0x0f)
+ m.emit(0xc7)
+ m.emit(0xf0 | lcode(v[0]))
+ })
+ }
+ // RDRAND r32
+ if isReg32(v0) {
+ self.require(ISA_RDRAND)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], false)
+ m.emit(0x0f)
+ m.emit(0xc7)
+ m.emit(0xf0 | lcode(v[0]))
+ })
+ }
+ // RDRAND r64
+ if isReg64(v0) {
+ self.require(ISA_RDRAND)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0xc7)
+ m.emit(0xf0 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for RDRAND")
+ }
+ return p
+}
+
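+// Usage note (a sketch, assuming a *Program p and the package's RAX
+// constant): RDRAND signals availability through the carry flag, so callers
+// are expected to test CF and retry when it is clear; RDSEED below behaves
+// analogously. The emission itself is just
+//
+//     p.RDRAND(RAX)    // CF=1 on return means rax holds a fresh random value
+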
+// RDSEED performs "Read Random SEED".
+//
+// Mnemonic : RDSEED
+// Supported forms : (3 forms)
+//
+// * RDSEED r16 [RDSEED]
+// * RDSEED r32 [RDSEED]
+// * RDSEED r64 [RDSEED]
+//
+func (self *Program) RDSEED(v0 interface{}) *Instruction {
+ p := self.alloc("RDSEED", 1, Operands { v0 })
+ // RDSEED r16
+ if isReg16(v0) {
+ self.require(ISA_RDSEED)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[0], false)
+ m.emit(0x0f)
+ m.emit(0xc7)
+ m.emit(0xf8 | lcode(v[0]))
+ })
+ }
+ // RDSEED r32
+ if isReg32(v0) {
+ self.require(ISA_RDSEED)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], false)
+ m.emit(0x0f)
+ m.emit(0xc7)
+ m.emit(0xf8 | lcode(v[0]))
+ })
+ }
+ // RDSEED r64
+ if isReg64(v0) {
+ self.require(ISA_RDSEED)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0xc7)
+ m.emit(0xf8 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for RDSEED")
+ }
+ return p
+}
+
+// RDTSC performs "Read Time-Stamp Counter".
+//
+// Mnemonic : RDTSC
+// Supported forms : (1 form)
+//
+// * RDTSC [RDTSC]
+//
+func (self *Program) RDTSC() *Instruction {
+ p := self.alloc("RDTSC", 0, Operands { })
+ // RDTSC
+ self.require(ISA_RDTSC)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x31)
+ })
+ return p
+}
+
+// RDTSCP performs "Read Time-Stamp Counter and Processor ID".
+//
+// Mnemonic : RDTSCP
+// Supported forms : (1 form)
+//
+// * RDTSCP [RDTSCP]
+//
+func (self *Program) RDTSCP() *Instruction {
+ p := self.alloc("RDTSCP", 0, Operands { })
+ // RDTSCP
+ self.require(ISA_RDTSCP)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x01)
+ m.emit(0xf9)
+ })
+ return p
+}
+
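+// Usage note: both instructions deliver the 64-bit time-stamp counter split
+// across EDX:EAX (RDTSCP also loads IA32_TSC_AUX into ECX), so the caller
+// must recombine the halves. The emission takes no operands:
+//
+//     p.RDTSCP()    // sketch, assuming a previously constructed *Program p
+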
+// RET performs "Return from Procedure".
+//
+// Mnemonic : RET
+// Supported forms : (2 forms)
+//
+// * RET
+// * RET imm16
+//
+func (self *Program) RET(vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("RET", 0, Operands { })
+ case 1 : p = self.alloc("RET", 1, Operands { vv[0] })
+ default : panic("instruction RET takes 0 or 1 operands")
+ }
+ // RET
+ if len(vv) == 0 {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc3)
+ })
+ }
+ // RET imm16
+ if len(vv) == 1 && isImm16(vv[0]) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc2)
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for RET")
+ }
+ return p
+}
+
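+// Illustrative sketch of the variadic form (assuming a *Program p):
+//
+//     p.RET()      // plain near return, encoded as 0xc3
+//     p.RET(16)    // return and pop 16 bytes of arguments, 0xc2 imm16
+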
+// ROLB performs "Rotate Left".
+//
+// Mnemonic : ROL
+// Supported forms : (6 forms)
+//
+// * ROLB 1, r8
+// * ROLB imm8, r8
+// * ROLB cl, r8
+// * ROLB 1, m8
+// * ROLB imm8, m8
+// * ROLB cl, m8
+//
+func (self *Program) ROLB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ROLB", 2, Operands { v0, v1 })
+ // ROLB 1, r8
+ if isConst1(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xd0)
+ m.emit(0xc0 | lcode(v[1]))
+ })
+ }
+ // ROLB imm8, r8
+ if isImm8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xc0)
+ m.emit(0xc0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ROLB cl, r8
+ if v0 == CL && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xd2)
+ m.emit(0xc0 | lcode(v[1]))
+ })
+ }
+ // ROLB 1, m8
+ if isConst1(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd0)
+ m.mrsd(0, addr(v[1]), 1)
+ })
+ }
+ // ROLB imm8, m8
+ if isImm8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc0)
+ m.mrsd(0, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ROLB cl, m8
+ if v0 == CL && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd2)
+ m.mrsd(0, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ROLB")
+ }
+ return p
+}
+
+// ROLL performs "Rotate Left".
+//
+// Mnemonic : ROL
+// Supported forms : (6 forms)
+//
+// * ROLL 1, r32
+// * ROLL imm8, r32
+// * ROLL cl, r32
+// * ROLL 1, m32
+// * ROLL imm8, m32
+// * ROLL cl, m32
+//
+func (self *Program) ROLL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ROLL", 2, Operands { v0, v1 })
+ // ROLL 1, r32
+ if isConst1(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xd1)
+ m.emit(0xc0 | lcode(v[1]))
+ })
+ }
+ // ROLL imm8, r32
+ if isImm8(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xc1)
+ m.emit(0xc0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ROLL cl, r32
+ if v0 == CL && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xd3)
+ m.emit(0xc0 | lcode(v[1]))
+ })
+ }
+ // ROLL 1, m32
+ if isConst1(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd1)
+ m.mrsd(0, addr(v[1]), 1)
+ })
+ }
+ // ROLL imm8, m32
+ if isImm8(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc1)
+ m.mrsd(0, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ROLL cl, m32
+ if v0 == CL && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd3)
+ m.mrsd(0, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ROLL")
+ }
+ return p
+}
+
+// ROLQ performs "Rotate Left".
+//
+// Mnemonic : ROL
+// Supported forms : (6 forms)
+//
+// * ROLQ 1, r64
+// * ROLQ imm8, r64
+// * ROLQ cl, r64
+// * ROLQ 1, m64
+// * ROLQ imm8, m64
+// * ROLQ cl, m64
+//
+func (self *Program) ROLQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ROLQ", 2, Operands { v0, v1 })
+ // ROLQ 1, r64
+ if isConst1(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xd1)
+ m.emit(0xc0 | lcode(v[1]))
+ })
+ }
+ // ROLQ imm8, r64
+ if isImm8(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xc1)
+ m.emit(0xc0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ROLQ cl, r64
+ if v0 == CL && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xd3)
+ m.emit(0xc0 | lcode(v[1]))
+ })
+ }
+ // ROLQ 1, m64
+ if isConst1(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xd1)
+ m.mrsd(0, addr(v[1]), 1)
+ })
+ }
+ // ROLQ imm8, m64
+ if isImm8(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xc1)
+ m.mrsd(0, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ROLQ cl, m64
+ if v0 == CL && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xd3)
+ m.mrsd(0, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ROLQ")
+ }
+ return p
+}
+
+// ROLW performs "Rotate Left".
+//
+// Mnemonic : ROL
+// Supported forms : (6 forms)
+//
+// * ROLW 1, r16
+// * ROLW imm8, r16
+// * ROLW cl, r16
+// * ROLW 1, m16
+// * ROLW imm8, m16
+// * ROLW cl, m16
+//
+func (self *Program) ROLW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("ROLW", 2, Operands { v0, v1 })
+ // ROLW 1, r16
+ if isConst1(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xd1)
+ m.emit(0xc0 | lcode(v[1]))
+ })
+ }
+ // ROLW imm8, r16
+ if isImm8(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xc1)
+ m.emit(0xc0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ROLW cl, r16
+ if v0 == CL && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xd3)
+ m.emit(0xc0 | lcode(v[1]))
+ })
+ }
+ // ROLW 1, m16
+ if isConst1(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd1)
+ m.mrsd(0, addr(v[1]), 1)
+ })
+ }
+ // ROLW imm8, m16
+ if isImm8(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc1)
+ m.mrsd(0, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ROLW cl, m16
+ if v0 == CL && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd3)
+ m.mrsd(0, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ROLW")
+ }
+ return p
+}
+
+// RORB performs "Rotate Right".
+//
+// Mnemonic : ROR
+// Supported forms : (6 forms)
+//
+// * RORB 1, r8
+// * RORB imm8, r8
+// * RORB cl, r8
+// * RORB 1, m8
+// * RORB imm8, m8
+// * RORB cl, m8
+//
+func (self *Program) RORB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("RORB", 2, Operands { v0, v1 })
+ // RORB 1, r8
+ if isConst1(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xd0)
+ m.emit(0xc8 | lcode(v[1]))
+ })
+ }
+ // RORB imm8, r8
+ if isImm8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xc0)
+ m.emit(0xc8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RORB cl, r8
+ if v0 == CL && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xd2)
+ m.emit(0xc8 | lcode(v[1]))
+ })
+ }
+ // RORB 1, m8
+ if isConst1(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd0)
+ m.mrsd(1, addr(v[1]), 1)
+ })
+ }
+ // RORB imm8, m8
+ if isImm8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc0)
+ m.mrsd(1, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RORB cl, m8
+ if v0 == CL && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd2)
+ m.mrsd(1, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for RORB")
+ }
+ return p
+}
+
+// RORL performs "Rotate Right".
+//
+// Mnemonic : ROR
+// Supported forms : (6 forms)
+//
+// * RORL 1, r32
+// * RORL imm8, r32
+// * RORL cl, r32
+// * RORL 1, m32
+// * RORL imm8, m32
+// * RORL cl, m32
+//
+func (self *Program) RORL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("RORL", 2, Operands { v0, v1 })
+ // RORL 1, r32
+ if isConst1(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xd1)
+ m.emit(0xc8 | lcode(v[1]))
+ })
+ }
+ // RORL imm8, r32
+ if isImm8(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xc1)
+ m.emit(0xc8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RORL cl, r32
+ if v0 == CL && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xd3)
+ m.emit(0xc8 | lcode(v[1]))
+ })
+ }
+ // RORL 1, m32
+ if isConst1(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd1)
+ m.mrsd(1, addr(v[1]), 1)
+ })
+ }
+ // RORL imm8, m32
+ if isImm8(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc1)
+ m.mrsd(1, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RORL cl, m32
+ if v0 == CL && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd3)
+ m.mrsd(1, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for RORL")
+ }
+ return p
+}
+
+// RORQ performs "Rotate Right".
+//
+// Mnemonic : ROR
+// Supported forms : (6 forms)
+//
+// * RORQ 1, r64
+// * RORQ imm8, r64
+// * RORQ cl, r64
+// * RORQ 1, m64
+// * RORQ imm8, m64
+// * RORQ cl, m64
+//
+func (self *Program) RORQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("RORQ", 2, Operands { v0, v1 })
+ // RORQ 1, r64
+ if isConst1(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xd1)
+ m.emit(0xc8 | lcode(v[1]))
+ })
+ }
+ // RORQ imm8, r64
+ if isImm8(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xc1)
+ m.emit(0xc8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RORQ cl, r64
+ if v0 == CL && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xd3)
+ m.emit(0xc8 | lcode(v[1]))
+ })
+ }
+ // RORQ 1, m64
+ if isConst1(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xd1)
+ m.mrsd(1, addr(v[1]), 1)
+ })
+ }
+ // RORQ imm8, m64
+ if isImm8(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xc1)
+ m.mrsd(1, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RORQ cl, m64
+ if v0 == CL && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xd3)
+ m.mrsd(1, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for RORQ")
+ }
+ return p
+}
+
+// RORW performs "Rotate Right".
+//
+// Mnemonic : ROR
+// Supported forms : (6 forms)
+//
+// * RORW 1, r16
+// * RORW imm8, r16
+// * RORW cl, r16
+// * RORW 1, m16
+// * RORW imm8, m16
+// * RORW cl, m16
+//
+func (self *Program) RORW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("RORW", 2, Operands { v0, v1 })
+ // RORW 1, r16
+ if isConst1(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xd1)
+ m.emit(0xc8 | lcode(v[1]))
+ })
+ }
+ // RORW imm8, r16
+ if isImm8(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xc1)
+ m.emit(0xc8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RORW cl, r16
+ if v0 == CL && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xd3)
+ m.emit(0xc8 | lcode(v[1]))
+ })
+ }
+ // RORW 1, m16
+ if isConst1(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd1)
+ m.mrsd(1, addr(v[1]), 1)
+ })
+ }
+ // RORW imm8, m16
+ if isImm8(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc1)
+ m.mrsd(1, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RORW cl, m16
+ if v0 == CL && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd3)
+ m.mrsd(1, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for RORW")
+ }
+ return p
+}
+
+// RORXL performs "Rotate Right Logical Without Affecting Flags".
+//
+// Mnemonic : RORX
+// Supported forms : (2 forms)
+//
+// * RORXL imm8, r32, r32 [BMI2]
+// * RORXL imm8, m32, r32 [BMI2]
+//
+func (self *Program) RORXL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("RORXL", 3, Operands { v0, v1, v2 })
+ // RORXL imm8, r32, r32
+ if isImm8(v0) && isReg32(v1) && isReg32(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7b)
+ m.emit(0xf0)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RORXL imm8, m32, r32
+ if isImm8(v0) && isM32(v1) && isReg32(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x03, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0xf0)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for RORXL")
+ }
+ return p
+}
+
+// RORXQ performs "Rotate Right Logical Without Affecting Flags".
+//
+// Mnemonic : RORX
+// Supported forms : (2 forms)
+//
+// * RORXQ imm8, r64, r64 [BMI2]
+// * RORXQ imm8, m64, r64 [BMI2]
+//
+func (self *Program) RORXQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("RORXQ", 3, Operands { v0, v1, v2 })
+ // RORXQ imm8, r64, r64
+ if isImm8(v0) && isReg64(v1) && isReg64(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0xfb)
+ m.emit(0xf0)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // RORXQ imm8, m64, r64
+ if isImm8(v0) && isM64(v1) && isReg64(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x83, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0xf0)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for RORXQ")
+ }
+ return p
+}
+
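+// Illustrative sketch (assuming a *Program p and the package's register
+// constants): RORX takes the rotate count first, then the source, then the
+// destination, and leaves RFLAGS untouched, which makes it useful inside
+// flag-sensitive sequences:
+//
+//     p.RORXQ(13, RAX, RBX)    // rbx = rax rotated right by 13 bits
+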
+// ROUNDPD performs "Round Packed Double Precision Floating-Point Values".
+//
+// Mnemonic : ROUNDPD
+// Supported forms : (2 forms)
+//
+// * ROUNDPD imm8, xmm, xmm [SSE4.1]
+// * ROUNDPD imm8, m128, xmm [SSE4.1]
+//
+func (self *Program) ROUNDPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("ROUNDPD", 3, Operands { v0, v1, v2 })
+ // ROUNDPD imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x09)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ROUNDPD imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x09)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ROUNDPD")
+ }
+ return p
+}
+
+// ROUNDPS performs "Round Packed Single Precision Floating-Point Values".
+//
+// Mnemonic : ROUNDPS
+// Supported forms : (2 forms)
+//
+// * ROUNDPS imm8, xmm, xmm [SSE4.1]
+// * ROUNDPS imm8, m128, xmm [SSE4.1]
+//
+func (self *Program) ROUNDPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("ROUNDPS", 3, Operands { v0, v1, v2 })
+ // ROUNDPS imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x08)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ROUNDPS imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x08)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ROUNDPS")
+ }
+ return p
+}
+
+// ROUNDSD performs "Round Scalar Double Precision Floating-Point Values".
+//
+// Mnemonic : ROUNDSD
+// Supported forms : (2 forms)
+//
+// * ROUNDSD imm8, xmm, xmm [SSE4.1]
+// * ROUNDSD imm8, m64, xmm [SSE4.1]
+//
+func (self *Program) ROUNDSD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("ROUNDSD", 3, Operands { v0, v1, v2 })
+ // ROUNDSD imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x0b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ROUNDSD imm8, m64, xmm
+ if isImm8(v0) && isM64(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x0b)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ROUNDSD")
+ }
+ return p
+}
+
+// ROUNDSS performs "Round Scalar Single Precision Floating-Point Values".
+//
+// Mnemonic : ROUNDSS
+// Supported forms : (2 forms)
+//
+// * ROUNDSS imm8, xmm, xmm [SSE4.1]
+// * ROUNDSS imm8, m32, xmm [SSE4.1]
+//
+func (self *Program) ROUNDSS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("ROUNDSS", 3, Operands { v0, v1, v2 })
+ // ROUNDSS imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x0a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // ROUNDSS imm8, m32, xmm
+ if isImm8(v0) && isM32(v1) && isXMM(v2) {
+ self.require(ISA_SSE4_1)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0x0a)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for ROUNDSS")
+ }
+ return p
+}
+
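+// Note on the imm8 operand shared by the ROUND* methods above: bits 1:0
+// select the rounding mode (00 nearest-even, 01 toward -inf, 10 toward +inf,
+// 11 truncate), bit 2 selects MXCSR.RC instead when set, and bit 3
+// suppresses precision exceptions. A sketch, assuming a *Program p and the
+// package's XMM constants:
+//
+//     p.ROUNDSD(0xb, XMM1, XMM0)    // xmm0 = trunc(xmm1), precision exception suppressed
+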
+// RSQRTPS performs "Compute Reciprocals of Square Roots of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : RSQRTPS
+// Supported forms : (2 forms)
+//
+// * RSQRTPS xmm, xmm [SSE]
+// * RSQRTPS m128, xmm [SSE]
+//
+func (self *Program) RSQRTPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("RSQRTPS", 2, Operands { v0, v1 })
+ // RSQRTPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x52)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // RSQRTPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x52)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for RSQRTPS")
+ }
+ return p
+}
+
+// RSQRTSS performs "Compute Reciprocal of Square Root of Scalar Single-Precision Floating-Point Value".
+//
+// Mnemonic : RSQRTSS
+// Supported forms : (2 forms)
+//
+// * RSQRTSS xmm, xmm [SSE]
+// * RSQRTSS m32, xmm [SSE]
+//
+func (self *Program) RSQRTSS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("RSQRTSS", 2, Operands { v0, v1 })
+ // RSQRTSS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x52)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // RSQRTSS m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x52)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for RSQRTSS")
+ }
+ return p
+}
+
+// SALB performs "Arithmetic Shift Left".
+//
+// Mnemonic : SAL
+// Supported forms : (6 forms)
+//
+// * SALB 1, r8
+// * SALB imm8, r8
+// * SALB cl, r8
+// * SALB 1, m8
+// * SALB imm8, m8
+// * SALB cl, m8
+//
+func (self *Program) SALB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SALB", 2, Operands { v0, v1 })
+ // SALB 1, r8
+ if isConst1(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xd0)
+ m.emit(0xe0 | lcode(v[1]))
+ })
+ }
+ // SALB imm8, r8
+ if isImm8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xc0)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SALB cl, r8
+ if v0 == CL && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xd2)
+ m.emit(0xe0 | lcode(v[1]))
+ })
+ }
+ // SALB 1, m8
+ if isConst1(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd0)
+ m.mrsd(4, addr(v[1]), 1)
+ })
+ }
+ // SALB imm8, m8
+ if isImm8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc0)
+ m.mrsd(4, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SALB cl, m8
+ if v0 == CL && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd2)
+ m.mrsd(4, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SALB")
+ }
+ return p
+}
+
+// SALL performs "Arithmetic Shift Left".
+//
+// Mnemonic : SAL
+// Supported forms : (6 forms)
+//
+// * SALL 1, r32
+// * SALL imm8, r32
+// * SALL cl, r32
+// * SALL 1, m32
+// * SALL imm8, m32
+// * SALL cl, m32
+//
+func (self *Program) SALL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SALL", 2, Operands { v0, v1 })
+ // SALL 1, r32
+ if isConst1(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xd1)
+ m.emit(0xe0 | lcode(v[1]))
+ })
+ }
+ // SALL imm8, r32
+ if isImm8(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xc1)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SALL cl, r32
+ if v0 == CL && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xd3)
+ m.emit(0xe0 | lcode(v[1]))
+ })
+ }
+ // SALL 1, m32
+ if isConst1(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd1)
+ m.mrsd(4, addr(v[1]), 1)
+ })
+ }
+ // SALL imm8, m32
+ if isImm8(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc1)
+ m.mrsd(4, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SALL cl, m32
+ if v0 == CL && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd3)
+ m.mrsd(4, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SALL")
+ }
+ return p
+}
+
+// SALQ performs "Arithmetic Shift Left".
+//
+// Mnemonic : SAL
+// Supported forms : (6 forms)
+//
+// * SALQ 1, r64
+// * SALQ imm8, r64
+// * SALQ cl, r64
+// * SALQ 1, m64
+// * SALQ imm8, m64
+// * SALQ cl, m64
+//
+func (self *Program) SALQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SALQ", 2, Operands { v0, v1 })
+ // SALQ 1, r64
+ if isConst1(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xd1)
+ m.emit(0xe0 | lcode(v[1]))
+ })
+ }
+ // SALQ imm8, r64
+ if isImm8(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xc1)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SALQ cl, r64
+ if v0 == CL && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xd3)
+ m.emit(0xe0 | lcode(v[1]))
+ })
+ }
+ // SALQ 1, m64
+ if isConst1(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xd1)
+ m.mrsd(4, addr(v[1]), 1)
+ })
+ }
+ // SALQ imm8, m64
+ if isImm8(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xc1)
+ m.mrsd(4, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SALQ cl, m64
+ if v0 == CL && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xd3)
+ m.mrsd(4, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SALQ")
+ }
+ return p
+}
+
+// SALW performs "Arithmetic Shift Left".
+//
+// Mnemonic : SAL
+// Supported forms : (6 forms)
+//
+// * SALW 1, r16
+// * SALW imm8, r16
+// * SALW cl, r16
+// * SALW 1, m16
+// * SALW imm8, m16
+// * SALW cl, m16
+//
+func (self *Program) SALW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SALW", 2, Operands { v0, v1 })
+ // SALW 1, r16
+ if isConst1(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xd1)
+ m.emit(0xe0 | lcode(v[1]))
+ })
+ }
+ // SALW imm8, r16
+ if isImm8(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xc1)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SALW cl, r16
+ if v0 == CL && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xd3)
+ m.emit(0xe0 | lcode(v[1]))
+ })
+ }
+ // SALW 1, m16
+ if isConst1(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd1)
+ m.mrsd(4, addr(v[1]), 1)
+ })
+ }
+ // SALW imm8, m16
+ if isImm8(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc1)
+ m.mrsd(4, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SALW cl, m16
+ if v0 == CL && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd3)
+ m.mrsd(4, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SALW")
+ }
+ return p
+}
+
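+// A minimal usage sketch (hand-written, not generator output) for the SAL*
+// family above. It assumes plain Go integer constants satisfy the
+// isConst1/isImm8 checks, as the toImmAny plumbing suggests; if the package
+// requires a dedicated immediate type, substitute it here.
+func sketchSAL(p *Program) {
+ p.SALB(1, AL)   // SALB 1, r8     -> D0 /4
+ p.SALL(3, EAX)  // SALL imm8, r32 -> C1 /4 ib
+ p.SALQ(CL, RAX) // SALQ cl, r64   -> REX.W + D3 /4
+ p.SALW(1, AX)   // SALW 1, r16    -> 66 D1 /4
+}
+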
+// SARB performs "Arithmetic Shift Right".
+//
+// Mnemonic : SAR
+// Supported forms : (6 forms)
+//
+// * SARB 1, r8
+// * SARB imm8, r8
+// * SARB cl, r8
+// * SARB 1, m8
+// * SARB imm8, m8
+// * SARB cl, m8
+//
+func (self *Program) SARB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SARB", 2, Operands { v0, v1 })
+ // SARB 1, r8
+ if isConst1(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xd0)
+ m.emit(0xf8 | lcode(v[1]))
+ })
+ }
+ // SARB imm8, r8
+ if isImm8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xc0)
+ m.emit(0xf8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SARB cl, r8
+ if v0 == CL && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xd2)
+ m.emit(0xf8 | lcode(v[1]))
+ })
+ }
+ // SARB 1, m8
+ if isConst1(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd0)
+ m.mrsd(7, addr(v[1]), 1)
+ })
+ }
+ // SARB imm8, m8
+ if isImm8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc0)
+ m.mrsd(7, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SARB cl, m8
+ if v0 == CL && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd2)
+ m.mrsd(7, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SARB")
+ }
+ return p
+}
+
+// SARL performs "Arithmetic Shift Right".
+//
+// Mnemonic : SAR
+// Supported forms : (6 forms)
+//
+// * SARL 1, r32
+// * SARL imm8, r32
+// * SARL cl, r32
+// * SARL 1, m32
+// * SARL imm8, m32
+// * SARL cl, m32
+//
+func (self *Program) SARL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SARL", 2, Operands { v0, v1 })
+ // SARL 1, r32
+ if isConst1(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xd1)
+ m.emit(0xf8 | lcode(v[1]))
+ })
+ }
+ // SARL imm8, r32
+ if isImm8(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xc1)
+ m.emit(0xf8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SARL cl, r32
+ if v0 == CL && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xd3)
+ m.emit(0xf8 | lcode(v[1]))
+ })
+ }
+ // SARL 1, m32
+ if isConst1(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd1)
+ m.mrsd(7, addr(v[1]), 1)
+ })
+ }
+ // SARL imm8, m32
+ if isImm8(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc1)
+ m.mrsd(7, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SARL cl, m32
+ if v0 == CL && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd3)
+ m.mrsd(7, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SARL")
+ }
+ return p
+}
+
+// SARQ performs "Arithmetic Shift Right".
+//
+// Mnemonic : SAR
+// Supported forms : (6 forms)
+//
+// * SARQ 1, r64
+// * SARQ imm8, r64
+// * SARQ cl, r64
+// * SARQ 1, m64
+// * SARQ imm8, m64
+// * SARQ cl, m64
+//
+func (self *Program) SARQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SARQ", 2, Operands { v0, v1 })
+ // SARQ 1, r64
+ if isConst1(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xd1)
+ m.emit(0xf8 | lcode(v[1]))
+ })
+ }
+ // SARQ imm8, r64
+ if isImm8(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xc1)
+ m.emit(0xf8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SARQ cl, r64
+ if v0 == CL && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xd3)
+ m.emit(0xf8 | lcode(v[1]))
+ })
+ }
+ // SARQ 1, m64
+ if isConst1(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xd1)
+ m.mrsd(7, addr(v[1]), 1)
+ })
+ }
+ // SARQ imm8, m64
+ if isImm8(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xc1)
+ m.mrsd(7, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SARQ cl, m64
+ if v0 == CL && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xd3)
+ m.mrsd(7, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SARQ")
+ }
+ return p
+}
+
+// SARW performs "Arithmetic Shift Right".
+//
+// Mnemonic : SAR
+// Supported forms : (6 forms)
+//
+// * SARW 1, r16
+// * SARW imm8, r16
+// * SARW cl, r16
+// * SARW 1, m16
+// * SARW imm8, m16
+// * SARW cl, m16
+//
+func (self *Program) SARW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SARW", 2, Operands { v0, v1 })
+ // SARW 1, r16
+ if isConst1(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xd1)
+ m.emit(0xf8 | lcode(v[1]))
+ })
+ }
+ // SARW imm8, r16
+ if isImm8(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xc1)
+ m.emit(0xf8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SARW cl, r16
+ if v0 == CL && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xd3)
+ m.emit(0xf8 | lcode(v[1]))
+ })
+ }
+ // SARW 1, m16
+ if isConst1(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd1)
+ m.mrsd(7, addr(v[1]), 1)
+ })
+ }
+ // SARW imm8, m16
+ if isImm8(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc1)
+ m.mrsd(7, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SARW cl, m16
+ if v0 == CL && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd3)
+ m.mrsd(7, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SARW")
+ }
+ return p
+}
+
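+// A minimal usage sketch (hand-written, not generator output): SAR* mirrors
+// SAL* but selects ModRM extension /7 and preserves the sign bit. Immediate
+// assumptions are the same as in the SAL sketch above.
+func sketchSAR(p *Program) {
+ p.SARL(1, EAX)  // SARL 1, r32    -> D1 /7
+ p.SARQ(63, RAX) // SARQ imm8, r64 -> REX.W + C1 /7 ib (broadcasts the sign bit)
+}
+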
+// SARXL performs "Arithmetic Shift Right Without Affecting Flags".
+//
+// Mnemonic : SARX
+// Supported forms : (2 forms)
+//
+// * SARXL r32, r32, r32 [BMI2]
+// * SARXL r32, m32, r32 [BMI2]
+//
+func (self *Program) SARXL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("SARXL", 3, Operands { v0, v1, v2 })
+ // SARXL r32, r32, r32
+ if isReg32(v0) && isReg32(v1) && isReg32(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7a ^ (hlcode(v[0]) << 3))
+ m.emit(0xf7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // SARXL r32, m32, r32
+ if isReg32(v0) && isM32(v1) && isReg32(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x02, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0xf7)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SARXL")
+ }
+ return p
+}
+
+// SARXQ performs "Arithmetic Shift Right Without Affecting Flags".
+//
+// Mnemonic : SARX
+// Supported forms : (2 forms)
+//
+// * SARXQ r64, r64, r64 [BMI2]
+// * SARXQ r64, m64, r64 [BMI2]
+//
+func (self *Program) SARXQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("SARXQ", 3, Operands { v0, v1, v2 })
+ // SARXQ r64, r64, r64
+ if isReg64(v0) && isReg64(v1) && isReg64(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0xfa ^ (hlcode(v[0]) << 3))
+ m.emit(0xf7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // SARXQ r64, m64, r64
+ if isReg64(v0) && isM64(v1) && isReg64(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x82, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0xf7)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SARXQ")
+ }
+ return p
+}
+
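+// A minimal usage sketch (hand-written, not generator output) for the BMI2
+// forms above: SARX takes the shift count first (encoded in VEX.vvvv), then
+// the source, then an independent destination, and leaves EFLAGS untouched.
+// ECX/EDX are assumed to be defined alongside EAX in this package's
+// register set.
+func sketchSARX(p *Program) {
+ p.SARXL(ECX, EDX, EAX) // SARXL r32, r32, r32 [BMI2]: eax = int32(edx) >> ecx
+}
+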
+// SBBB performs "Subtract with Borrow".
+//
+// Mnemonic : SBB
+// Supported forms : (6 forms)
+//
+// * SBBB imm8, al
+// * SBBB imm8, r8
+// * SBBB r8, r8
+// * SBBB m8, r8
+// * SBBB imm8, m8
+// * SBBB r8, m8
+//
+func (self *Program) SBBB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SBBB", 2, Operands { v0, v1 })
+ // SBBB imm8, al
+ if isImm8(v0) && v1 == AL {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x1c)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SBBB imm8, r8
+ if isImm8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0x80)
+ m.emit(0xd8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SBBB r8, r8
+ if isReg8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
+ m.emit(0x18)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1]))
+ m.emit(0x1a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // SBBB m8, r8
+ if isM8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1]))
+ m.emit(0x1a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // SBBB imm8, m8
+ if isImm8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x80)
+ m.mrsd(3, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SBBB r8, m8
+ if isReg8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
+ m.emit(0x18)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SBBB")
+ }
+ return p
+}
+
+// SBBL performs "Subtract with Borrow".
+//
+// Mnemonic : SBB
+// Supported forms : (8 forms)
+//
+// * SBBL imm32, eax
+// * SBBL imm8, r32
+// * SBBL imm32, r32
+// * SBBL r32, r32
+// * SBBL m32, r32
+// * SBBL imm8, m32
+// * SBBL imm32, m32
+// * SBBL r32, m32
+//
+func (self *Program) SBBL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SBBL", 2, Operands { v0, v1 })
+ // SBBL imm32, eax
+ if isImm32(v0) && v1 == EAX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x1d)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // SBBL imm8, r32
+ if isImm8Ext(v0, 4) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x83)
+ m.emit(0xd8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SBBL imm32, r32
+ if isImm32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x81)
+ m.emit(0xd8 | lcode(v[1]))
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // SBBL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x19)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x1b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // SBBL m32, r32
+ if isM32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x1b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // SBBL imm8, m32
+ if isImm8Ext(v0, 4) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x83)
+ m.mrsd(3, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SBBL imm32, m32
+ if isImm32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x81)
+ m.mrsd(3, addr(v[1]), 1)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // SBBL r32, m32
+ if isReg32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x19)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SBBL")
+ }
+ return p
+}
+
+// SBBQ performs "Subtract with Borrow".
+//
+// Mnemonic : SBB
+// Supported forms : (8 forms)
+//
+// * SBBQ imm32, rax
+// * SBBQ imm8, r64
+// * SBBQ imm32, r64
+// * SBBQ r64, r64
+// * SBBQ m64, r64
+// * SBBQ imm8, m64
+// * SBBQ imm32, m64
+// * SBBQ r64, m64
+//
+func (self *Program) SBBQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SBBQ", 2, Operands { v0, v1 })
+ // SBBQ imm32, rax
+ if isImm32(v0) && v1 == RAX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48)
+ m.emit(0x1d)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // SBBQ imm8, r64
+ if isImm8Ext(v0, 8) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0x83)
+ m.emit(0xd8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SBBQ imm32, r64
+ if isImm32Ext(v0, 8) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0x81)
+ m.emit(0xd8 | lcode(v[1]))
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // SBBQ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
+ m.emit(0x19)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x1b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // SBBQ m64, r64
+ if isM64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x1b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // SBBQ imm8, m64
+ if isImm8Ext(v0, 8) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0x83)
+ m.mrsd(3, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SBBQ imm32, m64
+ if isImm32Ext(v0, 8) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0x81)
+ m.mrsd(3, addr(v[1]), 1)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // SBBQ r64, m64
+ if isReg64(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[0]), addr(v[1]))
+ m.emit(0x19)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SBBQ")
+ }
+ return p
+}
+
+// SBBW performs "Subtract with Borrow".
+//
+// Mnemonic : SBB
+// Supported forms : (8 forms)
+//
+// * SBBW imm16, ax
+// * SBBW imm8, r16
+// * SBBW imm16, r16
+// * SBBW r16, r16
+// * SBBW m16, r16
+// * SBBW imm8, m16
+// * SBBW imm16, m16
+// * SBBW r16, m16
+//
+func (self *Program) SBBW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SBBW", 2, Operands { v0, v1 })
+ // SBBW imm16, ax
+ if isImm16(v0) && v1 == AX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.emit(0x1d)
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // SBBW imm8, r16
+ if isImm8Ext(v0, 2) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x83)
+ m.emit(0xd8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SBBW imm16, r16
+ if isImm16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x81)
+ m.emit(0xd8 | lcode(v[1]))
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // SBBW r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x19)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x1b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // SBBW m16, r16
+ if isM16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x1b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // SBBW imm8, m16
+ if isImm8Ext(v0, 2) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x83)
+ m.mrsd(3, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SBBW imm16, m16
+ if isImm16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x81)
+ m.mrsd(3, addr(v[1]), 1)
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // SBBW r16, m16
+ if isReg16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x19)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SBBW")
+ }
+ return p
+}
+
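+// A minimal usage sketch (hand-written, not generator output): SBB subtracts
+// the source plus the carry flag, so a 128-bit subtract chains a plain SUB
+// with SBBQ. SUBQ is assumed to be generated elsewhere in this file, and
+// RAX/RBX/RCX/RDX to be package-level register values.
+func sketchSBB(p *Program) {
+ p.SUBQ(RCX, RAX) // low halves:  rax -= rcx, CF = borrow
+ p.SBBQ(RDX, RBX) // high halves: rbx -= rdx + CF
+}
+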
+// SETA performs "Set byte if above (CF == 0 and ZF == 0)".
+//
+// Mnemonic : SETA
+// Supported forms : (2 forms)
+//
+// * SETA r8
+// * SETA m8
+//
+func (self *Program) SETA(v0 interface{}) *Instruction {
+ p := self.alloc("SETA", 1, Operands { v0 })
+ // SETA r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x97)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETA m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x97)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETA")
+ }
+ return p
+}
+
+// SETAE performs "Set byte if above or equal (CF == 0)".
+//
+// Mnemonic : SETAE
+// Supported forms : (2 forms)
+//
+// * SETAE r8
+// * SETAE m8
+//
+func (self *Program) SETAE(v0 interface{}) *Instruction {
+ p := self.alloc("SETAE", 1, Operands { v0 })
+ // SETAE r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x93)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETAE m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x93)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETAE")
+ }
+ return p
+}
+
+// SETB performs "Set byte if below (CF == 1)".
+//
+// Mnemonic : SETB
+// Supported forms : (2 forms)
+//
+// * SETB r8
+// * SETB m8
+//
+func (self *Program) SETB(v0 interface{}) *Instruction {
+ p := self.alloc("SETB", 1, Operands { v0 })
+ // SETB r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x92)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETB m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x92)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETB")
+ }
+ return p
+}
+
+// SETBE performs "Set byte if below or equal (CF == 1 or ZF == 1)".
+//
+// Mnemonic : SETBE
+// Supported forms : (2 forms)
+//
+// * SETBE r8
+// * SETBE m8
+//
+func (self *Program) SETBE(v0 interface{}) *Instruction {
+ p := self.alloc("SETBE", 1, Operands { v0 })
+ // SETBE r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x96)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETBE m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x96)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETBE")
+ }
+ return p
+}
+
+// SETC performs "Set byte if carry (CF == 1)".
+//
+// Mnemonic : SETC
+// Supported forms : (2 forms)
+//
+// * SETC r8
+// * SETC m8
+//
+func (self *Program) SETC(v0 interface{}) *Instruction {
+ p := self.alloc("SETC", 1, Operands { v0 })
+ // SETC r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x92)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETC m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x92)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETC")
+ }
+ return p
+}
+
+// SETE performs "Set byte if equal (ZF == 1)".
+//
+// Mnemonic : SETE
+// Supported forms : (2 forms)
+//
+// * SETE r8
+// * SETE m8
+//
+func (self *Program) SETE(v0 interface{}) *Instruction {
+ p := self.alloc("SETE", 1, Operands { v0 })
+ // SETE r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x94)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETE m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x94)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETE")
+ }
+ return p
+}
+
+// SETG performs "Set byte if greater (ZF == 0 and SF == OF)".
+//
+// Mnemonic : SETG
+// Supported forms : (2 forms)
+//
+// * SETG r8
+// * SETG m8
+//
+func (self *Program) SETG(v0 interface{}) *Instruction {
+ p := self.alloc("SETG", 1, Operands { v0 })
+ // SETG r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x9f)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETG m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x9f)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETG")
+ }
+ return p
+}
+
+// SETGE performs "Set byte if greater or equal (SF == OF)".
+//
+// Mnemonic : SETGE
+// Supported forms : (2 forms)
+//
+// * SETGE r8
+// * SETGE m8
+//
+func (self *Program) SETGE(v0 interface{}) *Instruction {
+ p := self.alloc("SETGE", 1, Operands { v0 })
+ // SETGE r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x9d)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETGE m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x9d)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETGE")
+ }
+ return p
+}
+
+// SETL performs "Set byte if less (SF != OF)".
+//
+// Mnemonic : SETL
+// Supported forms : (2 forms)
+//
+// * SETL r8
+// * SETL m8
+//
+func (self *Program) SETL(v0 interface{}) *Instruction {
+ p := self.alloc("SETL", 1, Operands { v0 })
+ // SETL r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x9c)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETL m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x9c)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETL")
+ }
+ return p
+}
+
+// SETLE performs "Set byte if less or equal (ZF == 1 or SF != OF)".
+//
+// Mnemonic : SETLE
+// Supported forms : (2 forms)
+//
+// * SETLE r8
+// * SETLE m8
+//
+func (self *Program) SETLE(v0 interface{}) *Instruction {
+ p := self.alloc("SETLE", 1, Operands { v0 })
+ // SETLE r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x9e)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETLE m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x9e)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETLE")
+ }
+ return p
+}
+
+// SETNA performs "Set byte if not above (CF == 1 or ZF == 1)".
+//
+// Mnemonic : SETNA
+// Supported forms : (2 forms)
+//
+// * SETNA r8
+// * SETNA m8
+//
+func (self *Program) SETNA(v0 interface{}) *Instruction {
+ p := self.alloc("SETNA", 1, Operands { v0 })
+ // SETNA r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x96)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETNA m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x96)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETNA")
+ }
+ return p
+}
+
+// SETNAE performs "Set byte if not above or equal (CF == 1)".
+//
+// Mnemonic : SETNAE
+// Supported forms : (2 forms)
+//
+// * SETNAE r8
+// * SETNAE m8
+//
+func (self *Program) SETNAE(v0 interface{}) *Instruction {
+ p := self.alloc("SETNAE", 1, Operands { v0 })
+ // SETNAE r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x92)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETNAE m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x92)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETNAE")
+ }
+ return p
+}
+
+// SETNB performs "Set byte if not below (CF == 0)".
+//
+// Mnemonic : SETNB
+// Supported forms : (2 forms)
+//
+// * SETNB r8
+// * SETNB m8
+//
+func (self *Program) SETNB(v0 interface{}) *Instruction {
+ p := self.alloc("SETNB", 1, Operands { v0 })
+ // SETNB r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x93)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETNB m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x93)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETNB")
+ }
+ return p
+}
+
+// SETNBE performs "Set byte if not below or equal (CF == 0 and ZF == 0)".
+//
+// Mnemonic : SETNBE
+// Supported forms : (2 forms)
+//
+// * SETNBE r8
+// * SETNBE m8
+//
+func (self *Program) SETNBE(v0 interface{}) *Instruction {
+ p := self.alloc("SETNBE", 1, Operands { v0 })
+ // SETNBE r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x97)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETNBE m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x97)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETNBE")
+ }
+ return p
+}
+
+// SETNC performs "Set byte if not carry (CF == 0)".
+//
+// Mnemonic : SETNC
+// Supported forms : (2 forms)
+//
+// * SETNC r8
+// * SETNC m8
+//
+func (self *Program) SETNC(v0 interface{}) *Instruction {
+ p := self.alloc("SETNC", 1, Operands { v0 })
+ // SETNC r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x93)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETNC m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x93)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETNC")
+ }
+ return p
+}
+
+// SETNE performs "Set byte if not equal (ZF == 0)".
+//
+// Mnemonic : SETNE
+// Supported forms : (2 forms)
+//
+// * SETNE r8
+// * SETNE m8
+//
+func (self *Program) SETNE(v0 interface{}) *Instruction {
+ p := self.alloc("SETNE", 1, Operands { v0 })
+ // SETNE r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x95)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETNE m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x95)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETNE")
+ }
+ return p
+}
+
+// SETNG performs "Set byte if not greater (ZF == 1 or SF != OF)".
+//
+// Mnemonic : SETNG
+// Supported forms : (2 forms)
+//
+// * SETNG r8
+// * SETNG m8
+//
+func (self *Program) SETNG(v0 interface{}) *Instruction {
+ p := self.alloc("SETNG", 1, Operands { v0 })
+ // SETNG r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x9e)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETNG m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x9e)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETNG")
+ }
+ return p
+}
+
+// SETNGE performs "Set byte if not greater or equal (SF != OF)".
+//
+// Mnemonic : SETNGE
+// Supported forms : (2 forms)
+//
+// * SETNGE r8
+// * SETNGE m8
+//
+func (self *Program) SETNGE(v0 interface{}) *Instruction {
+ p := self.alloc("SETNGE", 1, Operands { v0 })
+ // SETNGE r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x9c)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETNGE m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x9c)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETNGE")
+ }
+ return p
+}
+
+// SETNL performs "Set byte if not less (SF == OF)".
+//
+// Mnemonic : SETNL
+// Supported forms : (2 forms)
+//
+// * SETNL r8
+// * SETNL m8
+//
+func (self *Program) SETNL(v0 interface{}) *Instruction {
+ p := self.alloc("SETNL", 1, Operands { v0 })
+ // SETNL r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x9d)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETNL m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x9d)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETNL")
+ }
+ return p
+}
+
+// SETNLE performs "Set byte if not less or equal (ZF == 0 and SF == OF)".
+//
+// Mnemonic : SETNLE
+// Supported forms : (2 forms)
+//
+// * SETNLE r8
+// * SETNLE m8
+//
+func (self *Program) SETNLE(v0 interface{}) *Instruction {
+ p := self.alloc("SETNLE", 1, Operands { v0 })
+ // SETNLE r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x9f)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETNLE m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x9f)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETNLE")
+ }
+ return p
+}
+
+// SETNO performs "Set byte if not overflow (OF == 0)".
+//
+// Mnemonic : SETNO
+// Supported forms : (2 forms)
+//
+// * SETNO r8
+// * SETNO m8
+//
+func (self *Program) SETNO(v0 interface{}) *Instruction {
+ p := self.alloc("SETNO", 1, Operands { v0 })
+ // SETNO r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x91)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETNO m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x91)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETNO")
+ }
+ return p
+}
+
+// SETNP performs "Set byte if not parity (PF == 0)".
+//
+// Mnemonic : SETNP
+// Supported forms : (2 forms)
+//
+// * SETNP r8
+// * SETNP m8
+//
+func (self *Program) SETNP(v0 interface{}) *Instruction {
+ p := self.alloc("SETNP", 1, Operands { v0 })
+ // SETNP r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x9b)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETNP m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x9b)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETNP")
+ }
+ return p
+}
+
+// SETNS performs "Set byte if not sign (SF == 0)".
+//
+// Mnemonic : SETNS
+// Supported forms : (2 forms)
+//
+// * SETNS r8
+// * SETNS m8
+//
+func (self *Program) SETNS(v0 interface{}) *Instruction {
+ p := self.alloc("SETNS", 1, Operands { v0 })
+ // SETNS r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x99)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETNS m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x99)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETNS")
+ }
+ return p
+}
+
+// SETNZ performs "Set byte if not zero (ZF == 0)".
+//
+// Mnemonic : SETNZ
+// Supported forms : (2 forms)
+//
+// * SETNZ r8
+// * SETNZ m8
+//
+func (self *Program) SETNZ(v0 interface{}) *Instruction {
+ p := self.alloc("SETNZ", 1, Operands { v0 })
+ // SETNZ r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x95)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETNZ m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x95)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETNZ")
+ }
+ return p
+}
+
+// SETO performs "Set byte if overflow (OF == 1)".
+//
+// Mnemonic : SETO
+// Supported forms : (2 forms)
+//
+// * SETO r8
+// * SETO m8
+//
+func (self *Program) SETO(v0 interface{}) *Instruction {
+ p := self.alloc("SETO", 1, Operands { v0 })
+ // SETO r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x90)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETO m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x90)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETO")
+ }
+ return p
+}
+
+// SETP performs "Set byte if parity (PF == 1)".
+//
+// Mnemonic : SETP
+// Supported forms : (2 forms)
+//
+// * SETP r8
+// * SETP m8
+//
+func (self *Program) SETP(v0 interface{}) *Instruction {
+ p := self.alloc("SETP", 1, Operands { v0 })
+ // SETP r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x9a)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETP m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x9a)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETP")
+ }
+ return p
+}
+
+// SETPE performs "Set byte if parity even (PF == 1)".
+//
+// Mnemonic : SETPE
+// Supported forms : (2 forms)
+//
+// * SETPE r8
+// * SETPE m8
+//
+func (self *Program) SETPE(v0 interface{}) *Instruction {
+ p := self.alloc("SETPE", 1, Operands { v0 })
+ // SETPE r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x9a)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETPE m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x9a)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETPE")
+ }
+ return p
+}
+
+// SETPO performs "Set byte if parity odd (PF == 0)".
+//
+// Mnemonic : SETPO
+// Supported forms : (2 forms)
+//
+// * SETPO r8
+// * SETPO m8
+//
+func (self *Program) SETPO(v0 interface{}) *Instruction {
+ p := self.alloc("SETPO", 1, Operands { v0 })
+ // SETPO r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x9b)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETPO m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x9b)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETPO")
+ }
+ return p
+}
+
+// SETS performs "Set byte if sign (SF == 1)".
+//
+// Mnemonic : SETS
+// Supported forms : (2 forms)
+//
+// * SETS r8
+// * SETS m8
+//
+func (self *Program) SETS(v0 interface{}) *Instruction {
+ p := self.alloc("SETS", 1, Operands { v0 })
+ // SETS r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x98)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETS m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x98)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETS")
+ }
+ return p
+}
+
+// SETZ performs "Set byte if zero (ZF == 1)".
+//
+// Mnemonic : SETZ
+// Supported forms : (2 forms)
+//
+// * SETZ r8
+// * SETZ m8
+//
+func (self *Program) SETZ(v0 interface{}) *Instruction {
+ p := self.alloc("SETZ", 1, Operands { v0 })
+ // SETZ r8
+ if isReg8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0x94)
+ m.emit(0xc0 | lcode(v[0]))
+ })
+ }
+ // SETZ m8
+ if isM8(v0) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x94)
+ m.mrsd(0, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SETZ")
+ }
+ return p
+}
+
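+// A minimal usage sketch (hand-written, not generator output): the SETcc
+// family above materializes a flag predicate as a 0/1 byte, typically right
+// after a compare. CMPL is assumed to be generated elsewhere in this file
+// with the same (source, destination) operand order used throughout.
+func sketchSETcc(p *Program) {
+ p.CMPL(ECX, EAX) // flags from eax - ecx
+ p.SETE(AL)       // al = 1 if eax == ecx (ZF == 1), else 0
+ p.SETA(CL)       // cl = 1 if eax > ecx unsigned (CF == 0 and ZF == 0)
+}
+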
+// SFENCE performs "Store Fence".
+//
+// Mnemonic : SFENCE
+// Supported forms : (1 form)
+//
+// * SFENCE [MMX+]
+//
+func (self *Program) SFENCE() *Instruction {
+ p := self.alloc("SFENCE", 0, Operands { })
+ // SFENCE
+ self.require(ISA_MMX_PLUS)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0xae)
+ m.emit(0xf8)
+ })
+ return p
+}
+
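+// A minimal usage sketch (hand-written, not generator output): SFENCE takes
+// no operands, so the call is unconditional; it orders earlier stores
+// (including non-temporal ones) before later stores.
+func sketchSFENCE(p *Program) {
+ p.SFENCE() // emits 0F AE F8
+}
+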
+// SHA1MSG1 performs "Perform an Intermediate Calculation for the Next Four SHA1 Message Doublewords".
+//
+// Mnemonic : SHA1MSG1
+// Supported forms : (2 forms)
+//
+// * SHA1MSG1 xmm, xmm [SHA]
+// * SHA1MSG1 m128, xmm [SHA]
+//
+func (self *Program) SHA1MSG1(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SHA1MSG1", 2, Operands { v0, v1 })
+ // SHA1MSG1 xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SHA)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xc9)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // SHA1MSG1 m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SHA)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xc9)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHA1MSG1")
+ }
+ return p
+}
+
+// SHA1MSG2 performs "Perform a Final Calculation for the Next Four SHA1 Message Doublewords".
+//
+// Mnemonic : SHA1MSG2
+// Supported forms : (2 forms)
+//
+// * SHA1MSG2 xmm, xmm [SHA]
+// * SHA1MSG2 m128, xmm [SHA]
+//
+func (self *Program) SHA1MSG2(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SHA1MSG2", 2, Operands { v0, v1 })
+ // SHA1MSG2 xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SHA)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xca)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // SHA1MSG2 m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SHA)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xca)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHA1MSG2")
+ }
+ return p
+}
+
+// SHA1NEXTE performs "Calculate SHA1 State Variable E after Four Rounds".
+//
+// Mnemonic : SHA1NEXTE
+// Supported forms : (2 forms)
+//
+// * SHA1NEXTE xmm, xmm [SHA]
+// * SHA1NEXTE m128, xmm [SHA]
+//
+func (self *Program) SHA1NEXTE(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SHA1NEXTE", 2, Operands { v0, v1 })
+ // SHA1NEXTE xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SHA)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xc8)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // SHA1NEXTE m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SHA)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xc8)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHA1NEXTE")
+ }
+ return p
+}
+
+// SHA1RNDS4 performs "Perform Four Rounds of SHA1 Operation".
+//
+// Mnemonic : SHA1RNDS4
+// Supported forms : (2 forms)
+//
+// * SHA1RNDS4 imm8, xmm, xmm [SHA]
+// * SHA1RNDS4 imm8, m128, xmm [SHA]
+//
+func (self *Program) SHA1RNDS4(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("SHA1RNDS4", 3, Operands { v0, v1, v2 })
+ // SHA1RNDS4 imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SHA)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0xcc)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHA1RNDS4 imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_SHA)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x3a)
+ m.emit(0xcc)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHA1RNDS4")
+ }
+ return p
+}
+
+// SHA256MSG1 performs "Perform an Intermediate Calculation for the Next Four SHA256 Message Doublewords".
+//
+// Mnemonic : SHA256MSG1
+// Supported forms : (2 forms)
+//
+// * SHA256MSG1 xmm, xmm [SHA]
+// * SHA256MSG1 m128, xmm [SHA]
+//
+func (self *Program) SHA256MSG1(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SHA256MSG1", 2, Operands { v0, v1 })
+ // SHA256MSG1 xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SHA)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xcc)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // SHA256MSG1 m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SHA)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xcc)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHA256MSG1")
+ }
+ return p
+}
+
+// SHA256MSG2 performs "Perform a Final Calculation for the Next Four SHA256 Message Doublewords".
+//
+// Mnemonic : SHA256MSG2
+// Supported forms : (2 forms)
+//
+// * SHA256MSG2 xmm, xmm [SHA]
+// * SHA256MSG2 m128, xmm [SHA]
+//
+func (self *Program) SHA256MSG2(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SHA256MSG2", 2, Operands { v0, v1 })
+ // SHA256MSG2 xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SHA)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xcd)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // SHA256MSG2 m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SHA)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xcd)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHA256MSG2")
+ }
+ return p
+}
+
+// SHA256RNDS2 performs "Perform Two Rounds of SHA256 Operation".
+//
+// Mnemonic : SHA256RNDS2
+// Supported forms : (2 forms)
+//
+// * SHA256RNDS2 xmm0, xmm, xmm [SHA]
+// * SHA256RNDS2 xmm0, m128, xmm [SHA]
+//
+func (self *Program) SHA256RNDS2(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("SHA256RNDS2", 3, Operands { v0, v1, v2 })
+ // SHA256RNDS2 xmm0, xmm, xmm
+ if v0 == XMM0 && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SHA)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xcb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // SHA256RNDS2 xmm0, m128, xmm
+ if v0 == XMM0 && isM128(v1) && isXMM(v2) {
+ self.require(ISA_SHA)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0x38)
+ m.emit(0xcb)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHA256RNDS2")
+ }
+ return p
+}
+
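+// A minimal usage sketch (hand-written, not generator output) for the SHA
+// extension forms above: SHA256RNDS2 hard-wires XMM0 as the implicit
+// round-key operand, which is why its first form matches v0 == XMM0 rather
+// than any xmm. XMM1/XMM2 are assumed to be defined alongside XMM0.
+func sketchSHA256(p *Program) {
+ p.SHA256RNDS2(XMM0, XMM1, XMM2) // two rounds: state in xmm2, msg input in xmm1, wk in xmm0
+}
+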
+// SHLB performs "Logical Shift Left".
+//
+// Mnemonic : SHL
+// Supported forms : (6 forms)
+//
+// * SHLB 1, r8
+// * SHLB imm8, r8
+// * SHLB cl, r8
+// * SHLB 1, m8
+// * SHLB imm8, m8
+// * SHLB cl, m8
+//
+func (self *Program) SHLB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SHLB", 2, Operands { v0, v1 })
+ // SHLB 1, r8
+ if isConst1(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xd0)
+ m.emit(0xe0 | lcode(v[1]))
+ })
+ }
+ // SHLB imm8, r8
+ if isImm8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xc0)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHLB cl, r8
+ if v0 == CL && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xd2)
+ m.emit(0xe0 | lcode(v[1]))
+ })
+ }
+ // SHLB 1, m8
+ if isConst1(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd0)
+ m.mrsd(4, addr(v[1]), 1)
+ })
+ }
+ // SHLB imm8, m8
+ if isImm8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc0)
+ m.mrsd(4, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHLB cl, m8
+ if v0 == CL && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd2)
+ m.mrsd(4, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHLB")
+ }
+ return p
+}
+
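+// A minimal usage sketch (hand-written, not generator output): SHL and SAL
+// share ModRM extension /4, so SHLB emits exactly the same bytes as SALB for
+// identical operands; both spellings exist purely for source-level clarity.
+func sketchSHL(p *Program) {
+ p.SHLB(1, AL) // SHLB 1, r8 -> D0 /4, byte-identical to SALB(1, AL)
+}
+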
+// SHLDL performs "Integer Double Precision Shift Left".
+//
+// Mnemonic : SHLD
+// Supported forms : (4 forms)
+//
+// * SHLDL imm8, r32, r32
+// * SHLDL cl, r32, r32
+// * SHLDL imm8, r32, m32
+// * SHLDL cl, r32, m32
+//
+func (self *Program) SHLDL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("SHLDL", 3, Operands { v0, v1, v2 })
+ // SHLDL imm8, r32, r32
+ if isImm8(v0) && isReg32(v1) && isReg32(v2) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[2], false)
+ m.emit(0x0f)
+ m.emit(0xa4)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHLDL cl, r32, r32
+ if v0 == CL && isReg32(v1) && isReg32(v2) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[2], false)
+ m.emit(0x0f)
+ m.emit(0xa5)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ })
+ }
+ // SHLDL imm8, r32, m32
+ if isImm8(v0) && isReg32(v1) && isM32(v2) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[2]), false)
+ m.emit(0x0f)
+ m.emit(0xa4)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHLDL cl, r32, m32
+ if v0 == CL && isReg32(v1) && isM32(v2) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[2]), false)
+ m.emit(0x0f)
+ m.emit(0xa5)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHLDL")
+ }
+ return p
+}
+
+// SHLDQ performs "Integer Double Precision Shift Left".
+//
+// Mnemonic : SHLD
+// Supported forms : (4 forms)
+//
+// * SHLDQ imm8, r64, r64
+// * SHLDQ cl, r64, r64
+// * SHLDQ imm8, r64, m64
+// * SHLDQ cl, r64, m64
+//
+func (self *Program) SHLDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("SHLDQ", 3, Operands { v0, v1, v2 })
+ // SHLDQ imm8, r64, r64
+ if isImm8(v0) && isReg64(v1) && isReg64(v2) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[2]))
+ m.emit(0x0f)
+ m.emit(0xa4)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHLDQ cl, r64, r64
+ if v0 == CL && isReg64(v1) && isReg64(v2) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[2]))
+ m.emit(0x0f)
+ m.emit(0xa5)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ })
+ }
+ // SHLDQ imm8, r64, m64
+ if isImm8(v0) && isReg64(v1) && isM64(v2) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[2]))
+ m.emit(0x0f)
+ m.emit(0xa4)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHLDQ cl, r64, m64
+ if v0 == CL && isReg64(v1) && isM64(v2) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[2]))
+ m.emit(0x0f)
+ m.emit(0xa5)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHLDQ")
+ }
+ return p
+}
+
+// SHLDW performs "Integer Double Precision Shift Left".
+//
+// Mnemonic : SHLD
+// Supported forms : (4 forms)
+//
+// * SHLDW imm8, r16, r16
+// * SHLDW cl, r16, r16
+// * SHLDW imm8, r16, m16
+// * SHLDW cl, r16, m16
+//
+func (self *Program) SHLDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("SHLDW", 3, Operands { v0, v1, v2 })
+ // SHLDW imm8, r16, r16
+ if isImm8(v0) && isReg16(v1) && isReg16(v2) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[2], false)
+ m.emit(0x0f)
+ m.emit(0xa4)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHLDW cl, r16, r16
+ if v0 == CL && isReg16(v1) && isReg16(v2) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[2], false)
+ m.emit(0x0f)
+ m.emit(0xa5)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ })
+ }
+ // SHLDW imm8, r16, m16
+ if isImm8(v0) && isReg16(v1) && isM16(v2) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[2]), false)
+ m.emit(0x0f)
+ m.emit(0xa4)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHLDW cl, r16, m16
+ if v0 == CL && isReg16(v1) && isM16(v2) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[2]), false)
+ m.emit(0x0f)
+ m.emit(0xa5)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHLDW")
+ }
+ return p
+}
+
+// SHLL performs "Logical Shift Left".
+//
+// Mnemonic : SHL
+// Supported forms : (6 forms)
+//
+// * SHLL 1, r32
+// * SHLL imm8, r32
+// * SHLL cl, r32
+// * SHLL 1, m32
+// * SHLL imm8, m32
+// * SHLL cl, m32
+//
+func (self *Program) SHLL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SHLL", 2, Operands { v0, v1 })
+ // SHLL 1, r32
+ if isConst1(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xd1)
+ m.emit(0xe0 | lcode(v[1]))
+ })
+ }
+ // SHLL imm8, r32
+ if isImm8(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xc1)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHLL cl, r32
+ if v0 == CL && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xd3)
+ m.emit(0xe0 | lcode(v[1]))
+ })
+ }
+ // SHLL 1, m32
+ if isConst1(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd1)
+ m.mrsd(4, addr(v[1]), 1)
+ })
+ }
+ // SHLL imm8, m32
+ if isImm8(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc1)
+ m.mrsd(4, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHLL cl, m32
+ if v0 == CL && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd3)
+ m.mrsd(4, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHLL")
+ }
+ return p
+}
+
+// SHLQ performs "Logical Shift Left".
+//
+// Mnemonic : SHL
+// Supported forms : (6 forms)
+//
+// * SHLQ 1, r64
+// * SHLQ imm8, r64
+// * SHLQ cl, r64
+// * SHLQ 1, m64
+// * SHLQ imm8, m64
+// * SHLQ cl, m64
+//
+func (self *Program) SHLQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SHLQ", 2, Operands { v0, v1 })
+ // SHLQ 1, r64
+ if isConst1(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xd1)
+ m.emit(0xe0 | lcode(v[1]))
+ })
+ }
+ // SHLQ imm8, r64
+ if isImm8(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xc1)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHLQ cl, r64
+ if v0 == CL && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xd3)
+ m.emit(0xe0 | lcode(v[1]))
+ })
+ }
+ // SHLQ 1, m64
+ if isConst1(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xd1)
+ m.mrsd(4, addr(v[1]), 1)
+ })
+ }
+ // SHLQ imm8, m64
+ if isImm8(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xc1)
+ m.mrsd(4, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHLQ cl, m64
+ if v0 == CL && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xd3)
+ m.mrsd(4, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHLQ")
+ }
+ return p
+}
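+
+// NOTE (reviewer sketch, not produced by mkasm_amd64.py): the three SHLQ
+// register forms map onto the classic shift-group opcodes with /4 as the
+// SHL ModRM extension, which is where the 0xe0|lcode(...) byte
+// (0b11_100_reg) comes from:
+//
+//     p.SHLQ(1, RAX)  // 48 D1 E0    (REX.W D1 /4, shift by one)
+//     p.SHLQ(3, RAX)  // 48 C1 E0 03 (REX.W C1 /4 ib)
+//     p.SHLQ(CL, RAX) // 48 D3 E0    (REX.W D3 /4, count in CL)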
+
+// SHLW performs "Logical Shift Left".
+//
+// Mnemonic : SHL
+// Supported forms : (6 forms)
+//
+// * SHLW 1, r16
+// * SHLW imm8, r16
+// * SHLW cl, r16
+// * SHLW 1, m16
+// * SHLW imm8, m16
+// * SHLW cl, m16
+//
+func (self *Program) SHLW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SHLW", 2, Operands { v0, v1 })
+ // SHLW 1, r16
+ if isConst1(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xd1)
+ m.emit(0xe0 | lcode(v[1]))
+ })
+ }
+ // SHLW imm8, r16
+ if isImm8(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xc1)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHLW cl, r16
+ if v0 == CL && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xd3)
+ m.emit(0xe0 | lcode(v[1]))
+ })
+ }
+ // SHLW 1, m16
+ if isConst1(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd1)
+ m.mrsd(4, addr(v[1]), 1)
+ })
+ }
+ // SHLW imm8, m16
+ if isImm8(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc1)
+ m.mrsd(4, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHLW cl, m16
+ if v0 == CL && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd3)
+ m.mrsd(4, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHLW")
+ }
+ return p
+}
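+
+// NOTE (reviewer note, not produced by mkasm_amd64.py): the 16-bit SHLW
+// forms reuse the same shift-group opcodes as SHLL; the only difference is
+// the 0x66 operand-size prefix each encoder above emits first, which
+// narrows the operation to a word.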
+
+// SHLXL performs "Logical Shift Left Without Affecting Flags".
+//
+// Mnemonic : SHLX
+// Supported forms : (2 forms)
+//
+// * SHLXL r32, r32, r32 [BMI2]
+// * SHLXL r32, m32, r32 [BMI2]
+//
+func (self *Program) SHLXL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("SHLXL", 3, Operands { v0, v1, v2 })
+ // SHLXL r32, r32, r32
+ if isReg32(v0) && isReg32(v1) && isReg32(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[0]) << 3))
+ m.emit(0xf7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // SHLXL r32, m32, r32
+ if isReg32(v0) && isM32(v1) && isReg32(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0xf7)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHLXL")
+ }
+ return p
+}
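+
+// NOTE (reviewer sketch, not produced by mkasm_amd64.py): unlike SHLL and
+// SHLQ, the BMI2 SHLX form takes its count from an arbitrary register
+// (encoded into VEX.vvvv via hlcode(v[0])) and leaves EFLAGS untouched,
+// which is why flag-sensitive code prefers it:
+//
+//     p.SHLXL(ECX, EBX, EAX) // EAX = EBX << (ECX & 31), flags preserved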
+
+// SHLXQ performs "Logical Shift Left Without Affecting Flags".
+//
+// Mnemonic : SHLX
+// Supported forms : (2 forms)
+//
+// * SHLXQ r64, r64, r64 [BMI2]
+// * SHLXQ r64, m64, r64 [BMI2]
+//
+func (self *Program) SHLXQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("SHLXQ", 3, Operands { v0, v1, v2 })
+ // SHLXQ r64, r64, r64
+ if isReg64(v0) && isReg64(v1) && isReg64(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[0]) << 3))
+ m.emit(0xf7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // SHLXQ r64, m64, r64
+ if isReg64(v0) && isM64(v1) && isReg64(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0xf7)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHLXQ")
+ }
+ return p
+}
+
+// SHRB performs "Logical Shift Right".
+//
+// Mnemonic : SHR
+// Supported forms : (6 forms)
+//
+// * SHRB 1, r8
+// * SHRB imm8, r8
+// * SHRB cl, r8
+// * SHRB 1, m8
+// * SHRB imm8, m8
+// * SHRB cl, m8
+//
+func (self *Program) SHRB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SHRB", 2, Operands { v0, v1 })
+ // SHRB 1, r8
+ if isConst1(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xd0)
+ m.emit(0xe8 | lcode(v[1]))
+ })
+ }
+ // SHRB imm8, r8
+ if isImm8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xc0)
+ m.emit(0xe8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHRB cl, r8
+ if v0 == CL && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xd2)
+ m.emit(0xe8 | lcode(v[1]))
+ })
+ }
+ // SHRB 1, m8
+ if isConst1(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd0)
+ m.mrsd(5, addr(v[1]), 1)
+ })
+ }
+ // SHRB imm8, m8
+ if isImm8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc0)
+ m.mrsd(5, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHRB cl, m8
+ if v0 == CL && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd2)
+ m.mrsd(5, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHRB")
+ }
+ return p
+}
+
+// SHRDL performs "Integer Double Precision Shift Right".
+//
+// Mnemonic : SHRD
+// Supported forms : (4 forms)
+//
+// * SHRDL imm8, r32, r32
+// * SHRDL cl, r32, r32
+// * SHRDL imm8, r32, m32
+// * SHRDL cl, r32, m32
+//
+func (self *Program) SHRDL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("SHRDL", 3, Operands { v0, v1, v2 })
+ // SHRDL imm8, r32, r32
+ if isImm8(v0) && isReg32(v1) && isReg32(v2) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[2], false)
+ m.emit(0x0f)
+ m.emit(0xac)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHRDL cl, r32, r32
+ if v0 == CL && isReg32(v1) && isReg32(v2) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[2], false)
+ m.emit(0x0f)
+ m.emit(0xad)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ })
+ }
+ // SHRDL imm8, r32, m32
+ if isImm8(v0) && isReg32(v1) && isM32(v2) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[2]), false)
+ m.emit(0x0f)
+ m.emit(0xac)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHRDL cl, r32, m32
+ if v0 == CL && isReg32(v1) && isM32(v2) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[2]), false)
+ m.emit(0x0f)
+ m.emit(0xad)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHRDL")
+ }
+ return p
+}
+
+// SHRDQ performs "Integer Double Precision Shift Right".
+//
+// Mnemonic : SHRD
+// Supported forms : (4 forms)
+//
+// * SHRDQ imm8, r64, r64
+// * SHRDQ cl, r64, r64
+// * SHRDQ imm8, r64, m64
+// * SHRDQ cl, r64, m64
+//
+func (self *Program) SHRDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("SHRDQ", 3, Operands { v0, v1, v2 })
+ // SHRDQ imm8, r64, r64
+ if isImm8(v0) && isReg64(v1) && isReg64(v2) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[2]))
+ m.emit(0x0f)
+ m.emit(0xac)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHRDQ cl, r64, r64
+ if v0 == CL && isReg64(v1) && isReg64(v2) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[2]))
+ m.emit(0x0f)
+ m.emit(0xad)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ })
+ }
+ // SHRDQ imm8, r64, m64
+ if isImm8(v0) && isReg64(v1) && isM64(v2) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[2]))
+ m.emit(0x0f)
+ m.emit(0xac)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHRDQ cl, r64, m64
+ if v0 == CL && isReg64(v1) && isM64(v2) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[2]))
+ m.emit(0x0f)
+ m.emit(0xad)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHRDQ")
+ }
+ return p
+}
+
+// SHRDW performs "Integer Double Precision Shift Right".
+//
+// Mnemonic : SHRD
+// Supported forms : (4 forms)
+//
+// * SHRDW imm8, r16, r16
+// * SHRDW cl, r16, r16
+// * SHRDW imm8, r16, m16
+// * SHRDW cl, r16, m16
+//
+func (self *Program) SHRDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("SHRDW", 3, Operands { v0, v1, v2 })
+ // SHRDW imm8, r16, r16
+ if isImm8(v0) && isReg16(v1) && isReg16(v2) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[2], false)
+ m.emit(0x0f)
+ m.emit(0xac)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHRDW cl, r16, r16
+ if v0 == CL && isReg16(v1) && isReg16(v2) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[2], false)
+ m.emit(0x0f)
+ m.emit(0xad)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ })
+ }
+ // SHRDW imm8, r16, m16
+ if isImm8(v0) && isReg16(v1) && isM16(v2) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[2]), false)
+ m.emit(0x0f)
+ m.emit(0xac)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHRDW cl, r16, m16
+ if v0 == CL && isReg16(v1) && isM16(v2) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[2]), false)
+ m.emit(0x0f)
+ m.emit(0xad)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHRDW")
+ }
+ return p
+}
+
+// SHRL performs "Logical Shift Right".
+//
+// Mnemonic : SHR
+// Supported forms : (6 forms)
+//
+// * SHRL 1, r32
+// * SHRL imm8, r32
+// * SHRL cl, r32
+// * SHRL 1, m32
+// * SHRL imm8, m32
+// * SHRL cl, m32
+//
+func (self *Program) SHRL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SHRL", 2, Operands { v0, v1 })
+ // SHRL 1, r32
+ if isConst1(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xd1)
+ m.emit(0xe8 | lcode(v[1]))
+ })
+ }
+ // SHRL imm8, r32
+ if isImm8(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xc1)
+ m.emit(0xe8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHRL cl, r32
+ if v0 == CL && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xd3)
+ m.emit(0xe8 | lcode(v[1]))
+ })
+ }
+ // SHRL 1, m32
+ if isConst1(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd1)
+ m.mrsd(5, addr(v[1]), 1)
+ })
+ }
+ // SHRL imm8, m32
+ if isImm8(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc1)
+ m.mrsd(5, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHRL cl, m32
+ if v0 == CL && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd3)
+ m.mrsd(5, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHRL")
+ }
+ return p
+}
+
+// SHRQ performs "Logical Shift Right".
+//
+// Mnemonic : SHR
+// Supported forms : (6 forms)
+//
+// * SHRQ 1, r64
+// * SHRQ imm8, r64
+// * SHRQ cl, r64
+// * SHRQ 1, m64
+// * SHRQ imm8, m64
+// * SHRQ cl, m64
+//
+func (self *Program) SHRQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SHRQ", 2, Operands { v0, v1 })
+ // SHRQ 1, r64
+ if isConst1(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xd1)
+ m.emit(0xe8 | lcode(v[1]))
+ })
+ }
+ // SHRQ imm8, r64
+ if isImm8(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xc1)
+ m.emit(0xe8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHRQ cl, r64
+ if v0 == CL && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xd3)
+ m.emit(0xe8 | lcode(v[1]))
+ })
+ }
+ // SHRQ 1, m64
+ if isConst1(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xd1)
+ m.mrsd(5, addr(v[1]), 1)
+ })
+ }
+ // SHRQ imm8, m64
+ if isImm8(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xc1)
+ m.mrsd(5, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHRQ cl, m64
+ if v0 == CL && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xd3)
+ m.mrsd(5, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHRQ")
+ }
+ return p
+}
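+
+// NOTE (reviewer note, not produced by mkasm_amd64.py): SHR is the logical
+// right shift, so the vacated high bits are always zero-filled. The
+// sign-propagating variant is the separate SAR* family; the two differ
+// only in the ModRM extension (/5 here versus /7 for SAR), e.g.
+//
+//     p.SHRQ(1, RAX) // 48 D1 E8 (REX.W D1 /5)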
+
+// SHRW performs "Logical Shift Right".
+//
+// Mnemonic : SHR
+// Supported forms : (6 forms)
+//
+// * SHRW 1, r16
+// * SHRW imm8, r16
+// * SHRW cl, r16
+// * SHRW 1, m16
+// * SHRW imm8, m16
+// * SHRW cl, m16
+//
+func (self *Program) SHRW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SHRW", 2, Operands { v0, v1 })
+ // SHRW 1, r16
+ if isConst1(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xd1)
+ m.emit(0xe8 | lcode(v[1]))
+ })
+ }
+ // SHRW imm8, r16
+ if isImm8(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xc1)
+ m.emit(0xe8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHRW cl, r16
+ if v0 == CL && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xd3)
+ m.emit(0xe8 | lcode(v[1]))
+ })
+ }
+ // SHRW 1, m16
+ if isConst1(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd1)
+ m.mrsd(5, addr(v[1]), 1)
+ })
+ }
+ // SHRW imm8, m16
+ if isImm8(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xc1)
+ m.mrsd(5, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHRW cl, m16
+ if v0 == CL && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xd3)
+ m.mrsd(5, addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHRW")
+ }
+ return p
+}
+
+// SHRXL performs "Logical Shift Right Without Affecting Flags".
+//
+// Mnemonic : SHRX
+// Supported forms : (2 forms)
+//
+// * SHRXL r32, r32, r32 [BMI2]
+// * SHRXL r32, m32, r32 [BMI2]
+//
+func (self *Program) SHRXL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("SHRXL", 3, Operands { v0, v1, v2 })
+ // SHRXL r32, r32, r32
+ if isReg32(v0) && isReg32(v1) && isReg32(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7b ^ (hlcode(v[0]) << 3))
+ m.emit(0xf7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // SHRXL r32, m32, r32
+ if isReg32(v0) && isM32(v1) && isReg32(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x03, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0xf7)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHRXL")
+ }
+ return p
+}
+
+// SHRXQ performs "Logical Shift Right Without Affecting Flags".
+//
+// Mnemonic : SHRX
+// Supported forms : (2 forms)
+//
+// * SHRXQ r64, r64, r64 [BMI2]
+// * SHRXQ r64, m64, r64 [BMI2]
+//
+func (self *Program) SHRXQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("SHRXQ", 3, Operands { v0, v1, v2 })
+ // SHRXQ r64, r64, r64
+ if isReg64(v0) && isReg64(v1) && isReg64(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0xfb ^ (hlcode(v[0]) << 3))
+ m.emit(0xf7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // SHRXQ r64, m64, r64
+ if isReg64(v0) && isM64(v1) && isReg64(v2) {
+ self.require(ISA_BMI2)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x83, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0xf7)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHRXQ")
+ }
+ return p
+}
+
+// SHUFPD performs "Shuffle Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : SHUFPD
+// Supported forms : (2 forms)
+//
+// * SHUFPD imm8, xmm, xmm [SSE2]
+// * SHUFPD imm8, m128, xmm [SSE2]
+//
+func (self *Program) SHUFPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("SHUFPD", 3, Operands { v0, v1, v2 })
+ // SHUFPD imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0xc6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHUFPD imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xc6)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHUFPD")
+ }
+ return p
+}
+
+// SHUFPS performs "Shuffle Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : SHUFPS
+// Supported forms : (2 forms)
+//
+// * SHUFPS imm8, xmm, xmm [SSE]
+// * SHUFPS imm8, m128, xmm [SSE]
+//
+func (self *Program) SHUFPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("SHUFPS", 3, Operands { v0, v1, v2 })
+ // SHUFPS imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[2]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0xc6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SHUFPS imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[2]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xc6)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SHUFPS")
+ }
+ return p
+}
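+
+// NOTE (reviewer sketch, not produced by mkasm_amd64.py): the SHUFPS imm8
+// is four 2-bit lane selectors. With dst = v2 and src = v1 in this operand
+// order, the low two result lanes are picked from dst and the high two
+// from src:
+//
+//     dst[0] = dst[imm&3]
+//     dst[1] = dst[imm>>2&3]
+//     dst[2] = src[imm>>4&3]
+//     dst[3] = src[imm>>6&3]
+//
+// so p.SHUFPS(0x1b, XMM0, XMM0) reverses the four lanes of XMM0
+// (0x1b selects 3,2,1,0), assuming XMM0 names this package's register.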
+
+// SQRTPD performs "Compute Square Roots of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : SQRTPD
+// Supported forms : (2 forms)
+//
+// * SQRTPD xmm, xmm [SSE2]
+// * SQRTPD m128, xmm [SSE2]
+//
+func (self *Program) SQRTPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SQRTPD", 2, Operands { v0, v1 })
+ // SQRTPD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // SQRTPD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x51)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SQRTPD")
+ }
+ return p
+}
+
+// SQRTPS performs "Compute Square Roots of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : SQRTPS
+// Supported forms : (2 forms)
+//
+// * SQRTPS xmm, xmm [SSE]
+// * SQRTPS m128, xmm [SSE]
+//
+func (self *Program) SQRTPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SQRTPS", 2, Operands { v0, v1 })
+ // SQRTPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // SQRTPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x51)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SQRTPS")
+ }
+ return p
+}
+
+// SQRTSD performs "Compute Square Root of Scalar Double-Precision Floating-Point Value".
+//
+// Mnemonic : SQRTSD
+// Supported forms : (2 forms)
+//
+// * SQRTSD xmm, xmm [SSE2]
+// * SQRTSD m64, xmm [SSE2]
+//
+func (self *Program) SQRTSD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SQRTSD", 2, Operands { v0, v1 })
+ // SQRTSD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // SQRTSD m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x51)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SQRTSD")
+ }
+ return p
+}
+
+// SQRTSS performs "Compute Square Root of Scalar Single-Precision Floating-Point Value".
+//
+// Mnemonic : SQRTSS
+// Supported forms : (2 forms)
+//
+// * SQRTSS xmm, xmm [SSE]
+// * SQRTSS m32, xmm [SSE]
+//
+func (self *Program) SQRTSS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SQRTSS", 2, Operands { v0, v1 })
+ // SQRTSS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // SQRTSS m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x51)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SQRTSS")
+ }
+ return p
+}
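+
+// NOTE (reviewer note, not produced by mkasm_amd64.py): the four SQRT
+// variants above all share opcode 0F 51 and differ only in the mandatory
+// prefix each encoder emits first: none for PS, 66 for PD, F3 for SS and
+// F2 for SD. The scalar forms write a single lane and leave the upper
+// lanes of the destination unmodified.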
+
+// STC performs "Set Carry Flag".
+//
+// Mnemonic : STC
+// Supported forms : (1 form)
+//
+// * STC
+//
+func (self *Program) STC() *Instruction {
+ p := self.alloc("STC", 0, Operands { })
+ // STC
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf9)
+ })
+ return p
+}
+
+// STD performs "Set Direction Flag".
+//
+// Mnemonic : STD
+// Supported forms : (1 form)
+//
+// * STD
+//
+func (self *Program) STD() *Instruction {
+ p := self.alloc("STD", 0, Operands { })
+ // STD
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xfd)
+ })
+ return p
+}
+
+// STMXCSR performs "Store MXCSR Register State".
+//
+// Mnemonic : STMXCSR
+// Supported forms : (1 form)
+//
+// * STMXCSR m32 [SSE]
+//
+func (self *Program) STMXCSR(v0 interface{}) *Instruction {
+ p := self.alloc("STMXCSR", 1, Operands { v0 })
+ // STMXCSR m32
+ if isM32(v0) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xae)
+ m.mrsd(3, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for STMXCSR")
+ }
+ return p
+}
+
+// SUBB performs "Subtract".
+//
+// Mnemonic : SUB
+// Supported forms : (6 forms)
+//
+// * SUBB imm8, al
+// * SUBB imm8, r8
+// * SUBB r8, r8
+// * SUBB m8, r8
+// * SUBB imm8, m8
+// * SUBB r8, m8
+//
+func (self *Program) SUBB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SUBB", 2, Operands { v0, v1 })
+ // SUBB imm8, al
+ if isImm8(v0) && v1 == AL {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x2c)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SUBB imm8, r8
+ if isImm8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0x80)
+ m.emit(0xe8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SUBB r8, r8
+ if isReg8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
+ m.emit(0x28)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1]))
+ m.emit(0x2a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // SUBB m8, r8
+ if isM8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1]))
+ m.emit(0x2a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // SUBB imm8, m8
+ if isImm8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x80)
+ m.mrsd(5, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SUBB r8, m8
+ if isReg8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
+ m.emit(0x28)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SUBB")
+ }
+ return p
+}
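+
+// NOTE (reviewer note, not produced by mkasm_amd64.py): for the r8, r8
+// form above, two byte-for-byte different but semantically identical
+// encodings are registered (MR form 28 /r and RM form 2A /r with the
+// operands swapped); registering both appears to give the encoder a
+// choice of candidates when the instruction is finally laid out.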
+
+// SUBL performs "Subtract".
+//
+// Mnemonic : SUB
+// Supported forms : (8 forms)
+//
+// * SUBL imm32, eax
+// * SUBL imm8, r32
+// * SUBL imm32, r32
+// * SUBL r32, r32
+// * SUBL m32, r32
+// * SUBL imm8, m32
+// * SUBL imm32, m32
+// * SUBL r32, m32
+//
+func (self *Program) SUBL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SUBL", 2, Operands { v0, v1 })
+ // SUBL imm32, eax
+ if isImm32(v0) && v1 == EAX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x2d)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // SUBL imm8, r32
+ if isImm8Ext(v0, 4) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x83)
+ m.emit(0xe8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SUBL imm32, r32
+ if isImm32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x81)
+ m.emit(0xe8 | lcode(v[1]))
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // SUBL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x2b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // SUBL m32, r32
+ if isM32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x2b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // SUBL imm8, m32
+ if isImm8Ext(v0, 4) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x83)
+ m.mrsd(5, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SUBL imm32, m32
+ if isImm32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x81)
+ m.mrsd(5, addr(v[1]), 1)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // SUBL r32, m32
+ if isReg32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x29)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SUBL")
+ }
+ return p
+}
+
+// SUBPD performs "Subtract Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : SUBPD
+// Supported forms : (2 forms)
+//
+// * SUBPD xmm, xmm [SSE2]
+// * SUBPD m128, xmm [SSE2]
+//
+func (self *Program) SUBPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SUBPD", 2, Operands { v0, v1 })
+ // SUBPD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // SUBPD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x5c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SUBPD")
+ }
+ return p
+}
+
+// SUBPS performs "Subtract Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : SUBPS
+// Supported forms : (2 forms)
+//
+// * SUBPS xmm, xmm [SSE]
+// * SUBPS m128, xmm [SSE]
+//
+func (self *Program) SUBPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SUBPS", 2, Operands { v0, v1 })
+ // SUBPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // SUBPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x5c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SUBPS")
+ }
+ return p
+}
+
+// SUBQ performs "Subtract".
+//
+// Mnemonic : SUB
+// Supported forms : (8 forms)
+//
+// * SUBQ imm32, rax
+// * SUBQ imm8, r64
+// * SUBQ imm32, r64
+// * SUBQ r64, r64
+// * SUBQ m64, r64
+// * SUBQ imm8, m64
+// * SUBQ imm32, m64
+// * SUBQ r64, m64
+//
+func (self *Program) SUBQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SUBQ", 2, Operands { v0, v1 })
+ // SUBQ imm32, rax
+ if isImm32(v0) && v1 == RAX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48)
+ m.emit(0x2d)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // SUBQ imm8, r64
+ if isImm8Ext(v0, 8) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0x83)
+ m.emit(0xe8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SUBQ imm32, r64
+ if isImm32Ext(v0, 8) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0x81)
+ m.emit(0xe8 | lcode(v[1]))
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // SUBQ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x2b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // SUBQ m64, r64
+ if isM64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x2b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // SUBQ imm8, m64
+ if isImm8Ext(v0, 8) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0x83)
+ m.mrsd(5, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SUBQ imm32, m64
+ if isImm32Ext(v0, 8) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0x81)
+ m.mrsd(5, addr(v[1]), 1)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // SUBQ r64, m64
+ if isReg64(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[0]), addr(v[1]))
+ m.emit(0x29)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SUBQ")
+ }
+ return p
+}
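+
+// NOTE (reviewer sketch, not produced by mkasm_amd64.py): isImm8Ext admits
+// any immediate representable as a sign-extended byte, so small constants
+// get the compact 83 /5 ib form while larger ones need 81 /5 id:
+//
+//     p.SUBQ(1, RCX)      // 48 83 E9 01
+//     p.SUBQ(0x1000, RCX) // 48 81 E9 00 10 00 00
+//
+// (a sketch assuming plain Go integers are accepted as immediates; when
+// several forms match, all candidates are registered via p.add).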
+
+// SUBSD performs "Subtract Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : SUBSD
+// Supported forms : (2 forms)
+//
+// * SUBSD xmm, xmm [SSE2]
+// * SUBSD m64, xmm [SSE2]
+//
+func (self *Program) SUBSD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SUBSD", 2, Operands { v0, v1 })
+ // SUBSD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // SUBSD m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x5c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SUBSD")
+ }
+ return p
+}
+
+// SUBSS performs "Subtract Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : SUBSS
+// Supported forms : (2 forms)
+//
+// * SUBSS xmm, xmm [SSE]
+// * SUBSS m32, xmm [SSE]
+//
+func (self *Program) SUBSS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SUBSS", 2, Operands { v0, v1 })
+ // SUBSS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // SUBSS m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x5c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SUBSS")
+ }
+ return p
+}
+
+// SUBW performs "Subtract".
+//
+// Mnemonic : SUB
+// Supported forms : (8 forms)
+//
+// * SUBW imm16, ax
+// * SUBW imm8, r16
+// * SUBW imm16, r16
+// * SUBW r16, r16
+// * SUBW m16, r16
+// * SUBW imm8, m16
+// * SUBW imm16, m16
+// * SUBW r16, m16
+//
+func (self *Program) SUBW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("SUBW", 2, Operands { v0, v1 })
+ // SUBW imm16, ax
+ if isImm16(v0) && v1 == AX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.emit(0x2d)
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // SUBW imm8, r16
+ if isImm8Ext(v0, 2) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x83)
+ m.emit(0xe8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SUBW imm16, r16
+ if isImm16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x81)
+ m.emit(0xe8 | lcode(v[1]))
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // SUBW r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x2b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // SUBW m16, r16
+ if isM16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x2b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // SUBW imm8, m16
+ if isImm8Ext(v0, 2) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x83)
+ m.mrsd(5, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // SUBW imm16, m16
+ if isImm16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x81)
+ m.mrsd(5, addr(v[1]), 1)
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // SUBW r16, m16
+ if isReg16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x29)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for SUBW")
+ }
+ return p
+}
+
+// SYSCALL performs "Fast System Call".
+//
+// Mnemonic : SYSCALL
+// Supported forms : (1 form)
+//
+// * SYSCALL
+//
+func (self *Program) SYSCALL() *Instruction {
+ p := self.alloc("SYSCALL", 0, Operands { })
+ // SYSCALL
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x05)
+ })
+ return p
+}
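+
+// NOTE (reviewer sketch, not produced by mkasm_amd64.py): SYSCALL is just
+// the two bytes 0F 05. Under the Linux x86-64 ABI the call number goes in
+// RAX and arguments in RDI, RSI, RDX, R10, R8, R9, so a minimal exit(0)
+// sequence might read (assuming the MOVQ/XORL encoders defined elsewhere
+// in this file):
+//
+//     p.MOVQ(60, RAX)  // __NR_exit
+//     p.XORL(EDI, EDI) // status 0
+//     p.SYSCALL()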
+
+// T1MSKC performs "Inverse Mask From Trailing Ones".
+//
+// Mnemonic : T1MSKC
+// Supported forms : (4 forms)
+//
+// * T1MSKC r32, r32 [TBM]
+// * T1MSKC m32, r32 [TBM]
+// * T1MSKC r64, r64 [TBM]
+// * T1MSKC m64, r64 [TBM]
+//
+func (self *Program) T1MSKC(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("T1MSKC", 2, Operands { v0, v1 })
+ // T1MSKC r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[0]) << 5))
+ m.emit(0x78 ^ (hlcode(v[1]) << 3))
+ m.emit(0x01)
+ m.emit(0xf8 | lcode(v[0]))
+ })
+ }
+ // T1MSKC m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1]))
+ m.emit(0x01)
+ m.mrsd(7, addr(v[0]), 1)
+ })
+ }
+ // T1MSKC r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+ m.emit(0x01)
+ m.emit(0xf8 | lcode(v[0]))
+ })
+ }
+ // T1MSKC m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1]))
+ m.emit(0x01)
+ m.mrsd(7, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for T1MSKC")
+ }
+ return p
+}
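+
+// NOTE (reviewer note, not produced by mkasm_amd64.py): T1MSKC computes
+// ~x | (x + 1), the complement of the mask covering the trailing one bits
+// of x; for example x = 0b0111 yields ...11111000.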
+
+// TESTB performs "Logical Compare".
+//
+// Mnemonic : TEST
+// Supported forms : (5 forms)
+//
+// * TESTB imm8, al
+// * TESTB imm8, r8
+// * TESTB r8, r8
+// * TESTB imm8, m8
+// * TESTB r8, m8
+//
+func (self *Program) TESTB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("TESTB", 2, Operands { v0, v1 })
+ // TESTB imm8, al
+ if isImm8(v0) && v1 == AL {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xa8)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // TESTB imm8, r8
+ if isImm8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0xf6)
+ m.emit(0xc0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // TESTB r8, r8
+ if isReg8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
+ m.emit(0x84)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // TESTB imm8, m8
+ if isImm8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xf6)
+ m.mrsd(0, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // TESTB r8, m8
+ if isReg8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
+ m.emit(0x84)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for TESTB")
+ }
+ return p
+}
+
+// TESTL performs "Logical Compare".
+//
+// Mnemonic : TEST
+// Supported forms : (5 forms)
+//
+// * TESTL imm32, eax
+// * TESTL imm32, r32
+// * TESTL r32, r32
+// * TESTL imm32, m32
+// * TESTL r32, m32
+//
+func (self *Program) TESTL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("TESTL", 2, Operands { v0, v1 })
+ // TESTL imm32, eax
+ if isImm32(v0) && v1 == EAX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xa9)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // TESTL imm32, r32
+ if isImm32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0xf7)
+ m.emit(0xc0 | lcode(v[1]))
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // TESTL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x85)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // TESTL imm32, m32
+ if isImm32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xf7)
+ m.mrsd(0, addr(v[1]), 1)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // TESTL r32, m32
+ if isReg32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x85)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for TESTL")
+ }
+ return p
+}
+
+// TESTQ performs "Logical Compare".
+//
+// Mnemonic : TEST
+// Supported forms : (5 forms)
+//
+// * TESTQ imm32, rax
+// * TESTQ imm32, r64
+// * TESTQ r64, r64
+// * TESTQ imm32, m64
+// * TESTQ r64, m64
+//
+func (self *Program) TESTQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("TESTQ", 2, Operands { v0, v1 })
+ // TESTQ imm32, rax
+ if isImm32(v0) && v1 == RAX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48)
+ m.emit(0xa9)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // TESTQ imm32, r64
+ if isImm32Ext(v0, 8) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0xf7)
+ m.emit(0xc0 | lcode(v[1]))
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // TESTQ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
+ m.emit(0x85)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // TESTQ imm32, m64
+ if isImm32Ext(v0, 8) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0xf7)
+ m.mrsd(0, addr(v[1]), 1)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // TESTQ r64, m64
+ if isReg64(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[0]), addr(v[1]))
+ m.emit(0x85)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for TESTQ")
+ }
+ return p
+}
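+
+// NOTE (reviewer sketch, not produced by mkasm_amd64.py): TEST performs
+// the bitwise AND purely for its flag effects (SF/ZF/PF) and discards the
+// result, so the idiomatic zero check on a register is to TEST it against
+// itself and branch on ZF:
+//
+//     p.TESTQ(RAX, RAX) // 48 85 C0, ZF=1 iff RAX == 0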
+
+// TESTW performs "Logical Compare".
+//
+// Mnemonic : TEST
+// Supported forms : (5 forms)
+//
+// * TESTW imm16, ax
+// * TESTW imm16, r16
+// * TESTW r16, r16
+// * TESTW imm16, m16
+// * TESTW r16, m16
+//
+func (self *Program) TESTW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("TESTW", 2, Operands { v0, v1 })
+ // TESTW imm16, ax
+ if isImm16(v0) && v1 == AX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.emit(0xa9)
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // TESTW imm16, r16
+ if isImm16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0xf7)
+ m.emit(0xc0 | lcode(v[1]))
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // TESTW r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x85)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // TESTW imm16, m16
+ if isImm16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0xf7)
+ m.mrsd(0, addr(v[1]), 1)
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // TESTW r16, m16
+ if isReg16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x85)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for TESTW")
+ }
+ return p
+}
+
+// TZCNTL performs "Count the Number of Trailing Zero Bits".
+//
+// Mnemonic : TZCNT
+// Supported forms : (2 forms)
+//
+// * TZCNTL r32, r32 [BMI]
+// * TZCNTL m32, r32 [BMI]
+//
+func (self *Program) TZCNTL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("TZCNTL", 2, Operands { v0, v1 })
+ // TZCNTL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xbc)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // TZCNTL m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xbc)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for TZCNTL")
+ }
+ return p
+}
+
+// TZCNTQ performs "Count the Number of Trailing Zero Bits".
+//
+// Mnemonic : TZCNT
+// Supported forms : (2 forms)
+//
+// * TZCNTQ r64, r64 [BMI]
+// * TZCNTQ m64, r64 [BMI]
+//
+func (self *Program) TZCNTQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("TZCNTQ", 2, Operands { v0, v1 })
+ // TZCNTQ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x0f)
+ m.emit(0xbc)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // TZCNTQ m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf3)
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x0f)
+ m.emit(0xbc)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for TZCNTQ")
+ }
+ return p
+}
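+
+// NOTE (reviewer note, not produced by mkasm_amd64.py): TZCNT is encoded
+// as an F3-prefixed BSF (F3 0F BC). Pre-BMI CPUs ignore the prefix and
+// execute BSF, which agrees with TZCNT for nonzero inputs; for a zero
+// input TZCNT is defined to return the operand width (64 here) whereas
+// BSF leaves the destination undefined.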
+
+// TZCNTW performs "Count the Number of Trailing Zero Bits".
+//
+// Mnemonic : TZCNT
+// Supported forms : (2 forms)
+//
+// * TZCNTW r16, r16 [BMI]
+// * TZCNTW m16, r16 [BMI]
+//
+func (self *Program) TZCNTW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("TZCNTW", 2, Operands { v0, v1 })
+ // TZCNTW r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xbc)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // TZCNTW m16, r16
+ if isM16(v0) && isReg16(v1) {
+ self.require(ISA_BMI)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.emit(0xf3)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xbc)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for TZCNTW")
+ }
+ return p
+}
+
+// TZMSK performs "Mask From Trailing Zeros".
+//
+// Mnemonic : TZMSK
+// Supported forms : (4 forms)
+//
+// * TZMSK r32, r32 [TBM]
+// * TZMSK m32, r32 [TBM]
+// * TZMSK r64, r64 [TBM]
+// * TZMSK m64, r64 [TBM]
+//
+func (self *Program) TZMSK(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("TZMSK", 2, Operands { v0, v1 })
+ // TZMSK r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[0]) << 5))
+ m.emit(0x78 ^ (hlcode(v[1]) << 3))
+ m.emit(0x01)
+ m.emit(0xe0 | lcode(v[0]))
+ })
+ }
+ // TZMSK m32, r32
+ if isM32(v0) && isReg32(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1]))
+ m.emit(0x01)
+ m.mrsd(4, addr(v[0]), 1)
+ })
+ }
+ // TZMSK r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+ m.emit(0x01)
+ m.emit(0xe0 | lcode(v[0]))
+ })
+ }
+ // TZMSK m64, r64
+ if isM64(v0) && isReg64(v1) {
+ self.require(ISA_TBM)
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1]))
+ m.emit(0x01)
+ m.mrsd(4, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for TZMSK")
+ }
+ return p
+}
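+
+// NOTE (reviewer note, not produced by mkasm_amd64.py): TZMSK computes
+// ~x & (x - 1), a mask of the trailing zero bits of x; for example
+// x = 0b0101000 yields 0b0000111.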
+
+// UCOMISD performs "Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS".
+//
+// Mnemonic : UCOMISD
+// Supported forms : (2 forms)
+//
+// * UCOMISD xmm, xmm [SSE2]
+// * UCOMISD m64, xmm [SSE2]
+//
+func (self *Program) UCOMISD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("UCOMISD", 2, Operands { v0, v1 })
+ // UCOMISD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x2e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // UCOMISD m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x2e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for UCOMISD")
+ }
+ return p
+}
+
+// UCOMISS performs "Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS".
+//
+// Mnemonic : UCOMISS
+// Supported forms : (2 forms)
+//
+// * UCOMISS xmm, xmm [SSE]
+// * UCOMISS m32, xmm [SSE]
+//
+func (self *Program) UCOMISS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("UCOMISS", 2, Operands { v0, v1 })
+ // UCOMISS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x2e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // UCOMISS m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x2e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for UCOMISS")
+ }
+ return p
+}
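+
+// Usage sketch: UCOMISD/UCOMISS compare only the low scalar element and set
+// ZF, PF and CF; PF = 1 reports an unordered result, i.e. a NaN input, so a
+// JP branch typically follows the compare. XMM0/XMM1 are assumed register
+// values:
+//
+//     p.UCOMISD(XMM1, XMM0)    // flags from comparing XMM0.lo with XMM1.lo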
+
+// UD2 performs "Undefined Instruction".
+//
+// Mnemonic : UD2
+// Supported forms : (1 form)
+//
+// * UD2
+//
+func (self *Program) UD2() *Instruction {
+ p := self.alloc("UD2", 0, Operands { })
+ // UD2
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x0b)
+ })
+ return p
+}
+
+// UNPCKHPD performs "Unpack and Interleave High Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : UNPCKHPD
+// Supported forms : (2 forms)
+//
+// * UNPCKHPD xmm, xmm [SSE2]
+// * UNPCKHPD m128, xmm [SSE2]
+//
+func (self *Program) UNPCKHPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("UNPCKHPD", 2, Operands { v0, v1 })
+ // UNPCKHPD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x15)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // UNPCKHPD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x15)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for UNPCKHPD")
+ }
+ return p
+}
+
+// UNPCKHPS performs "Unpack and Interleave High Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : UNPCKHPS
+// Supported forms : (2 forms)
+//
+// * UNPCKHPS xmm, xmm [SSE]
+// * UNPCKHPS m128, xmm [SSE]
+//
+func (self *Program) UNPCKHPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("UNPCKHPS", 2, Operands { v0, v1 })
+ // UNPCKHPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x15)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // UNPCKHPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x15)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for UNPCKHPS")
+ }
+ return p
+}
+
+// UNPCKLPD performs "Unpack and Interleave Low Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : UNPCKLPD
+// Supported forms : (2 forms)
+//
+// * UNPCKLPD xmm, xmm [SSE2]
+// * UNPCKLPD m128, xmm [SSE2]
+//
+func (self *Program) UNPCKLPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("UNPCKLPD", 2, Operands { v0, v1 })
+ // UNPCKLPD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x14)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // UNPCKLPD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x14)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for UNPCKLPD")
+ }
+ return p
+}
+
+// UNPCKLPS performs "Unpack and Interleave Low Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : UNPCKLPS
+// Supported forms : (2 forms)
+//
+// * UNPCKLPS xmm, xmm [SSE]
+// * UNPCKLPS m128, xmm [SSE]
+//
+func (self *Program) UNPCKLPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("UNPCKLPS", 2, Operands { v0, v1 })
+ // UNPCKLPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x14)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // UNPCKLPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x14)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for UNPCKLPS")
+ }
+ return p
+}
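+
+// Semantics note: the UNPCK{L,H}P{S,D} family interleaves lanes of its two
+// inputs; the L forms draw from the low half of each source, the H forms
+// from the high half. With the source-first order used here (XMM0/XMM1
+// assumed):
+//
+//     p.UNPCKLPS(XMM1, XMM0)    // XMM0 = {XMM0[0], XMM1[0], XMM0[1], XMM1[1]}
+//     p.UNPCKHPS(XMM1, XMM0)    // XMM0 = {XMM0[2], XMM1[2], XMM0[3], XMM1[3]}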
+
+// VADDPD performs "Add Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VADDPD
+// Supported forms : (11 forms)
+//
+// * VADDPD xmm, xmm, xmm [AVX]
+// * VADDPD m128, xmm, xmm [AVX]
+// * VADDPD ymm, ymm, ymm [AVX]
+// * VADDPD m256, ymm, ymm [AVX]
+// * VADDPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VADDPD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VADDPD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VADDPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VADDPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VADDPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VADDPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VADDPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VADDPD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VADDPD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VADDPD takes 3 or 4 operands")
+ }
+ // VADDPD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VADDPD m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x58)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VADDPD ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VADDPD m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x58)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VADDPD m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x58)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VADDPD {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VADDPD zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VADDPD m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x58)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VADDPD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VADDPD m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x58)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VADDPD ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VADDPD")
+ }
+ return p
+}
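+
+// Usage sketch: the VEX forms are plain three-operand adds, destination
+// last (YMM0..YMM2 assumed):
+//
+//     p.VADDPD(YMM2, YMM1, YMM0)    // YMM0 = YMM1 + YMM2, requires AVX
+//
+// The EVEX forms additionally take masked destinations (zmm{k}{z}) and, in
+// the four-operand variant, a leading rounding-control operand ({er}); the
+// constructors for those operand values are defined elsewhere in this
+// package.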
+
+// VADDPS performs "Add Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VADDPS
+// Supported forms : (11 forms)
+//
+// * VADDPS xmm, xmm, xmm [AVX]
+// * VADDPS m128, xmm, xmm [AVX]
+// * VADDPS ymm, ymm, ymm [AVX]
+// * VADDPS m256, ymm, ymm [AVX]
+// * VADDPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VADDPS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VADDPS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VADDPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VADDPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VADDPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VADDPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VADDPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VADDPS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VADDPS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VADDPS takes 3 or 4 operands")
+ }
+ // VADDPS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VADDPS m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x58)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VADDPS ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VADDPS m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x58)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VADDPS m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x58)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VADDPS {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VADDPS zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VADDPS m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x58)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VADDPS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VADDPS m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x58)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VADDPS ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VADDPS")
+ }
+ return p
+}
+
+// VADDSD performs "Add Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : VADDSD
+// Supported forms : (5 forms)
+//
+// * VADDSD xmm, xmm, xmm [AVX]
+// * VADDSD m64, xmm, xmm [AVX]
+// * VADDSD m64, xmm, xmm{k}{z} [AVX512F]
+// * VADDSD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VADDSD xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VADDSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VADDSD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VADDSD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VADDSD takes 3 or 4 operands")
+ }
+ // VADDSD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VADDSD m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x58)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VADDSD m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x58)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VADDSD {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xff ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VADDSD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xff ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VADDSD")
+ }
+ return p
+}
+
+// VADDSS performs "Add Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VADDSS
+// Supported forms : (5 forms)
+//
+// * VADDSS xmm, xmm, xmm [AVX]
+// * VADDSS m32, xmm, xmm [AVX]
+// * VADDSS m32, xmm, xmm{k}{z} [AVX512F]
+// * VADDSS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VADDSS xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VADDSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VADDSS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VADDSS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VADDSS takes 3 or 4 operands")
+ }
+ // VADDSS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VADDSS m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x58)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VADDSS m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x58)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VADDSS {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VADDSS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VADDSS")
+ }
+ return p
+}
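+
+// Semantics note: the scalar VADDSD/VADDSS forms add only the low element;
+// the remaining bits of the destination are copied from the second operand
+// (Intel's first source) and the vector bits above 127 are zeroed. A sketch
+// with XMM0..XMM2 assumed:
+//
+//     p.VADDSD(XMM2, XMM1, XMM0)    // XMM0.lo = XMM1.lo + XMM2.lo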
+
+// VADDSUBPD performs "Packed Double-FP Add/Subtract".
+//
+// Mnemonic : VADDSUBPD
+// Supported forms : (4 forms)
+//
+// * VADDSUBPD xmm, xmm, xmm [AVX]
+// * VADDSUBPD m128, xmm, xmm [AVX]
+// * VADDSUBPD ymm, ymm, ymm [AVX]
+// * VADDSUBPD m256, ymm, ymm [AVX]
+//
+func (self *Program) VADDSUBPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VADDSUBPD", 3, Operands { v0, v1, v2 })
+ // VADDSUBPD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xd0)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VADDSUBPD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xd0)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VADDSUBPD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xd0)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VADDSUBPD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xd0)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VADDSUBPD")
+ }
+ return p
+}
+
+// VADDSUBPS performs "Packed Single-FP Add/Subtract".
+//
+// Mnemonic : VADDSUBPS
+// Supported forms : (4 forms)
+//
+// * VADDSUBPS xmm, xmm, xmm [AVX]
+// * VADDSUBPS m128, xmm, xmm [AVX]
+// * VADDSUBPS ymm, ymm, ymm [AVX]
+// * VADDSUBPS m256, ymm, ymm [AVX]
+//
+func (self *Program) VADDSUBPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VADDSUBPS", 3, Operands { v0, v1, v2 })
+ // VADDSUBPS xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xd0)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VADDSUBPS m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xd0)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VADDSUBPS ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(7, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xd0)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VADDSUBPS m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(7, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xd0)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VADDSUBPS")
+ }
+ return p
+}
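+
+// Semantics note: the ADDSUB forms alternate per lane: even-indexed lanes
+// are subtracted, odd-indexed lanes are added. Sketch (XMM0..XMM2 assumed):
+//
+//     p.VADDSUBPS(XMM2, XMM1, XMM0)
+//     // XMM0[0] = XMM1[0] - XMM2[0], XMM0[1] = XMM1[1] + XMM2[1], ...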
+
+// VAESDEC performs "Perform One Round of an AES Decryption Flow".
+//
+// Mnemonic : VAESDEC
+// Supported forms : (2 forms)
+//
+// * VAESDEC xmm, xmm, xmm [AES,AVX]
+// * VAESDEC m128, xmm, xmm [AES,AVX]
+//
+func (self *Program) VAESDEC(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VAESDEC", 3, Operands { v0, v1, v2 })
+ // VAESDEC xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX | ISA_AES)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0xde)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VAESDEC m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX | ISA_AES)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xde)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VAESDEC")
+ }
+ return p
+}
+
+// VAESDECLAST performs "Perform Last Round of an AES Decryption Flow".
+//
+// Mnemonic : VAESDECLAST
+// Supported forms : (2 forms)
+//
+// * VAESDECLAST xmm, xmm, xmm [AES,AVX]
+// * VAESDECLAST m128, xmm, xmm [AES,AVX]
+//
+func (self *Program) VAESDECLAST(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VAESDECLAST", 3, Operands { v0, v1, v2 })
+ // VAESDECLAST xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX | ISA_AES)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0xdf)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VAESDECLAST m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX | ISA_AES)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xdf)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VAESDECLAST")
+ }
+ return p
+}
+
+// VAESENC performs "Perform One Round of an AES Encryption Flow".
+//
+// Mnemonic : VAESENC
+// Supported forms : (2 forms)
+//
+// * VAESENC xmm, xmm, xmm [AES,AVX]
+// * VAESENC m128, xmm, xmm [AES,AVX]
+//
+func (self *Program) VAESENC(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VAESENC", 3, Operands { v0, v1, v2 })
+ // VAESENC xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX | ISA_AES)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0xdc)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VAESENC m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX | ISA_AES)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xdc)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VAESENC")
+ }
+ return p
+}
+
+// VAESENCLAST performs "Perform Last Round of an AES Encryption Flow".
+//
+// Mnemonic : VAESENCLAST
+// Supported forms : (2 forms)
+//
+// * VAESENCLAST xmm, xmm, xmm [AES,AVX]
+// * VAESENCLAST m128, xmm, xmm [AES,AVX]
+//
+func (self *Program) VAESENCLAST(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VAESENCLAST", 3, Operands { v0, v1, v2 })
+ // VAESENCLAST xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX | ISA_AES)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0xdd)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VAESENCLAST m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX | ISA_AES)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xdd)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VAESENCLAST")
+ }
+ return p
+}
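+
+// Usage sketch: AES-128 encryption of one block chains nine VAESENC rounds
+// and a final VAESENCLAST, each taking the next round key as its first
+// operand. XMM0 is assumed to hold the state (initial whitening key already
+// XORed in) and XMM1..XMM10 the expanded round keys:
+//
+//     p.VAESENC(XMM1, XMM0, XMM0)         // rounds 1..9 use VAESENC
+//     // ... likewise with XMM2..XMM9 ...
+//     p.VAESENCLAST(XMM10, XMM0, XMM0)    // round 10 uses VAESENCLAST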
+
+// VAESIMC performs "Perform the AES InvMixColumn Transformation".
+//
+// Mnemonic : VAESIMC
+// Supported forms : (2 forms)
+//
+// * VAESIMC xmm, xmm [AES,AVX]
+// * VAESIMC m128, xmm [AES,AVX]
+//
+func (self *Program) VAESIMC(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VAESIMC", 2, Operands { v0, v1 })
+ // VAESIMC xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX | ISA_AES)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0xdb)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VAESIMC m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX | ISA_AES)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0xdb)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VAESIMC")
+ }
+ return p
+}
+
+// VAESKEYGENASSIST performs "AES Round Key Generation Assist".
+//
+// Mnemonic : VAESKEYGENASSIST
+// Supported forms : (2 forms)
+//
+// * VAESKEYGENASSIST imm8, xmm, xmm [AES,AVX]
+// * VAESKEYGENASSIST imm8, m128, xmm [AES,AVX]
+//
+func (self *Program) VAESKEYGENASSIST(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VAESKEYGENASSIST", 3, Operands { v0, v1, v2 })
+ // VAESKEYGENASSIST imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX | ISA_AES)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79)
+ m.emit(0xdf)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VAESKEYGENASSIST imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_AVX | ISA_AES)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0xdf)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VAESKEYGENASSIST")
+ }
+ return p
+}
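+
+// Usage sketch: VAESKEYGENASSIST is the building block of AES key
+// expansion; imm8 carries the round constant of the round being derived.
+// The shuffles and XORs that complete an expansion step are omitted here
+// (XMM1/XMM2 assumed):
+//
+//     p.VAESKEYGENASSIST(0x01, XMM1, XMM2)    // RCON = 0x01 for round 1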
+
+// VALIGND performs "Align Doubleword Vectors".
+//
+// Mnemonic : VALIGND
+// Supported forms : (6 forms)
+//
+// * VALIGND imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VALIGND imm8, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VALIGND imm8, m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VALIGND imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VALIGND imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VALIGND imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VALIGND(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VALIGND", 4, Operands { v0, v1, v2, v3 })
+ // VALIGND imm8, m512/m32bcst, zmm, zmm{k}{z}
+ if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x03)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VALIGND imm8, zmm, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x03)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VALIGND imm8, m128/m32bcst, xmm, xmm{k}{z}
+ if isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x03)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VALIGND imm8, xmm, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+ m.emit(0x03)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VALIGND imm8, m256/m32bcst, ymm, ymm{k}{z}
+ if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x03)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VALIGND imm8, ymm, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x03)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VALIGND")
+ }
+ return p
+}
+
+// VALIGNQ performs "Align Quadword Vectors".
+//
+// Mnemonic : VALIGNQ
+// Supported forms : (6 forms)
+//
+// * VALIGNQ imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VALIGNQ imm8, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VALIGNQ imm8, m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VALIGNQ imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VALIGNQ imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VALIGNQ imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VALIGNQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VALIGNQ", 4, Operands { v0, v1, v2, v3 })
+ // VALIGNQ imm8, m512/m64bcst, zmm, zmm{k}{z}
+ if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x03)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VALIGNQ imm8, zmm, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x03)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VALIGNQ imm8, m128/m64bcst, xmm, xmm{k}{z}
+ if isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x03)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VALIGNQ imm8, xmm, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+ m.emit(0x03)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VALIGNQ imm8, m256/m64bcst, ymm, ymm{k}{z}
+ if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x03)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VALIGNQ imm8, ymm, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x03)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VALIGNQ")
+ }
+ return p
+}
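+
+// Semantics note: VALIGND/VALIGNQ concatenate the two sources (third
+// operand high, second operand low), shift the pair right by imm8 elements
+// and keep the low vector, which makes them useful for cross-lane
+// rotations. Sketch (ZMM0..ZMM2 assumed):
+//
+//     p.VALIGND(3, ZMM1, ZMM2, ZMM0)    // ZMM0 = (ZMM2:ZMM1) >> 3 dwords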
+
+// VANDNPD performs "Bitwise Logical AND NOT of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VANDNPD
+// Supported forms : (10 forms)
+//
+// * VANDNPD xmm, xmm, xmm [AVX]
+// * VANDNPD m128, xmm, xmm [AVX]
+// * VANDNPD ymm, ymm, ymm [AVX]
+// * VANDNPD m256, ymm, ymm [AVX]
+// * VANDNPD m512/m64bcst, zmm, zmm{k}{z} [AVX512DQ]
+// * VANDNPD zmm, zmm, zmm{k}{z} [AVX512DQ]
+// * VANDNPD m128/m64bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VANDNPD xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VANDNPD m256/m64bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VANDNPD ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VANDNPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VANDNPD", 3, Operands { v0, v1, v2 })
+ // VANDNPD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x55)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VANDNPD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x55)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VANDNPD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x55)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VANDNPD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x55)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VANDNPD m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x55)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VANDNPD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x55)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VANDNPD m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x55)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VANDNPD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x55)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VANDNPD m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x55)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VANDNPD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x55)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VANDNPD")
+ }
+ return p
+}
+
+// VANDNPS performs "Bitwise Logical AND NOT of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VANDNPS
+// Supported forms : (10 forms)
+//
+// * VANDNPS xmm, xmm, xmm [AVX]
+// * VANDNPS m128, xmm, xmm [AVX]
+// * VANDNPS ymm, ymm, ymm [AVX]
+// * VANDNPS m256, ymm, ymm [AVX]
+// * VANDNPS m512/m32bcst, zmm, zmm{k}{z} [AVX512DQ]
+// * VANDNPS zmm, zmm, zmm{k}{z} [AVX512DQ]
+// * VANDNPS m128/m32bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VANDNPS xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VANDNPS m256/m32bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VANDNPS ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VANDNPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VANDNPS", 3, Operands { v0, v1, v2 })
+ // VANDNPS xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x55)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VANDNPS m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x55)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VANDNPS ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x55)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VANDNPS m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x55)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VANDNPS m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x55)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VANDNPS zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x55)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VANDNPS m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x55)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VANDNPS xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x55)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VANDNPS m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x55)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VANDNPS ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x55)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VANDNPS")
+ }
+ return p
+}
+
+// VANDPD performs "Bitwise Logical AND of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VANDPD
+// Supported forms : (10 forms)
+//
+// * VANDPD xmm, xmm, xmm [AVX]
+// * VANDPD m128, xmm, xmm [AVX]
+// * VANDPD ymm, ymm, ymm [AVX]
+// * VANDPD m256, ymm, ymm [AVX]
+// * VANDPD m512/m64bcst, zmm, zmm{k}{z} [AVX512DQ]
+// * VANDPD zmm, zmm, zmm{k}{z} [AVX512DQ]
+// * VANDPD m128/m64bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VANDPD xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VANDPD m256/m64bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VANDPD ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VANDPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VANDPD", 3, Operands { v0, v1, v2 })
+ // VANDPD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x54)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VANDPD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x54)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VANDPD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x54)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VANDPD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x54)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VANDPD m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x54)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VANDPD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x54)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VANDPD m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x54)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VANDPD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x54)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VANDPD m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x54)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VANDPD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x54)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VANDPD")
+ }
+ return p
+}
+
+// VANDPS performs "Bitwise Logical AND of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VANDPS
+// Supported forms : (10 forms)
+//
+// * VANDPS xmm, xmm, xmm [AVX]
+// * VANDPS m128, xmm, xmm [AVX]
+// * VANDPS ymm, ymm, ymm [AVX]
+// * VANDPS m256, ymm, ymm [AVX]
+// * VANDPS m512/m32bcst, zmm, zmm{k}{z} [AVX512DQ]
+// * VANDPS zmm, zmm, zmm{k}{z} [AVX512DQ]
+// * VANDPS m128/m32bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VANDPS xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VANDPS m256/m32bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VANDPS ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VANDPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VANDPS", 3, Operands { v0, v1, v2 })
+ // VANDPS xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x54)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VANDPS m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x54)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VANDPS ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x54)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VANDPS m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x54)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VANDPS m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x54)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VANDPS zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x54)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VANDPS m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x54)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VANDPS xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x54)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VANDPS m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x54)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VANDPS ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x54)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VANDPS")
+ }
+ return p
+}
+
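+// A minimal usage sketch for the register form above (illustrative only;
+// assumes a previously constructed *Program p and this package's XMM
+// register constants). Operands follow AT&T ordering, sources first and
+// destination last:
+//
+//     p.VANDPS(XMM1, XMM2, XMM0)    // xmm0 = xmm2 AND xmm1
+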
+// VBLENDMPD performs "Blend Packed Double-Precision Floating-Point Vectors Using an OpMask Control".
+//
+// Mnemonic : VBLENDMPD
+// Supported forms : (6 forms)
+//
+// * VBLENDMPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VBLENDMPD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VBLENDMPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VBLENDMPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VBLENDMPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VBLENDMPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VBLENDMPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VBLENDMPD", 3, Operands { v0, v1, v2 })
+ // VBLENDMPD m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x65)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VBLENDMPD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x65)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VBLENDMPD m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x65)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VBLENDMPD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x65)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VBLENDMPD m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x65)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VBLENDMPD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x65)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VBLENDMPD")
+ }
+ return p
+}
+
+// VBLENDMPS performs "Blend Packed Single-Precision Floating-Point Vectors Using an OpMask Control".
+//
+// Mnemonic : VBLENDMPS
+// Supported forms : (6 forms)
+//
+// * VBLENDMPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VBLENDMPS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VBLENDMPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VBLENDMPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VBLENDMPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VBLENDMPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VBLENDMPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VBLENDMPS", 3, Operands { v0, v1, v2 })
+ // VBLENDMPS m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x65)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VBLENDMPS zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x65)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VBLENDMPS m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x65)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VBLENDMPS xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x65)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VBLENDMPS m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x65)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VBLENDMPS ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x65)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VBLENDMPS")
+ }
+ return p
+}
+
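+// A usage sketch covering VBLENDMPD/VBLENDMPS (illustrative only; it assumes
+// plain ZMM registers satisfy the zmm{k}{z} operand checks when no opmask is
+// attached, which leaves the EVEX mask field at k0, i.e. unmasked):
+//
+//     p.VBLENDMPS(ZMM1, ZMM2, ZMM0)    // zmm0 = unmasked blend of zmm2 and zmm1
+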
+// VBLENDPD performs "Blend Packed Double Precision Floating-Point Values".
+//
+// Mnemonic : VBLENDPD
+// Supported forms : (4 forms)
+//
+// * VBLENDPD imm8, xmm, xmm, xmm [AVX]
+// * VBLENDPD imm8, m128, xmm, xmm [AVX]
+// * VBLENDPD imm8, ymm, ymm, ymm [AVX]
+// * VBLENDPD imm8, m256, ymm, ymm [AVX]
+//
+func (self *Program) VBLENDPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VBLENDPD", 4, Operands { v0, v1, v2, v3 })
+ // VBLENDPD imm8, xmm, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x0d)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VBLENDPD imm8, m128, xmm, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x0d)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VBLENDPD imm8, ymm, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x0d)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VBLENDPD imm8, m256, ymm, ymm
+ if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x0d)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VBLENDPD")
+ }
+ return p
+}
+
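+// A usage sketch for the immediate-controlled blend (illustrative only). With
+// the AT&T operand order used here, a set bit i of the imm8 selects lane i of
+// the first (reg/mem) source; a clear bit selects the second source:
+//
+//     p.VBLENDPD(0b01, XMM1, XMM2, XMM0)    // xmm0 = {xmm1[0], xmm2[1]}
+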
+// VBLENDPS performs "Blend Packed Single Precision Floating-Point Values".
+//
+// Mnemonic : VBLENDPS
+// Supported forms : (4 forms)
+//
+// * VBLENDPS imm8, xmm, xmm, xmm [AVX]
+// * VBLENDPS imm8, m128, xmm, xmm [AVX]
+// * VBLENDPS imm8, ymm, ymm, ymm [AVX]
+// * VBLENDPS imm8, m256, ymm, ymm [AVX]
+//
+func (self *Program) VBLENDPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VBLENDPS", 4, Operands { v0, v1, v2, v3 })
+ // VBLENDPS imm8, xmm, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x0c)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VBLENDPS imm8, m128, xmm, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x0c)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VBLENDPS imm8, ymm, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x0c)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VBLENDPS imm8, m256, ymm, ymm
+ if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x0c)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VBLENDPS")
+ }
+ return p
+}
+
+// VBLENDVPD performs "Variable Blend Packed Double Precision Floating-Point Values".
+//
+// Mnemonic : VBLENDVPD
+// Supported forms : (4 forms)
+//
+// * VBLENDVPD xmm, xmm, xmm, xmm [AVX]
+// * VBLENDVPD xmm, m128, xmm, xmm [AVX]
+// * VBLENDVPD ymm, ymm, ymm, ymm [AVX]
+// * VBLENDVPD ymm, m256, ymm, ymm [AVX]
+//
+func (self *Program) VBLENDVPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VBLENDVPD", 4, Operands { v0, v1, v2, v3 })
+ // VBLENDVPD xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x4b)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VBLENDVPD xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x4b)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VBLENDVPD ymm, ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x4b)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VBLENDVPD ymm, m256, ymm, ymm
+ if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x4b)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VBLENDVPD")
+ }
+ return p
+}
+
+// VBLENDVPS performs "Variable Blend Packed Single Precision Floating-Point Values".
+//
+// Mnemonic : VBLENDVPS
+// Supported forms : (4 forms)
+//
+// * VBLENDVPS xmm, xmm, xmm, xmm [AVX]
+// * VBLENDVPS xmm, m128, xmm, xmm [AVX]
+// * VBLENDVPS ymm, ymm, ymm, ymm [AVX]
+// * VBLENDVPS ymm, m256, ymm, ymm [AVX]
+//
+func (self *Program) VBLENDVPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VBLENDVPS", 4, Operands { v0, v1, v2, v3 })
+ // VBLENDVPS xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x4a)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VBLENDVPS xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x4a)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VBLENDVPS ymm, ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x4a)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VBLENDVPS ymm, m256, ymm, ymm
+ if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x4a)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VBLENDVPS")
+ }
+ return p
+}
+
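+// A usage sketch for the variable blends (illustrative only). VBLENDVPD and
+// VBLENDVPS take the selector register first; the sign bit of each selector
+// element picks between the two sources:
+//
+//     p.VBLENDVPS(XMM3, XMM1, XMM2, XMM0)    // xmm0[i] = sign(xmm3[i]) ? xmm1[i] : xmm2[i]
+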
+// VBROADCASTF128 performs "Broadcast 128 Bits of Floating-Point Data".
+//
+// Mnemonic : VBROADCASTF128
+// Supported forms : (1 form)
+//
+// * VBROADCASTF128 m128, ymm [AVX]
+//
+func (self *Program) VBROADCASTF128(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VBROADCASTF128", 2, Operands { v0, v1 })
+ // VBROADCASTF128 m128, ymm
+ if isM128(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x1a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VBROADCASTF128")
+ }
+ return p
+}
+
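+// A usage sketch (illustrative only; Ptr is assumed to be this package's
+// base-plus-displacement memory-operand constructor, so adjust the helper
+// name if the operand layer differs):
+//
+//     p.VBROADCASTF128(Ptr(RDI, 0), YMM0)    // ymm0 = both 128-bit halves loaded from [rdi]
+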
+// VBROADCASTF32X2 performs "Broadcast Two Single-Precision Floating-Point Elements".
+//
+// Mnemonic : VBROADCASTF32X2
+// Supported forms : (4 forms)
+//
+// * VBROADCASTF32X2 xmm, zmm{k}{z} [AVX512DQ]
+// * VBROADCASTF32X2 m64, zmm{k}{z} [AVX512DQ]
+// * VBROADCASTF32X2 xmm, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VBROADCASTF32X2 m64, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VBROADCASTF32X2(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VBROADCASTF32X2", 2, Operands { v0, v1 })
+ // VBROADCASTF32X2 xmm, zmm{k}{z}
+ if isEVEXXMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x19)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VBROADCASTF32X2 m64, zmm{k}{z}
+ if isM64(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x19)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VBROADCASTF32X2 xmm, ymm{k}{z}
+ if isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x19)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VBROADCASTF32X2 m64, ymm{k}{z}
+ if isM64(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x19)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VBROADCASTF32X2")
+ }
+ return p
+}
+
+// VBROADCASTF32X4 performs "Broadcast Four Single-Precision Floating-Point Elements".
+//
+// Mnemonic : VBROADCASTF32X4
+// Supported forms : (2 forms)
+//
+// * VBROADCASTF32X4 m128, zmm{k}{z} [AVX512F]
+// * VBROADCASTF32X4 m128, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VBROADCASTF32X4(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VBROADCASTF32X4", 2, Operands { v0, v1 })
+ // VBROADCASTF32X4 m128, zmm{k}{z}
+ if isM128(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x1a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VBROADCASTF32X4 m128, ymm{k}{z}
+ if isM128(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x1a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VBROADCASTF32X4")
+ }
+ return p
+}
+
+// VBROADCASTF32X8 performs "Broadcast Eight Single-Precision Floating-Point Elements".
+//
+// Mnemonic : VBROADCASTF32X8
+// Supported forms : (1 form)
+//
+// * VBROADCASTF32X8 m256, zmm{k}{z} [AVX512DQ]
+//
+func (self *Program) VBROADCASTF32X8(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VBROADCASTF32X8", 2, Operands { v0, v1 })
+ // VBROADCASTF32X8 m256, zmm{k}{z}
+ if isM256(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x1b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VBROADCASTF32X8")
+ }
+ return p
+}
+
+// VBROADCASTF64X2 performs "Broadcast Two Double-Precision Floating-Point Elements".
+//
+// Mnemonic : VBROADCASTF64X2
+// Supported forms : (2 forms)
+//
+// * VBROADCASTF64X2 m128, zmm{k}{z} [AVX512DQ]
+// * VBROADCASTF64X2 m128, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VBROADCASTF64X2(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VBROADCASTF64X2", 2, Operands { v0, v1 })
+ // VBROADCASTF64X2 m128, zmm{k}{z}
+ if isM128(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x1a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VBROADCASTF64X2 m128, ymm{k}{z}
+ if isM128(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x1a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VBROADCASTF64X2")
+ }
+ return p
+}
+
+// VBROADCASTF64X4 performs "Broadcast Four Double-Precision Floating-Point Elements".
+//
+// Mnemonic : VBROADCASTF64X4
+// Supported forms : (1 form)
+//
+// * VBROADCASTF64X4 m256, zmm{k}{z} [AVX512F]
+//
+func (self *Program) VBROADCASTF64X4(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VBROADCASTF64X4", 2, Operands { v0, v1 })
+ // VBROADCASTF64X4 m256, zmm{k}{z}
+ if isM256(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x1b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VBROADCASTF64X4")
+ }
+ return p
+}
+
+// VBROADCASTI128 performs "Broadcast 128 Bits of Integer Data".
+//
+// Mnemonic : VBROADCASTI128
+// Supported forms : (1 form)
+//
+// * VBROADCASTI128 m128, ymm [AVX2]
+//
+func (self *Program) VBROADCASTI128(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VBROADCASTI128", 2, Operands { v0, v1 })
+ // VBROADCASTI128 m128, ymm
+ if isM128(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x5a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VBROADCASTI128")
+ }
+ return p
+}
+
+// VBROADCASTI32X2 performs "Broadcast Two Doubleword Elements".
+//
+// Mnemonic : VBROADCASTI32X2
+// Supported forms : (6 forms)
+//
+// * VBROADCASTI32X2 xmm, zmm{k}{z} [AVX512DQ]
+// * VBROADCASTI32X2 m64, zmm{k}{z} [AVX512DQ]
+// * VBROADCASTI32X2 xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VBROADCASTI32X2 xmm, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VBROADCASTI32X2 m64, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VBROADCASTI32X2 m64, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VBROADCASTI32X2(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VBROADCASTI32X2", 2, Operands { v0, v1 })
+ // VBROADCASTI32X2 xmm, zmm{k}{z}
+ if isEVEXXMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VBROADCASTI32X2 m64, zmm{k}{z}
+ if isM64(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x59)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VBROADCASTI32X2 xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VBROADCASTI32X2 xmm, ymm{k}{z}
+ if isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VBROADCASTI32X2 m64, xmm{k}{z}
+ if isM64(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x59)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VBROADCASTI32X2 m64, ymm{k}{z}
+ if isM64(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x59)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VBROADCASTI32X2")
+ }
+ return p
+}
+
+// VBROADCASTI32X4 performs "Broadcast Four Doubleword Elements".
+//
+// Mnemonic : VBROADCASTI32X4
+// Supported forms : (2 forms)
+//
+// * VBROADCASTI32X4 m128, zmm{k}{z} [AVX512F]
+// * VBROADCASTI32X4 m128, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VBROADCASTI32X4(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VBROADCASTI32X4", 2, Operands { v0, v1 })
+ // VBROADCASTI32X4 m128, zmm{k}{z}
+ if isM128(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x5a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VBROADCASTI32X4 m128, ymm{k}{z}
+ if isM128(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x5a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VBROADCASTI32X4")
+ }
+ return p
+}
+
+// VBROADCASTI32X8 performs "Broadcast Eight Doubleword Elements".
+//
+// Mnemonic : VBROADCASTI32X8
+// Supported forms : (1 form)
+//
+// * VBROADCASTI32X8 m256, zmm{k}{z} [AVX512DQ]
+//
+func (self *Program) VBROADCASTI32X8(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VBROADCASTI32X8", 2, Operands { v0, v1 })
+ // VBROADCASTI32X8 m256, zmm{k}{z}
+ if isM256(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x5b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VBROADCASTI32X8")
+ }
+ return p
+}
+
+// VBROADCASTI64X2 performs "Broadcast Two Quadword Elements".
+//
+// Mnemonic : VBROADCASTI64X2
+// Supported forms : (2 forms)
+//
+// * VBROADCASTI64X2 m128, zmm{k}{z} [AVX512DQ]
+// * VBROADCASTI64X2 m128, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VBROADCASTI64X2(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VBROADCASTI64X2", 2, Operands { v0, v1 })
+ // VBROADCASTI64X2 m128, zmm{k}{z}
+ if isM128(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x5a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VBROADCASTI64X2 m128, ymm{k}{z}
+ if isM128(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x5a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VBROADCASTI64X2")
+ }
+ return p
+}
+
+// VBROADCASTI64X4 performs "Broadcast Four Quadword Elements".
+//
+// Mnemonic : VBROADCASTI64X4
+// Supported forms : (1 form)
+//
+// * VBROADCASTI64X4 m256, zmm{k}{z} [AVX512F]
+//
+func (self *Program) VBROADCASTI64X4(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VBROADCASTI64X4", 2, Operands { v0, v1 })
+ // VBROADCASTI64X4 m256, zmm{k}{z}
+ if isM256(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x5b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VBROADCASTI64X4")
+ }
+ return p
+}
+
+// VBROADCASTSD performs "Broadcast Double-Precision Floating-Point Element".
+//
+// Mnemonic : VBROADCASTSD
+// Supported forms : (6 forms)
+//
+// * VBROADCASTSD m64, ymm [AVX]
+// * VBROADCASTSD xmm, ymm [AVX2]
+// * VBROADCASTSD xmm, zmm{k}{z} [AVX512F]
+// * VBROADCASTSD m64, zmm{k}{z} [AVX512F]
+// * VBROADCASTSD xmm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VBROADCASTSD m64, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VBROADCASTSD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VBROADCASTSD", 2, Operands { v0, v1 })
+ // VBROADCASTSD m64, ymm
+ if isM64(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x19)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VBROADCASTSD xmm, ymm
+ if isXMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d)
+ m.emit(0x19)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VBROADCASTSD xmm, zmm{k}{z}
+ if isEVEXXMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x19)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VBROADCASTSD m64, zmm{k}{z}
+ if isM64(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x19)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VBROADCASTSD xmm, ymm{k}{z}
+ if isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x19)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VBROADCASTSD m64, ymm{k}{z}
+ if isM64(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x19)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VBROADCASTSD")
+ }
+ return p
+}
+
+// VBROADCASTSS performs "Broadcast Single-Precision Floating-Point Element".
+//
+// Mnemonic : VBROADCASTSS
+// Supported forms : (8 forms)
+//
+// * VBROADCASTSS m32, xmm [AVX]
+// * VBROADCASTSS m32, ymm [AVX]
+// * VBROADCASTSS xmm, xmm [AVX2]
+// * VBROADCASTSS xmm, ymm [AVX2]
+// * VBROADCASTSS xmm, zmm{k}{z} [AVX512F]
+// * VBROADCASTSS m32, zmm{k}{z} [AVX512F]
+// * VBROADCASTSS xmm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VBROADCASTSS m32, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VBROADCASTSS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VBROADCASTSS", 2, Operands { v0, v1 })
+ // VBROADCASTSS m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x18)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VBROADCASTSS m32, ymm
+ if isM32(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x18)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VBROADCASTSS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0x18)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VBROADCASTSS xmm, ymm
+ if isXMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d)
+ m.emit(0x18)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VBROADCASTSS xmm, zmm{k}{z}
+ if isEVEXXMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x18)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VBROADCASTSS m32, zmm{k}{z}
+ if isM32(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x18)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VBROADCASTSS xmm, ymm{k}{z}
+ if isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x18)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VBROADCASTSS m32, ymm{k}{z}
+ if isM32(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x18)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VBROADCASTSS")
+ }
+ return p
+}
+
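+// A usage sketch for the AVX2 register form (illustrative only): the lowest
+// 32-bit element of the source is replicated into every destination lane:
+//
+//     p.VBROADCASTSS(XMM1, YMM0)    // ymm0[i] = xmm1[0] for all eight lanes
+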
+// VCMPPD performs "Compare Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VCMPPD
+// Supported forms : (11 forms)
+//
+// * VCMPPD imm8, xmm, xmm, xmm [AVX]
+// * VCMPPD imm8, m128, xmm, xmm [AVX]
+// * VCMPPD imm8, ymm, ymm, ymm [AVX]
+// * VCMPPD imm8, m256, ymm, ymm [AVX]
+// * VCMPPD imm8, m512/m64bcst, zmm, k{k} [AVX512F]
+// * VCMPPD imm8, {sae}, zmm, zmm, k{k} [AVX512F]
+// * VCMPPD imm8, zmm, zmm, k{k} [AVX512F]
+// * VCMPPD imm8, m128/m64bcst, xmm, k{k} [AVX512F,AVX512VL]
+// * VCMPPD imm8, xmm, xmm, k{k} [AVX512F,AVX512VL]
+// * VCMPPD imm8, m256/m64bcst, ymm, k{k} [AVX512F,AVX512VL]
+// * VCMPPD imm8, ymm, ymm, k{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VCMPPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCMPPD", 4, Operands { v0, v1, v2, v3 })
+ case 1 : p = self.alloc("VCMPPD", 5, Operands { v0, v1, v2, v3, vv[0] })
+ default : panic("instruction VCMPPD takes 4 or 5 operands")
+ }
+ // VCMPPD imm8, xmm, xmm, xmm
+ if len(vv) == 0 && isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[3]), v[1], hlcode(v[2]))
+ m.emit(0xc2)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPPD imm8, m128, xmm, xmm
+ if len(vv) == 0 && isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0xc2)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPPD imm8, ymm, ymm, ymm
+ if len(vv) == 0 && isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[3]), v[1], hlcode(v[2]))
+ m.emit(0xc2)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPPD imm8, m256, ymm, ymm
+ if len(vv) == 0 && isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0xc2)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPPD imm8, m512/m64bcst, zmm, k{k}
+ if len(vv) == 0 && isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
+ m.emit(0xc2)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPPD imm8, {sae}, zmm, zmm, k{k}
+ if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMM(v3) && isKk(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[3]) << 3))
+ m.emit((0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
+ m.emit(0xc2)
+ m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPPD imm8, zmm, zmm, k{k}
+ if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0xc2)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPPD imm8, m128/m64bcst, xmm, k{k}
+ if len(vv) == 0 && isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
+ m.emit(0xc2)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPPD imm8, xmm, xmm, k{k}
+ if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+ m.emit(0xc2)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPPD imm8, m256/m64bcst, ymm, k{k}
+ if len(vv) == 0 && isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
+ m.emit(0xc2)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPPD imm8, ymm, ymm, k{k}
+ if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0xc2)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCMPPD")
+ }
+ return p
+}
+
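+// A usage sketch (illustrative only; predicate 0 encodes the EQ_OQ
+// comparison, and K1 is assumed to be one of this package's opmask register
+// constants). The variadic tail is consumed only by the five-operand {sae}
+// form:
+//
+//     p.VCMPPD(0, ZMM1, ZMM2, K1)    // k1[i] = (zmm2[i] == zmm1[i])
+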
+// VCMPPS performs "Compare Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VCMPPS
+// Supported forms : (11 forms)
+//
+// * VCMPPS imm8, xmm, xmm, xmm [AVX]
+// * VCMPPS imm8, m128, xmm, xmm [AVX]
+// * VCMPPS imm8, ymm, ymm, ymm [AVX]
+// * VCMPPS imm8, m256, ymm, ymm [AVX]
+// * VCMPPS imm8, m512/m32bcst, zmm, k{k} [AVX512F]
+// * VCMPPS imm8, {sae}, zmm, zmm, k{k} [AVX512F]
+// * VCMPPS imm8, zmm, zmm, k{k} [AVX512F]
+// * VCMPPS imm8, m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL]
+// * VCMPPS imm8, xmm, xmm, k{k} [AVX512F,AVX512VL]
+// * VCMPPS imm8, m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL]
+// * VCMPPS imm8, ymm, ymm, k{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VCMPPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCMPPS", 4, Operands { v0, v1, v2, v3 })
+ case 1 : p = self.alloc("VCMPPS", 5, Operands { v0, v1, v2, v3, vv[0] })
+ default : panic("instruction VCMPPS takes 4 or 5 operands")
+ }
+ // VCMPPS imm8, xmm, xmm, xmm
+ if len(vv) == 0 && isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[3]), v[1], hlcode(v[2]))
+ m.emit(0xc2)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPPS imm8, m128, xmm, xmm
+ if len(vv) == 0 && isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0xc2)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPPS imm8, ymm, ymm, ymm
+ if len(vv) == 0 && isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[3]), v[1], hlcode(v[2]))
+ m.emit(0xc2)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPPS imm8, m256, ymm, ymm
+ if len(vv) == 0 && isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0xc2)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPPS imm8, m512/m32bcst, zmm, k{k}
+ if len(vv) == 0 && isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
+ m.emit(0xc2)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPPS imm8, {sae}, zmm, zmm, k{k}
+ if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMM(v3) && isKk(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[3]) << 3))
+ m.emit((0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
+ m.emit(0xc2)
+ m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPPS imm8, zmm, zmm, k{k}
+ if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0xc2)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPPS imm8, m128/m32bcst, xmm, k{k}
+ if len(vv) == 0 && isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
+ m.emit(0xc2)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPPS imm8, xmm, xmm, k{k}
+ if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+ m.emit(0xc2)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPPS imm8, m256/m32bcst, ymm, k{k}
+ if len(vv) == 0 && isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
+ m.emit(0xc2)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPPS imm8, ymm, ymm, k{k}
+ if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0xc2)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCMPPS")
+ }
+ return p
+}
+
+// VCMPSD performs "Compare Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : VCMPSD
+// Supported forms : (5 forms)
+//
+// * VCMPSD imm8, xmm, xmm, xmm [AVX]
+// * VCMPSD imm8, m64, xmm, xmm [AVX]
+// * VCMPSD imm8, m64, xmm, k{k} [AVX512F]
+// * VCMPSD imm8, {sae}, xmm, xmm, k{k} [AVX512F]
+// * VCMPSD imm8, xmm, xmm, k{k} [AVX512F]
+//
+func (self *Program) VCMPSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCMPSD", 4, Operands { v0, v1, v2, v3 })
+ case 1 : p = self.alloc("VCMPSD", 5, Operands { v0, v1, v2, v3, vv[0] })
+ default : panic("instruction VCMPSD takes 4 or 5 operands")
+ }
+ // VCMPSD imm8, xmm, xmm, xmm
+ if len(vv) == 0 && isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[3]), v[1], hlcode(v[2]))
+ m.emit(0xc2)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPSD imm8, m64, xmm, xmm
+ if len(vv) == 0 && isImm8(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0xc2)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPSD imm8, m64, xmm, k{k}
+ if len(vv) == 0 && isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
+ m.emit(0xc2)
+ m.mrsd(lcode(v[3]), addr(v[1]), 8)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPSD imm8, {sae}, xmm, xmm, k{k}
+ if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isKk(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
+ m.emit(0xff ^ (hlcode(v[3]) << 3))
+ m.emit((0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
+ m.emit(0xc2)
+ m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPSD imm8, xmm, xmm, k{k}
+ if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xff ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0xc2)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCMPSD")
+ }
+ return p
+}
+
+// VCMPSS performs "Compare Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VCMPSS
+// Supported forms : (5 forms)
+//
+// * VCMPSS imm8, xmm, xmm, xmm [AVX]
+// * VCMPSS imm8, m32, xmm, xmm [AVX]
+// * VCMPSS imm8, m32, xmm, k{k} [AVX512F]
+// * VCMPSS imm8, {sae}, xmm, xmm, k{k} [AVX512F]
+// * VCMPSS imm8, xmm, xmm, k{k} [AVX512F]
+//
+func (self *Program) VCMPSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCMPSS", 4, Operands { v0, v1, v2, v3 })
+ case 1 : p = self.alloc("VCMPSS", 5, Operands { v0, v1, v2, v3, vv[0] })
+ default : panic("instruction VCMPSS takes 4 or 5 operands")
+ }
+ // VCMPSS imm8, xmm, xmm, xmm
+ if len(vv) == 0 && isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[3]), v[1], hlcode(v[2]))
+ m.emit(0xc2)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPSS imm8, m32, xmm, xmm
+ if len(vv) == 0 && isImm8(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0xc2)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPSS imm8, m32, xmm, k{k}
+ if len(vv) == 0 && isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
+ m.emit(0xc2)
+ m.mrsd(lcode(v[3]), addr(v[1]), 4)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPSS imm8, {sae}, xmm, xmm, k{k}
+ if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isKk(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[3]) << 3))
+ m.emit((0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
+ m.emit(0xc2)
+ m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCMPSS imm8, xmm, xmm, k{k}
+ if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0xc2)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCMPSS")
+ }
+ return p
+}
+
+// VCOMISD performs "Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS".
+//
+// Mnemonic : VCOMISD
+// Supported forms : (5 forms)
+//
+// * VCOMISD xmm, xmm [AVX]
+// * VCOMISD m64, xmm [AVX]
+// * VCOMISD m64, xmm [AVX512F]
+// * VCOMISD {sae}, xmm, xmm [AVX512F]
+// * VCOMISD xmm, xmm [AVX512F]
+//
+func (self *Program) VCOMISD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCOMISD", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCOMISD", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCOMISD takes 2 or 3 operands")
+ }
+ // VCOMISD xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), v[0], 0)
+ m.emit(0x2f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCOMISD m64, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x2f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCOMISD m64, xmm
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x2f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VCOMISD {sae}, xmm, xmm
+ if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit(0x18)
+ m.emit(0x2f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCOMISD xmm, xmm
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit(0x48)
+ m.emit(0x2f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCOMISD")
+ }
+ return p
+}
+
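+// A usage sketch (illustrative only): VCOMISD writes ZF/PF/CF from an ordered
+// scalar compare, so it is normally followed by a flags-based branch; the
+// variadic tail is consumed only by the three-operand {sae} form:
+//
+//     p.VCOMISD(XMM1, XMM0)    // compare xmm0 against xmm1, result in EFLAGS
+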
+// VCOMISS performs "Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS".
+//
+// Mnemonic : VCOMISS
+// Supported forms : (5 forms)
+//
+// * VCOMISS xmm, xmm [AVX]
+// * VCOMISS m32, xmm [AVX]
+// * VCOMISS m32, xmm [AVX512F]
+// * VCOMISS {sae}, xmm, xmm [AVX512F]
+// * VCOMISS xmm, xmm [AVX512F]
+//
+func (self *Program) VCOMISS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCOMISS", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCOMISS", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCOMISS takes 2 or 3 operands")
+ }
+ // VCOMISS xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[1]), v[0], 0)
+ m.emit(0x2f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCOMISS m32, xmm
+ if len(vv) == 0 && isM32(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x2f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCOMISS m32, xmm
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x2f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VCOMISS {sae}, xmm, xmm
+ if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c)
+ m.emit(0x18)
+ m.emit(0x2f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCOMISS xmm, xmm
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7c)
+ m.emit(0x48)
+ m.emit(0x2f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCOMISS")
+ }
+ return p
+}
+
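+// Illustrative usage (hand-written, not generated): a minimal sketch of the
+// two VCOMISS arities, assuming the CreateArch/CreateProgram constructors,
+// the XMM register constants and the SAE marker that this package defines
+// elsewhere:
+//
+//     p := CreateArch().CreateProgram()
+//     p.VCOMISS(XMM0, XMM1)      // 2-operand form
+//     p.VCOMISS(SAE, XMM0, XMM1) // 3-operand {sae} form, AVX-512 only
+//
+// Registers beyond XMM15, if defined, can only satisfy the EVEX guards. A
+// call that matches none of the guards above panics with "invalid operands
+// for VCOMISS" rather than failing silently.
+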
+// VCOMPRESSPD performs "Store Sparse Packed Double-Precision Floating-Point Values into Dense Memory/Register".
+//
+// Mnemonic : VCOMPRESSPD
+// Supported forms : (6 forms)
+//
+// * VCOMPRESSPD zmm, zmm{k}{z} [AVX512F]
+// * VCOMPRESSPD zmm, m512{k}{z} [AVX512F]
+// * VCOMPRESSPD xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCOMPRESSPD xmm, m128{k}{z} [AVX512F,AVX512VL]
+// * VCOMPRESSPD ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VCOMPRESSPD ymm, m256{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VCOMPRESSPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VCOMPRESSPD", 2, Operands { v0, v1 })
+ // VCOMPRESSPD zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x8a)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCOMPRESSPD zmm, m512{k}{z}
+ if isZMM(v0) && isM512kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x8a)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VCOMPRESSPD xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x8a)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCOMPRESSPD xmm, m128{k}{z}
+ if isEVEXXMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x8a)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VCOMPRESSPD ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x8a)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCOMPRESSPD ymm, m256{k}{z}
+ if isEVEXYMM(v0) && isM256kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x8a)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCOMPRESSPD")
+ }
+ return p
+}
+
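+// Worked example (hand-written, not generated): tracing the first
+// VCOMPRESSPD form above for VCOMPRESSPD(ZMM0, ZMM1) with no opmask,
+// assuming kcode and zcode of an unmasked register are both zero:
+//
+//     62 f2 fd 48 8a c1    // vcompresspd zmm1, zmm0
+//
+// The fourth byte (0x48) is where the {k}{z} decorations land: kcode(v[1])
+// fills its low three bits with the opmask register number, zcode(v[1])<<7
+// sets the zeroing bit, and the constant supplies the vector-length bits
+// (0x48 for zmm; the xmm and ymm forms below use 0x08 and 0x28).
+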
+// VCOMPRESSPS performs "Store Sparse Packed Single-Precision Floating-Point Values into Dense Memory/Register".
+//
+// Mnemonic : VCOMPRESSPS
+// Supported forms : (6 forms)
+//
+// * VCOMPRESSPS zmm, zmm{k}{z} [AVX512F]
+// * VCOMPRESSPS zmm, m512{k}{z} [AVX512F]
+// * VCOMPRESSPS xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCOMPRESSPS xmm, m128{k}{z} [AVX512F,AVX512VL]
+// * VCOMPRESSPS ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VCOMPRESSPS ymm, m256{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VCOMPRESSPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VCOMPRESSPS", 2, Operands { v0, v1 })
+ // VCOMPRESSPS zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x8a)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCOMPRESSPS zmm, m512{k}{z}
+ if isZMM(v0) && isM512kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x8a)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ // VCOMPRESSPS xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x8a)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCOMPRESSPS xmm, m128{k}{z}
+ if isEVEXXMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x8a)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ // VCOMPRESSPS ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x8a)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCOMPRESSPS ymm, m256{k}{z}
+ if isEVEXYMM(v0) && isM256kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x8a)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCOMPRESSPS")
+ }
+ return p
+}
+
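+// Note on the store forms above (hand-written, not generated): unlike most
+// EVEX memory operands, compressed stores scale disp8 by the element size
+// rather than the full vector width, which is why every VCOMPRESSPS memory
+// form passes 4 to mrsd (and every VCOMPRESSPD form passes 8) regardless of
+// whether the source is xmm, ymm or zmm.
+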
+// VCVTDQ2PD performs "Convert Packed Dword Integers to Packed Double-Precision FP Values".
+//
+// Mnemonic : VCVTDQ2PD
+// Supported forms : (10 forms)
+//
+// * VCVTDQ2PD xmm, xmm [AVX]
+// * VCVTDQ2PD m64, xmm [AVX]
+// * VCVTDQ2PD xmm, ymm [AVX]
+// * VCVTDQ2PD m128, ymm [AVX]
+// * VCVTDQ2PD m256/m32bcst, zmm{k}{z} [AVX512F]
+// * VCVTDQ2PD ymm, zmm{k}{z} [AVX512F]
+// * VCVTDQ2PD m64/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTDQ2PD m128/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VCVTDQ2PD xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTDQ2PD xmm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VCVTDQ2PD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VCVTDQ2PD", 2, Operands { v0, v1 })
+ // VCVTDQ2PD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[1]), v[0], 0)
+ m.emit(0xe6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTDQ2PD m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0xe6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTDQ2PD xmm, ymm
+ if isXMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(6, hcode(v[1]), v[0], 0)
+ m.emit(0xe6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTDQ2PD m128, ymm
+ if isM128(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(6, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0xe6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTDQ2PD m256/m32bcst, zmm{k}{z}
+ if isM256M32bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0xe6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTDQ2PD ymm, zmm{k}{z}
+ if isEVEXYMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0xe6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTDQ2PD m64/m32bcst, xmm{k}{z}
+ if isM64M32bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0xe6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VCVTDQ2PD m128/m32bcst, ymm{k}{z}
+ if isM128M32bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0xe6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTDQ2PD xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0xe6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTDQ2PD xmm, ymm{k}{z}
+ if isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0xe6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTDQ2PD")
+ }
+ return p
+}
+
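+// Note on the memory forms above (hand-written, not generated): the final
+// argument to mrsd is the EVEX disp8*N compression factor, which is why the
+// VEX forms pass 1 while the EVEX forms pass the operand width in bytes.
+// Assuming mrsd implements standard disp8*N compression, a displacement
+// that is a multiple of N still fits in a single byte:
+//
+//     // m128/m32bcst form, N = 16: displacement 64 encodes as disp8 = 4
+//     // (64 = 4 * 16); displacement 65 falls back to a full disp32.
+//
+// bcode(v[0]) sets the EVEX broadcast bit, which re-reads the same operand
+// as one 4-byte element repeated across the destination (the /m32bcst half
+// of each memory form).
+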
+// VCVTDQ2PS performs "Convert Packed Dword Integers to Packed Single-Precision FP Values".
+//
+// Mnemonic : VCVTDQ2PS
+// Supported forms : (11 forms)
+//
+// * VCVTDQ2PS xmm, xmm [AVX]
+// * VCVTDQ2PS m128, xmm [AVX]
+// * VCVTDQ2PS ymm, ymm [AVX]
+// * VCVTDQ2PS m256, ymm [AVX]
+// * VCVTDQ2PS m512/m32bcst, zmm{k}{z} [AVX512F]
+// * VCVTDQ2PS {er}, zmm, zmm{k}{z} [AVX512F]
+// * VCVTDQ2PS zmm, zmm{k}{z} [AVX512F]
+// * VCVTDQ2PS m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTDQ2PS m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VCVTDQ2PS xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTDQ2PS ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VCVTDQ2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTDQ2PS", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTDQ2PS", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTDQ2PS takes 2 or 3 operands")
+ }
+ // VCVTDQ2PS xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[1]), v[0], 0)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTDQ2PS m128, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x5b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTDQ2PS ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[1]), v[0], 0)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTDQ2PS m256, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x5b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTDQ2PS m512/m32bcst, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x5b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VCVTDQ2PS {er}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTDQ2PS zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTDQ2PS m128/m32bcst, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x5b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTDQ2PS m256/m32bcst, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x5b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTDQ2PS xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTDQ2PS ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTDQ2PS")
+ }
+ return p
+}
+
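+// Note on the {er} form above (hand-written, not generated): for embedded
+// rounding, vcode(v[0])<<5 places the 2-bit rounding mode in EVEX bits 6:5
+// and the constant 0x18 sets the EVEX.b bit that re-interprets those bits
+// as a rounding control instead of a vector length. Assuming the rounding
+// markers carry vcode values 0..3 in the usual RN/RD/RU/RZ order, an
+// unmasked round-toward-zero conversion would assemble as:
+//
+//     62 f1 7c 78 5b d1    // VCVTDQ2PS(RZ_SAE, ZMM1, ZMM2); 0x78 = 3<<5 | 0x18
+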
+// VCVTPD2DQ performs "Convert Packed Double-Precision FP Values to Packed Dword Integers".
+//
+// Mnemonic : VCVTPD2DQ
+// Supported forms : (11 forms)
+//
+// * VCVTPD2DQ xmm, xmm [AVX]
+// * VCVTPD2DQ ymm, xmm [AVX]
+// * VCVTPD2DQ m128, xmm [AVX]
+// * VCVTPD2DQ m256, xmm [AVX]
+// * VCVTPD2DQ m512/m64bcst, ymm{k}{z} [AVX512F]
+// * VCVTPD2DQ {er}, zmm, ymm{k}{z} [AVX512F]
+// * VCVTPD2DQ zmm, ymm{k}{z} [AVX512F]
+// * VCVTPD2DQ m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTPD2DQ m256/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTPD2DQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTPD2DQ ymm, xmm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VCVTPD2DQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTPD2DQ", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTPD2DQ", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTPD2DQ takes 2 or 3 operands")
+ }
+ // VCVTPD2DQ xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[1]), v[0], 0)
+ m.emit(0xe6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPD2DQ ymm, xmm
+ if len(vv) == 0 && isYMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(7, hcode(v[1]), v[0], 0)
+ m.emit(0xe6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPD2DQ m128, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0xe6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTPD2DQ m256, xmm
+ if len(vv) == 0 && isM256(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(7, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0xe6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTPD2DQ m512/m64bcst, ymm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0xe6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VCVTPD2DQ {er}, zmm, ymm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isYMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xff)
+ m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
+ m.emit(0xe6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTPD2DQ zmm, ymm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xff)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0xe6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPD2DQ m128/m64bcst, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0xe6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTPD2DQ m256/m64bcst, xmm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0xe6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTPD2DQ xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xff)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0xe6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPD2DQ ymm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xff)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0xe6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTPD2DQ")
+ }
+ return p
+}
+
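+// Note on the two VEX memory forms above (hand-written, not generated):
+// both write an xmm destination, so only the declared size of the memory
+// operand (the isM128/isM256 guards) can choose between them; the sole
+// encoding difference is the VEX.L bit:
+//
+//     c5 fb e6 /r    // VCVTPD2DQ m128, xmm  (VEX.128.F2.0F, vex2(3, ...))
+//     c5 ff e6 /r    // VCVTPD2DQ m256, xmm  (VEX.256.F2.0F, vex2(7, ...))
+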
+// VCVTPD2PS performs "Convert Packed Double-Precision FP Values to Packed Single-Precision FP Values".
+//
+// Mnemonic : VCVTPD2PS
+// Supported forms : (11 forms)
+//
+// * VCVTPD2PS xmm, xmm [AVX]
+// * VCVTPD2PS ymm, xmm [AVX]
+// * VCVTPD2PS m128, xmm [AVX]
+// * VCVTPD2PS m256, xmm [AVX]
+// * VCVTPD2PS m512/m64bcst, ymm{k}{z} [AVX512F]
+// * VCVTPD2PS {er}, zmm, ymm{k}{z} [AVX512F]
+// * VCVTPD2PS zmm, ymm{k}{z} [AVX512F]
+// * VCVTPD2PS m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTPD2PS m256/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTPD2PS xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTPD2PS ymm, xmm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VCVTPD2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTPD2PS", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTPD2PS", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTPD2PS takes 2 or 3 operands")
+ }
+ // VCVTPD2PS xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), v[0], 0)
+ m.emit(0x5a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPD2PS ymm, xmm
+ if len(vv) == 0 && isYMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[1]), v[0], 0)
+ m.emit(0x5a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPD2PS m128, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x5a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTPD2PS m256, xmm
+ if len(vv) == 0 && isM256(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x5a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTPD2PS m512/m64bcst, ymm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x5a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VCVTPD2PS {er}, zmm, ymm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isYMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
+ m.emit(0x5a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTPD2PS zmm, ymm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x5a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPD2PS m128/m64bcst, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x5a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTPD2PS m256/m64bcst, xmm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x5a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTPD2PS xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x5a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPD2PS ymm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x5a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTPD2PS")
+ }
+ return p
+}
+
+// VCVTPD2QQ performs "Convert Packed Double-Precision Floating-Point Values to Packed Quadword Integers".
+//
+// Mnemonic : VCVTPD2QQ
+// Supported forms : (7 forms)
+//
+// * VCVTPD2QQ m512/m64bcst, zmm{k}{z} [AVX512DQ]
+// * VCVTPD2QQ {er}, zmm, zmm{k}{z} [AVX512DQ]
+// * VCVTPD2QQ zmm, zmm{k}{z} [AVX512DQ]
+// * VCVTPD2QQ m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTPD2QQ m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTPD2QQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTPD2QQ ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VCVTPD2QQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTPD2QQ", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTPD2QQ", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTPD2QQ takes 2 or 3 operands")
+ }
+ // VCVTPD2QQ m512/m64bcst, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x7b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VCVTPD2QQ {er}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
+ m.emit(0x7b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTPD2QQ zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x7b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPD2QQ m128/m64bcst, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x7b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTPD2QQ m256/m64bcst, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x7b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTPD2QQ xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x7b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPD2QQ ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x7b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTPD2QQ")
+ }
+ return p
+}
+
+// VCVTPD2UDQ performs "Convert Packed Double-Precision Floating-Point Values to Packed Unsigned Doubleword Integers".
+//
+// Mnemonic : VCVTPD2UDQ
+// Supported forms : (7 forms)
+//
+// * VCVTPD2UDQ m512/m64bcst, ymm{k}{z} [AVX512F]
+// * VCVTPD2UDQ {er}, zmm, ymm{k}{z} [AVX512F]
+// * VCVTPD2UDQ zmm, ymm{k}{z} [AVX512F]
+// * VCVTPD2UDQ m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTPD2UDQ m256/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTPD2UDQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTPD2UDQ ymm, xmm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VCVTPD2UDQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTPD2UDQ", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTPD2UDQ", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTPD2UDQ takes 2 or 3 operands")
+ }
+ // VCVTPD2UDQ m512/m64bcst, ymm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x84, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x79)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VCVTPD2UDQ {er}, zmm, ymm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isYMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfc)
+ m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTPD2UDQ zmm, ymm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfc)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPD2UDQ m128/m64bcst, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x84, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x79)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTPD2UDQ m256/m64bcst, xmm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x84, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x79)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTPD2UDQ xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfc)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPD2UDQ ymm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfc)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTPD2UDQ")
+ }
+ return p
+}
+
+// VCVTPD2UQQ performs "Convert Packed Double-Precision Floating-Point Values to Packed Unsigned Quadword Integers".
+//
+// Mnemonic : VCVTPD2UQQ
+// Supported forms : (7 forms)
+//
+// * VCVTPD2UQQ m512/m64bcst, zmm{k}{z} [AVX512DQ]
+// * VCVTPD2UQQ {er}, zmm, zmm{k}{z} [AVX512DQ]
+// * VCVTPD2UQQ zmm, zmm{k}{z} [AVX512DQ]
+// * VCVTPD2UQQ m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTPD2UQQ m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTPD2UQQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTPD2UQQ ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VCVTPD2UQQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTPD2UQQ", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTPD2UQQ", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTPD2UQQ takes 2 or 3 operands")
+ }
+ // VCVTPD2UQQ m512/m64bcst, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x79)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VCVTPD2UQQ {er}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTPD2UQQ zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPD2UQQ m128/m64bcst, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x79)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTPD2UQQ m256/m64bcst, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x79)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTPD2UQQ xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPD2UQQ ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTPD2UQQ")
+ }
+ return p
+}
+
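+// Note on the evex(...) helper as used above (hand-written, not generated):
+// comparing call sites suggests its second argument packs the mandatory
+// prefix class in the low two bits (00/01/10/11 for none, 0x66, 0xF3 and
+// 0xF2) and REX.W in bit 7. That reading is consistent with VCVTPD2UQQ
+// (0x85: 66-prefixed, W1) against VCVTPD2UDQ just above (0x84: unprefixed,
+// W1), which share the opcode byte 0x79 and differ in exactly those prefix
+// bits in the Intel encoding. This is inferred from the generated output,
+// not from the encoder's source.
+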
+// VCVTPH2PS performs "Convert Half-Precision FP Values to Single-Precision FP Values".
+//
+// Mnemonic : VCVTPH2PS
+// Supported forms : (11 forms)
+//
+// * VCVTPH2PS xmm, xmm [F16C]
+// * VCVTPH2PS m64, xmm [F16C]
+// * VCVTPH2PS xmm, ymm [F16C]
+// * VCVTPH2PS m128, ymm [F16C]
+// * VCVTPH2PS m256, zmm{k}{z} [AVX512F]
+// * VCVTPH2PS {sae}, ymm, zmm{k}{z} [AVX512F]
+// * VCVTPH2PS ymm, zmm{k}{z} [AVX512F]
+// * VCVTPH2PS xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTPH2PS xmm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VCVTPH2PS m64, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTPH2PS m128, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VCVTPH2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTPH2PS", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTPH2PS", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTPH2PS takes 2 or 3 operands")
+ }
+ // VCVTPH2PS xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
+ self.require(ISA_F16C)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0x13)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPH2PS m64, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) {
+ self.require(ISA_F16C)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x13)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTPH2PS xmm, ymm
+ if len(vv) == 0 && isXMM(v0) && isYMM(v1) {
+ self.require(ISA_F16C)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d)
+ m.emit(0x13)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPH2PS m128, ymm
+ if len(vv) == 0 && isM128(v0) && isYMM(v1) {
+ self.require(ISA_F16C)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x13)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTPH2PS m256, zmm{k}{z}
+ if len(vv) == 0 && isM256(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x13)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTPH2PS {sae}, ymm, zmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isEVEXYMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
+ m.emit(0x13)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTPH2PS ymm, zmm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x13)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPH2PS xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x13)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPH2PS xmm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x13)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPH2PS m64, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x13)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VCVTPH2PS m128, ymm{k}{z}
+ if len(vv) == 0 && isM128(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x13)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTPH2PS")
+ }
+ return p
+}
+
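+// Note on the vex3 calls above (hand-written, not generated): VCVTPH2PS
+// lives in the 0F38 opcode map, which the two-byte VEX prefix cannot
+// express, so the memory forms use the three-byte prefix instead. Read
+// against the standard three-byte VEX layout, 0xc4 is the escape byte,
+// 0b10 selects the 0F38 map, and 0x01/0x05 carry pp=01 (the 0x66 prefix
+// class) with VEX.L clear or set for the xmm and ymm destinations. The
+// register forms hand-roll the same prefix with m.emit(0xc4) directly.
+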
+// VCVTPS2DQ performs "Convert Packed Single-Precision FP Values to Packed Dword Integers".
+//
+// Mnemonic : VCVTPS2DQ
+// Supported forms : (11 forms)
+//
+// * VCVTPS2DQ xmm, xmm [AVX]
+// * VCVTPS2DQ m128, xmm [AVX]
+// * VCVTPS2DQ ymm, ymm [AVX]
+// * VCVTPS2DQ m256, ymm [AVX]
+// * VCVTPS2DQ m512/m32bcst, zmm{k}{z} [AVX512F]
+// * VCVTPS2DQ {er}, zmm, zmm{k}{z} [AVX512F]
+// * VCVTPS2DQ zmm, zmm{k}{z} [AVX512F]
+// * VCVTPS2DQ m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTPS2DQ m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VCVTPS2DQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTPS2DQ ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VCVTPS2DQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTPS2DQ", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTPS2DQ", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTPS2DQ takes 2 or 3 operands")
+ }
+ // VCVTPS2DQ xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), v[0], 0)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPS2DQ m128, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x5b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTPS2DQ ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[1]), v[0], 0)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPS2DQ m256, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x5b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTPS2DQ m512/m32bcst, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x5b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VCVTPS2DQ {er}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTPS2DQ zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPS2DQ m128/m32bcst, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x5b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTPS2DQ m256/m32bcst, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x5b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTPS2DQ xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPS2DQ ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTPS2DQ")
+ }
+ return p
+}
+
+// VCVTPS2PD performs "Convert Packed Single-Precision FP Values to Packed Double-Precision FP Values".
+//
+// Mnemonic : VCVTPS2PD
+// Supported forms : (11 forms)
+//
+// * VCVTPS2PD xmm, xmm [AVX]
+// * VCVTPS2PD m64, xmm [AVX]
+// * VCVTPS2PD xmm, ymm [AVX]
+// * VCVTPS2PD m128, ymm [AVX]
+// * VCVTPS2PD m256/m32bcst, zmm{k}{z} [AVX512F]
+// * VCVTPS2PD {sae}, ymm, zmm{k}{z} [AVX512F]
+// * VCVTPS2PD ymm, zmm{k}{z} [AVX512F]
+// * VCVTPS2PD m64/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTPS2PD m128/m32bcst, ymm{k}{z} [AVX512VL]
+// * VCVTPS2PD xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTPS2PD xmm, ymm{k}{z} [AVX512VL]
+//
+func (self *Program) VCVTPS2PD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTPS2PD", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTPS2PD", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTPS2PD takes 2 or 3 operands")
+ }
+ // VCVTPS2PD xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[1]), v[0], 0)
+ m.emit(0x5a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPS2PD m64, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x5a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTPS2PD xmm, ymm
+ if len(vv) == 0 && isXMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[1]), v[0], 0)
+ m.emit(0x5a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPS2PD m128, ymm
+ if len(vv) == 0 && isM128(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x5a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTPS2PD m256/m32bcst, zmm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x5a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTPS2PD {sae}, ymm, zmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isEVEXYMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
+ m.emit(0x5a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTPS2PD ymm, zmm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x5a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPS2PD m64/m32bcst, xmm{k}{z}
+ if len(vv) == 0 && isM64M32bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x5a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VCVTPS2PD m128/m32bcst, ymm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x5a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTPS2PD xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x5a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPS2PD xmm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x5a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTPS2PD")
+ }
+ return p
+}
+
+// VCVTPS2PH performs "Convert Single-Precision FP value to Half-Precision FP value".
+//
+// Mnemonic : VCVTPS2PH
+// Supported forms : (11 forms)
+//
+// * VCVTPS2PH imm8, xmm, xmm [F16C]
+// * VCVTPS2PH imm8, ymm, xmm [F16C]
+// * VCVTPS2PH imm8, xmm, m64 [F16C]
+// * VCVTPS2PH imm8, ymm, m128 [F16C]
+// * VCVTPS2PH imm8, zmm, m256{k}{z} [AVX512F]
+// * VCVTPS2PH imm8, {sae}, zmm, ymm{k}{z} [AVX512F]
+// * VCVTPS2PH imm8, zmm, ymm{k}{z} [AVX512F]
+// * VCVTPS2PH imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTPS2PH imm8, xmm, m64{k}{z} [AVX512F,AVX512VL]
+// * VCVTPS2PH imm8, ymm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTPS2PH imm8, ymm, m128{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VCVTPS2PH(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTPS2PH", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VCVTPS2PH", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VCVTPS2PH takes 3 or 4 operands")
+ }
+ // VCVTPS2PH imm8, xmm, xmm
+ if len(vv) == 0 && isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_F16C)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5))
+ m.emit(0x79)
+ m.emit(0x1d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCVTPS2PH imm8, ymm, xmm
+ if len(vv) == 0 && isImm8(v0) && isYMM(v1) && isXMM(v2) {
+ self.require(ISA_F16C)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5))
+ m.emit(0x7d)
+ m.emit(0x1d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCVTPS2PH imm8, xmm, m64
+ if len(vv) == 0 && isImm8(v0) && isXMM(v1) && isM64(v2) {
+ self.require(ISA_F16C)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[1]), addr(v[2]), 0)
+ m.emit(0x1d)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCVTPS2PH imm8, ymm, m128
+ if len(vv) == 0 && isImm8(v0) && isYMM(v1) && isM128(v2) {
+ self.require(ISA_F16C)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[1]), addr(v[2]), 0)
+ m.emit(0x1d)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCVTPS2PH imm8, zmm, m256{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isM256kz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x1d)
+ m.mrsd(lcode(v[1]), addr(v[2]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCVTPS2PH imm8, {sae}, zmm, ymm{k}{z}
+ if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isYMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[3]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[3]) << 7) | kcode(v[3]) | 0x18)
+ m.emit(0x1d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[3]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCVTPS2PH imm8, zmm, ymm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+ m.emit(0x1d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCVTPS2PH imm8, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
+ m.emit(0x1d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCVTPS2PH imm8, xmm, m64{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isM64kz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x1d)
+ m.mrsd(lcode(v[1]), addr(v[2]), 8)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCVTPS2PH imm8, ymm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
+ m.emit(0x1d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VCVTPS2PH imm8, ymm, m128{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isM128kz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b01, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x1d)
+ m.mrsd(lcode(v[1]), addr(v[2]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTPS2PH")
+ }
+ return p
+}
+
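+// Usage sketch for the encoder above: the trailing variadic parameter picks
+// the operand arity, so len(vv) == 0 matches the plain forms while
+// len(vv) == 1 matches the forms that take a leading {sae} modifier.
+// Illustrative calls (ZMM1, YMM2 and SAE are placeholder names, not verified
+// exports of this package):
+//
+//     p.VCVTPS2PH(7, ZMM1, YMM2)      // imm8, zmm, ymm{k}{z}
+//     p.VCVTPS2PH(7, SAE, ZMM1, YMM2) // imm8, {sae}, zmm, ymm{k}{z}
+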
+// VCVTPS2QQ performs "Convert Packed Single-Precision Floating-Point Values to Packed Signed Quadword Integer Values".
+//
+// Mnemonic : VCVTPS2QQ
+// Supported forms : (7 forms)
+//
+// * VCVTPS2QQ m256/m32bcst, zmm{k}{z} [AVX512DQ]
+// * VCVTPS2QQ {er}, ymm, zmm{k}{z} [AVX512DQ]
+// * VCVTPS2QQ ymm, zmm{k}{z} [AVX512DQ]
+// * VCVTPS2QQ m64/m32bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTPS2QQ m128/m32bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTPS2QQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTPS2QQ xmm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VCVTPS2QQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTPS2QQ", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTPS2QQ", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTPS2QQ takes 2 or 3 operands")
+ }
+ // VCVTPS2QQ m256/m32bcst, zmm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x7b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTPS2QQ {er}, ymm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXYMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
+ m.emit(0x7b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTPS2QQ ymm, zmm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x7b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPS2QQ m64/m32bcst, xmm{k}{z}
+ if len(vv) == 0 && isM64M32bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x7b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VCVTPS2QQ m128/m32bcst, ymm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x7b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTPS2QQ xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x7b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPS2QQ xmm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x7b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTPS2QQ")
+ }
+ return p
+}
+
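+// Note: the 128/256-bit branches above pass ISA_AVX512VL | ISA_AVX512DQ to
+// self.require, mirroring the bracketed ISA lists in the doc comment: the
+// VL-width variants of an AVX-512 instruction need both the base feature and
+// AVX512VL.
+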
+// VCVTPS2UDQ performs "Convert Packed Single-Precision Floating-Point Values to Packed Unsigned Doubleword Integer Values".
+//
+// Mnemonic : VCVTPS2UDQ
+// Supported forms : (7 forms)
+//
+// * VCVTPS2UDQ m512/m32bcst, zmm{k}{z} [AVX512F]
+// * VCVTPS2UDQ {er}, zmm, zmm{k}{z} [AVX512F]
+// * VCVTPS2UDQ zmm, zmm{k}{z} [AVX512F]
+// * VCVTPS2UDQ m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTPS2UDQ m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VCVTPS2UDQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTPS2UDQ ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VCVTPS2UDQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTPS2UDQ", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTPS2UDQ", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTPS2UDQ takes 2 or 3 operands")
+ }
+ // VCVTPS2UDQ m512/m32bcst, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x79)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VCVTPS2UDQ {er}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTPS2UDQ zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPS2UDQ m128/m32bcst, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x79)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTPS2UDQ m256/m32bcst, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x79)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTPS2UDQ xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPS2UDQ ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTPS2UDQ")
+ }
+ return p
+}
+
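+// A note on the literal bytes in the hand-rolled EVEX emissions above: the
+// fourth EVEX byte packs z (bit 7), L'L (bits 6:5), b (bit 4), V' (bit 3) and
+// the mask register aaa (bits 2:0). The constants ORed in therefore read as a
+// vector length with V' set: 0x08 for 128-bit, 0x28 for 256-bit, 0x48 for
+// 512-bit, and 0x18 for the reg-reg {er}/{sae} forms, where EVEX.b is set
+// instead of a length. The (zcode << 7) | kcode terms fill in the z and aaa
+// fields for the {k}{z} masking syntax.
+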
+// VCVTPS2UQQ performs "Convert Packed Single-Precision Floating-Point Values to Packed Unsigned Quadword Integer Values".
+//
+// Mnemonic : VCVTPS2UQQ
+// Supported forms : (7 forms)
+//
+// * VCVTPS2UQQ m256/m32bcst, zmm{k}{z} [AVX512DQ]
+// * VCVTPS2UQQ {er}, ymm, zmm{k}{z} [AVX512DQ]
+// * VCVTPS2UQQ ymm, zmm{k}{z} [AVX512DQ]
+// * VCVTPS2UQQ m64/m32bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTPS2UQQ m128/m32bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTPS2UQQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTPS2UQQ xmm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VCVTPS2UQQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTPS2UQQ", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTPS2UQQ", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTPS2UQQ takes 2 or 3 operands")
+ }
+ // VCVTPS2UQQ m256/m32bcst, zmm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x79)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTPS2UQQ {er}, ymm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXYMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTPS2UQQ ymm, zmm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPS2UQQ m64/m32bcst, xmm{k}{z}
+ if len(vv) == 0 && isM64M32bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x79)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VCVTPS2UQQ m128/m32bcst, ymm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x79)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTPS2UQQ xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTPS2UQQ xmm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTPS2UQQ")
+ }
+ return p
+}
+
+// VCVTQQ2PD performs "Convert Packed Quadword Integers to Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VCVTQQ2PD
+// Supported forms : (7 forms)
+//
+// * VCVTQQ2PD m512/m64bcst, zmm{k}{z} [AVX512DQ]
+// * VCVTQQ2PD {er}, zmm, zmm{k}{z} [AVX512DQ]
+// * VCVTQQ2PD zmm, zmm{k}{z} [AVX512DQ]
+// * VCVTQQ2PD m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTQQ2PD m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTQQ2PD xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTQQ2PD ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VCVTQQ2PD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTQQ2PD", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTQQ2PD", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTQQ2PD takes 2 or 3 operands")
+ }
+ // VCVTQQ2PD m512/m64bcst, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x86, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0xe6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VCVTQQ2PD {er}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfe)
+ m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
+ m.emit(0xe6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTQQ2PD zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0xe6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTQQ2PD m128/m64bcst, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0xe6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTQQ2PD m256/m64bcst, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x86, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0xe6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTQQ2PD xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0xe6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTQQ2PD ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0xe6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTQQ2PD")
+ }
+ return p
+}
+
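+// Note on the memory forms above: bcode(v[0]) raises EVEX.b when the source
+// is a broadcast operand (m64bcst), and the final argument to mrsd (64, 32,
+// 16, ...) appears to be the AVX-512 disp8*N compression factor, i.e. the
+// unit in which a one-byte displacement is scaled for that operand width.
+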
+// VCVTQQ2PS performs "Convert Packed Quadword Integers to Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VCVTQQ2PS
+// Supported forms : (7 forms)
+//
+// * VCVTQQ2PS m512/m64bcst, ymm{k}{z} [AVX512DQ]
+// * VCVTQQ2PS {er}, zmm, ymm{k}{z} [AVX512DQ]
+// * VCVTQQ2PS zmm, ymm{k}{z} [AVX512DQ]
+// * VCVTQQ2PS m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTQQ2PS m256/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTQQ2PS xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTQQ2PS ymm, xmm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VCVTQQ2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTQQ2PS", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTQQ2PS", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTQQ2PS takes 2 or 3 operands")
+ }
+ // VCVTQQ2PS m512/m64bcst, ymm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x84, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x5b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VCVTQQ2PS {er}, zmm, ymm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isYMMkz(vv[0]) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfc)
+ m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTQQ2PS zmm, ymm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfc)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTQQ2PS m128/m64bcst, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x84, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x5b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTQQ2PS m256/m64bcst, xmm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x84, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x5b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTQQ2PS xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfc)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTQQ2PS ymm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfc)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTQQ2PS")
+ }
+ return p
+}
+
+// VCVTSD2SI performs "Convert Scalar Double-Precision FP Value to Integer".
+//
+// Mnemonic : VCVTSD2SI
+// Supported forms : (10 forms)
+//
+// * VCVTSD2SI xmm, r32 [AVX]
+// * VCVTSD2SI m64, r32 [AVX]
+// * VCVTSD2SI xmm, r64 [AVX]
+// * VCVTSD2SI m64, r64 [AVX]
+// * VCVTSD2SI m64, r32 [AVX512F]
+// * VCVTSD2SI m64, r64 [AVX512F]
+// * VCVTSD2SI {er}, xmm, r32 [AVX512F]
+// * VCVTSD2SI {er}, xmm, r64 [AVX512F]
+// * VCVTSD2SI xmm, r32 [AVX512F]
+// * VCVTSD2SI xmm, r64 [AVX512F]
+//
+func (self *Program) VCVTSD2SI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTSD2SI", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTSD2SI", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTSD2SI takes 2 or 3 operands")
+ }
+ // VCVTSD2SI xmm, r32
+ if len(vv) == 0 && isXMM(v0) && isReg32(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[1]), v[0], 0)
+ m.emit(0x2d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTSD2SI m64, r32
+ if len(vv) == 0 && isM64(v0) && isReg32(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x2d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTSD2SI xmm, r64
+ if len(vv) == 0 && isXMM(v0) && isReg64(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfb)
+ m.emit(0x2d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTSD2SI m64, r64
+ if len(vv) == 0 && isM64(v0) && isReg64(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b1, 0x83, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x2d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTSD2SI m64, r32
+ if len(vv) == 0 && isM64(v0) && isReg32(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x07, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x2d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VCVTSD2SI m64, r64
+ if len(vv) == 0 && isM64(v0) && isReg64(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x2d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VCVTSD2SI {er}, xmm, r32
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg32(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7f)
+ m.emit((vcode(v[0]) << 5) | 0x18)
+ m.emit(0x2d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTSD2SI {er}, xmm, r64
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg64(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xff)
+ m.emit((vcode(v[0]) << 5) | 0x18)
+ m.emit(0x2d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTSD2SI xmm, r32
+ if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7f)
+ m.emit(0x48)
+ m.emit(0x2d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTSD2SI xmm, r64
+ if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xff)
+ m.emit(0x48)
+ m.emit(0x2d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTSD2SI")
+ }
+ return p
+}
+
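+// Note on the {er} forms above: in a reg-reg EVEX encoding with EVEX.b set
+// (the 0x18 constant), the L'L field is reinterpreted as the static rounding
+// control, which is why vcode(v[0]) is shifted into bits 6:5 of that byte.
+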
+// VCVTSD2SS performs "Convert Scalar Double-Precision FP Value to Scalar Single-Precision FP Value".
+//
+// Mnemonic : VCVTSD2SS
+// Supported forms : (5 forms)
+//
+// * VCVTSD2SS xmm, xmm, xmm [AVX]
+// * VCVTSD2SS m64, xmm, xmm [AVX]
+// * VCVTSD2SS m64, xmm, xmm{k}{z} [AVX512F]
+// * VCVTSD2SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VCVTSD2SS xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VCVTSD2SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTSD2SS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VCVTSD2SS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VCVTSD2SS takes 3 or 4 operands")
+ }
+ // VCVTSD2SS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTSD2SS m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VCVTSD2SS m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x5a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VCVTSD2SS {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xff ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x5a)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTSD2SS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xff ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x5a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTSD2SS")
+ }
+ return p
+}
+
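+// Note on the register predicates in the encoder above: isXMM gates the
+// VEX-encoded AVX forms, while isEVEXXMM gates the EVEX-encoded AVX-512
+// forms, which presumably also admit the extended registers XMM16-XMM31 that
+// a VEX prefix cannot encode.
+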
+// VCVTSD2USI performs "Convert Scalar Double-Precision Floating-Point Value to Unsigned Doubleword Integer".
+//
+// Mnemonic : VCVTSD2USI
+// Supported forms : (6 forms)
+//
+// * VCVTSD2USI m64, r32 [AVX512F]
+// * VCVTSD2USI m64, r64 [AVX512F]
+// * VCVTSD2USI {er}, xmm, r32 [AVX512F]
+// * VCVTSD2USI {er}, xmm, r64 [AVX512F]
+// * VCVTSD2USI xmm, r32 [AVX512F]
+// * VCVTSD2USI xmm, r64 [AVX512F]
+//
+func (self *Program) VCVTSD2USI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTSD2USI", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTSD2USI", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTSD2USI takes 2 or 3 operands")
+ }
+ // VCVTSD2USI m64, r32
+ if len(vv) == 0 && isM64(v0) && isReg32(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x07, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x79)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VCVTSD2USI m64, r64
+ if len(vv) == 0 && isM64(v0) && isReg64(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x79)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VCVTSD2USI {er}, xmm, r32
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg32(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7f)
+ m.emit((vcode(v[0]) << 5) | 0x18)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTSD2USI {er}, xmm, r64
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg64(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xff)
+ m.emit((vcode(v[0]) << 5) | 0x18)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTSD2USI xmm, r32
+ if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7f)
+ m.emit(0x48)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTSD2USI xmm, r64
+ if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xff)
+ m.emit(0x48)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTSD2USI")
+ }
+ return p
+}
+
+// VCVTSI2SD performs "Convert Dword Integer to Scalar Double-Precision FP Value".
+//
+// Mnemonic : VCVTSI2SD
+// Supported forms : (9 forms)
+//
+// * VCVTSI2SD r32, xmm, xmm [AVX]
+// * VCVTSI2SD r64, xmm, xmm [AVX]
+// * VCVTSI2SD m32, xmm, xmm [AVX]
+// * VCVTSI2SD m64, xmm, xmm [AVX]
+// * VCVTSI2SD r32, xmm, xmm [AVX512F]
+// * VCVTSI2SD m32, xmm, xmm [AVX512F]
+// * VCVTSI2SD m64, xmm, xmm [AVX512F]
+// * VCVTSI2SD {er}, r64, xmm, xmm [AVX512F]
+// * VCVTSI2SD r64, xmm, xmm [AVX512F]
+//
+func (self *Program) VCVTSI2SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTSI2SD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VCVTSI2SD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VCVTSI2SD takes 3 or 4 operands")
+ }
+ // VCVTSI2SD r32, xmm, xmm
+ if len(vv) == 0 && isReg32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x2a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTSI2SD r64, xmm, xmm
+ if len(vv) == 0 && isReg64(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfb ^ (hlcode(v[1]) << 3))
+ m.emit(0x2a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTSI2SD m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x2a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VCVTSI2SD m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b1, 0x83, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x2a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VCVTSI2SD r32, xmm, xmm
+ if len(vv) == 0 && isReg32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7f ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x00)
+ m.emit(0x2a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTSI2SD m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x07, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
+ m.emit(0x2a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VCVTSI2SD m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
+ m.emit(0x2a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VCVTSI2SD {er}, r64, xmm, xmm
+ if len(vv) == 1 && isER(v0) && isReg64(v1) && isEVEXXMM(v2) && isEVEXXMM(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xff ^ (hlcode(v[2]) << 3))
+ m.emit((vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | 0x10)
+ m.emit(0x2a)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTSI2SD r64, xmm, xmm
+ if len(vv) == 0 && isReg64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xff ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40)
+ m.emit(0x2a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTSI2SD")
+ }
+ return p
+}
+
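+// Usage sketch for VCVTSI2SD: the converted scalar lands in the low lane of
+// the destination and the upper bits are copied from the second operand, so a
+// typical call passes the destination register twice (RAX and XMM0 are
+// placeholder names, assuming the package exports the usual register
+// constants):
+//
+//     p.VCVTSI2SD(RAX, XMM0, XMM0) // xmm0[63:0] = double(rax), upper bits kept
+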
+// VCVTSI2SS performs "Convert Dword Integer to Scalar Single-Precision FP Value".
+//
+// Mnemonic : VCVTSI2SS
+// Supported forms : (10 forms)
+//
+// * VCVTSI2SS r32, xmm, xmm [AVX]
+// * VCVTSI2SS r64, xmm, xmm [AVX]
+// * VCVTSI2SS m32, xmm, xmm [AVX]
+// * VCVTSI2SS m64, xmm, xmm [AVX]
+// * VCVTSI2SS m32, xmm, xmm [AVX512F]
+// * VCVTSI2SS m64, xmm, xmm [AVX512F]
+// * VCVTSI2SS {er}, r32, xmm, xmm [AVX512F]
+// * VCVTSI2SS {er}, r64, xmm, xmm [AVX512F]
+// * VCVTSI2SS r32, xmm, xmm [AVX512F]
+// * VCVTSI2SS r64, xmm, xmm [AVX512F]
+//
+func (self *Program) VCVTSI2SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTSI2SS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VCVTSI2SS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VCVTSI2SS takes 3 or 4 operands")
+ }
+ // VCVTSI2SS r32, xmm, xmm
+ if len(vv) == 0 && isReg32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x2a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTSI2SS r64, xmm, xmm
+ if len(vv) == 0 && isReg64(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfa ^ (hlcode(v[1]) << 3))
+ m.emit(0x2a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTSI2SS m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x2a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VCVTSI2SS m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b1, 0x82, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x2a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VCVTSI2SS m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
+ m.emit(0x2a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VCVTSI2SS m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x86, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
+ m.emit(0x2a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VCVTSI2SS {er}, r32, xmm, xmm
+ if len(vv) == 1 && isER(v0) && isReg32(v1) && isEVEXXMM(v2) && isEVEXXMM(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[2]) << 3))
+ m.emit((vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | 0x10)
+ m.emit(0x2a)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTSI2SS {er}, r64, xmm, xmm
+ if len(vv) == 1 && isER(v0) && isReg64(v1) && isEVEXXMM(v2) && isEVEXXMM(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfe ^ (hlcode(v[2]) << 3))
+ m.emit((vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | 0x10)
+ m.emit(0x2a)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTSI2SS r32, xmm, xmm
+ if len(vv) == 0 && isReg32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40)
+ m.emit(0x2a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTSI2SS r64, xmm, xmm
+ if len(vv) == 0 && isReg64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfe ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40)
+ m.emit(0x2a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTSI2SS")
+ }
+ return p
+}
+
+// VCVTSS2SD performs "Convert Scalar Single-Precision FP Value to Scalar Double-Precision FP Value".
+//
+// Mnemonic : VCVTSS2SD
+// Supported forms : (5 forms)
+//
+// * VCVTSS2SD xmm, xmm, xmm [AVX]
+// * VCVTSS2SD m32, xmm, xmm [AVX]
+// * VCVTSS2SD m32, xmm, xmm{k}{z} [AVX512F]
+// * VCVTSS2SD {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VCVTSS2SD xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VCVTSS2SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTSS2SD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VCVTSS2SD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VCVTSS2SD takes 3 or 4 operands")
+ }
+ // VCVTSS2SD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTSS2SD m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VCVTSS2SD m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x5a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VCVTSS2SD {sae}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x5a)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTSS2SD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x5a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTSS2SD")
+ }
+ return p
+}
+
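+// Note: VCVTSS2SD takes {sae} rather than {er} because widening single to
+// double precision is exact; no rounding occurs, so only exception
+// suppression is meaningful.
+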
+// VCVTSS2SI performs "Convert Scalar Single-Precision FP Value to Dword Integer".
+//
+// Mnemonic : VCVTSS2SI
+// Supported forms : (10 forms)
+//
+// * VCVTSS2SI xmm, r32 [AVX]
+// * VCVTSS2SI m32, r32 [AVX]
+// * VCVTSS2SI xmm, r64 [AVX]
+// * VCVTSS2SI m32, r64 [AVX]
+// * VCVTSS2SI m32, r32 [AVX512F]
+// * VCVTSS2SI m32, r64 [AVX512F]
+// * VCVTSS2SI {er}, xmm, r32 [AVX512F]
+// * VCVTSS2SI {er}, xmm, r64 [AVX512F]
+// * VCVTSS2SI xmm, r32 [AVX512F]
+// * VCVTSS2SI xmm, r64 [AVX512F]
+//
+func (self *Program) VCVTSS2SI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTSS2SI", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTSS2SI", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTSS2SI takes 2 or 3 operands")
+ }
+ // VCVTSS2SI xmm, r32
+ if len(vv) == 0 && isXMM(v0) && isReg32(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[1]), v[0], 0)
+ m.emit(0x2d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTSS2SI m32, r32
+ if len(vv) == 0 && isM32(v0) && isReg32(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x2d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTSS2SI xmm, r64
+ if len(vv) == 0 && isXMM(v0) && isReg64(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfa)
+ m.emit(0x2d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTSS2SI m32, r64
+ if len(vv) == 0 && isM32(v0) && isReg64(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b1, 0x82, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x2d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTSS2SI m32, r32
+ if len(vv) == 0 && isM32(v0) && isReg32(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x2d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VCVTSS2SI m32, r64
+ if len(vv) == 0 && isM32(v0) && isReg64(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x2d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VCVTSS2SI {er}, xmm, r32
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg32(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7e)
+ m.emit((vcode(v[0]) << 5) | 0x18)
+ m.emit(0x2d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTSS2SI {er}, xmm, r64
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg64(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfe)
+ m.emit((vcode(v[0]) << 5) | 0x18)
+ m.emit(0x2d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTSS2SI xmm, r32
+ if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit(0x48)
+ m.emit(0x2d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTSS2SI xmm, r64
+ if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit(0x48)
+ m.emit(0x2d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTSS2SI")
+ }
+ return p
+}
+
+// VCVTSS2USI performs "Convert Scalar Single-Precision Floating-Point Value to Unsigned Doubleword Integer".
+//
+// Mnemonic : VCVTSS2USI
+// Supported forms : (6 forms)
+//
+// * VCVTSS2USI m32, r32 [AVX512F]
+// * VCVTSS2USI m32, r64 [AVX512F]
+// * VCVTSS2USI {er}, xmm, r32 [AVX512F]
+// * VCVTSS2USI {er}, xmm, r64 [AVX512F]
+// * VCVTSS2USI xmm, r32 [AVX512F]
+// * VCVTSS2USI xmm, r64 [AVX512F]
+//
+func (self *Program) VCVTSS2USI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTSS2USI", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTSS2USI", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTSS2USI takes 2 or 3 operands")
+ }
+ // VCVTSS2USI m32, r32
+ if len(vv) == 0 && isM32(v0) && isReg32(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x79)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VCVTSS2USI m32, r64
+ if len(vv) == 0 && isM32(v0) && isReg64(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x79)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VCVTSS2USI {er}, xmm, r32
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg32(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7e)
+ m.emit((vcode(v[0]) << 5) | 0x18)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTSS2USI {er}, xmm, r64
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg64(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfe)
+ m.emit((vcode(v[0]) << 5) | 0x18)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTSS2USI xmm, r32
+ if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit(0x48)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTSS2USI xmm, r64
+ if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit(0x48)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTSS2USI")
+ }
+ return p
+}
+
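+// Note: the unsigned conversions (VCVTSD2USI and VCVTSS2USI above) have no
+// VEX-encoded AVX forms; they were introduced with AVX-512F, so every branch
+// requires ISA_AVX512F.
+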
+// VCVTTPD2DQ performs "Convert with Truncation Packed Double-Precision FP Values to Packed Dword Integers".
+//
+// Mnemonic : VCVTTPD2DQ
+// Supported forms : (11 forms)
+//
+// * VCVTTPD2DQ xmm, xmm [AVX]
+// * VCVTTPD2DQ ymm, xmm [AVX]
+// * VCVTTPD2DQ m128, xmm [AVX]
+// * VCVTTPD2DQ m256, xmm [AVX]
+// * VCVTTPD2DQ m512/m64bcst, ymm{k}{z} [AVX512F]
+// * VCVTTPD2DQ {sae}, zmm, ymm{k}{z} [AVX512F]
+// * VCVTTPD2DQ zmm, ymm{k}{z} [AVX512F]
+// * VCVTTPD2DQ m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTTPD2DQ m256/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTTPD2DQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTTPD2DQ ymm, xmm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VCVTTPD2DQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTTPD2DQ", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTTPD2DQ", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTTPD2DQ takes 2 or 3 operands")
+ }
+ // VCVTTPD2DQ xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), v[0], 0)
+ m.emit(0xe6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTPD2DQ ymm, xmm
+ if len(vv) == 0 && isYMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[1]), v[0], 0)
+ m.emit(0xe6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTPD2DQ m128, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0xe6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTTPD2DQ m256, xmm
+ if len(vv) == 0 && isM256(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0xe6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTTPD2DQ m512/m64bcst, ymm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0xe6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VCVTTPD2DQ {sae}, zmm, ymm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isYMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
+ m.emit(0xe6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTTPD2DQ zmm, ymm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0xe6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTPD2DQ m128/m64bcst, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0xe6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTTPD2DQ m256/m64bcst, xmm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0xe6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTTPD2DQ xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0xe6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTPD2DQ ymm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0xe6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTTPD2DQ")
+ }
+ return p
+}
+
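+// Note: the truncating VCVTT* family pairs with {sae} instead of {er}:
+// truncation fixes the rounding mode to round-toward-zero, so only
+// suppress-all-exceptions remains configurable.
+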
+// VCVTTPD2QQ performs "Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Quadword Integers".
+//
+// Mnemonic : VCVTTPD2QQ
+// Supported forms : (7 forms)
+//
+// * VCVTTPD2QQ m512/m64bcst, zmm{k}{z} [AVX512DQ]
+// * VCVTTPD2QQ {sae}, zmm, zmm{k}{z} [AVX512DQ]
+// * VCVTTPD2QQ zmm, zmm{k}{z} [AVX512DQ]
+// * VCVTTPD2QQ m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTTPD2QQ m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTTPD2QQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTTPD2QQ ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VCVTTPD2QQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTTPD2QQ", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTTPD2QQ", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTTPD2QQ takes 2 or 3 operands")
+ }
+ // VCVTTPD2QQ m512/m64bcst, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x7a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VCVTTPD2QQ {sae}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTTPD2QQ zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTPD2QQ m128/m64bcst, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x7a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTTPD2QQ m256/m64bcst, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x7a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTTPD2QQ xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTPD2QQ ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTTPD2QQ")
+ }
+ return p
+}
+
+// VCVTTPD2UDQ performs "Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Unsigned Doubleword Integers".
+//
+// Mnemonic : VCVTTPD2UDQ
+// Supported forms : (7 forms)
+//
+// * VCVTTPD2UDQ m512/m64bcst, ymm{k}{z} [AVX512F]
+// * VCVTTPD2UDQ {sae}, zmm, ymm{k}{z} [AVX512F]
+// * VCVTTPD2UDQ zmm, ymm{k}{z} [AVX512F]
+// * VCVTTPD2UDQ m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTTPD2UDQ m256/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTTPD2UDQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTTPD2UDQ ymm, xmm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VCVTTPD2UDQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTTPD2UDQ", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTTPD2UDQ", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTTPD2UDQ takes 2 or 3 operands")
+ }
+ // VCVTTPD2UDQ m512/m64bcst, ymm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x84, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x78)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VCVTTPD2UDQ {sae}, zmm, ymm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isYMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfc)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTTPD2UDQ zmm, ymm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfc)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTPD2UDQ m128/m64bcst, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x84, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x78)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTTPD2UDQ m256/m64bcst, xmm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x84, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x78)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTTPD2UDQ xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfc)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTPD2UDQ ymm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfc)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTTPD2UDQ")
+ }
+ return p
+}
+
+// VCVTTPD2UQQ performs "Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Unsigned Quadword Integers".
+//
+// Mnemonic : VCVTTPD2UQQ
+// Supported forms : (7 forms)
+//
+// * VCVTTPD2UQQ m512/m64bcst, zmm{k}{z} [AVX512DQ]
+// * VCVTTPD2UQQ {sae}, zmm, zmm{k}{z} [AVX512DQ]
+// * VCVTTPD2UQQ zmm, zmm{k}{z} [AVX512DQ]
+// * VCVTTPD2UQQ m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTTPD2UQQ m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTTPD2UQQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTTPD2UQQ ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VCVTTPD2UQQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTTPD2UQQ", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTTPD2UQQ", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTTPD2UQQ takes 2 or 3 operands")
+ }
+ // VCVTTPD2UQQ m512/m64bcst, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x78)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VCVTTPD2UQQ {sae}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTTPD2UQQ zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTPD2UQQ m128/m64bcst, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x78)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTTPD2UQQ m256/m64bcst, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x78)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTTPD2UQQ xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTPD2UQQ ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTTPD2UQQ")
+ }
+ return p
+}
+
+// VCVTTPS2DQ performs "Convert with Truncation Packed Single-Precision FP Values to Packed Dword Integers".
+//
+// Mnemonic : VCVTTPS2DQ
+// Supported forms : (11 forms)
+//
+// * VCVTTPS2DQ xmm, xmm [AVX]
+// * VCVTTPS2DQ m128, xmm [AVX]
+// * VCVTTPS2DQ ymm, ymm [AVX]
+// * VCVTTPS2DQ m256, ymm [AVX]
+// * VCVTTPS2DQ m512/m32bcst, zmm{k}{z} [AVX512F]
+// * VCVTTPS2DQ {sae}, zmm, zmm{k}{z} [AVX512F]
+// * VCVTTPS2DQ zmm, zmm{k}{z} [AVX512F]
+// * VCVTTPS2DQ m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTTPS2DQ m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VCVTTPS2DQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTTPS2DQ ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VCVTTPS2DQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTTPS2DQ", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTTPS2DQ", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTTPS2DQ takes 2 or 3 operands")
+ }
+ // VCVTTPS2DQ xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[1]), v[0], 0)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTPS2DQ m128, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x5b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTTPS2DQ ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(6, hcode(v[1]), v[0], 0)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTPS2DQ m256, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(6, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x5b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTTPS2DQ m512/m32bcst, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x5b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VCVTTPS2DQ {sae}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTTPS2DQ zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTPS2DQ m128/m32bcst, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x5b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTTPS2DQ m256/m32bcst, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x5b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTTPS2DQ xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTPS2DQ ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTTPS2DQ")
+ }
+ return p
+}
+
+// VCVTTPS2QQ performs "Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Signed Quadword Integer Values".
+//
+// Mnemonic : VCVTTPS2QQ
+// Supported forms : (7 forms)
+//
+// * VCVTTPS2QQ m256/m32bcst, zmm{k}{z} [AVX512DQ]
+// * VCVTTPS2QQ {sae}, ymm, zmm{k}{z} [AVX512DQ]
+// * VCVTTPS2QQ ymm, zmm{k}{z} [AVX512DQ]
+// * VCVTTPS2QQ m64/m32bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTTPS2QQ m128/m32bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTTPS2QQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTTPS2QQ xmm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VCVTTPS2QQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTTPS2QQ", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTTPS2QQ", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTTPS2QQ takes 2 or 3 operands")
+ }
+ // VCVTTPS2QQ m256/m32bcst, zmm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x7a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTTPS2QQ {sae}, ymm, zmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isEVEXYMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTTPS2QQ ymm, zmm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTPS2QQ m64/m32bcst, xmm{k}{z}
+ if len(vv) == 0 && isM64M32bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x7a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VCVTTPS2QQ m128/m32bcst, ymm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x7a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTTPS2QQ xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTPS2QQ xmm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTTPS2QQ")
+ }
+ return p
+}
+
+// VCVTTPS2UDQ performs "Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Unsigned Doubleword Integer Values".
+//
+// Mnemonic : VCVTTPS2UDQ
+// Supported forms : (7 forms)
+//
+// * VCVTTPS2UDQ m512/m32bcst, zmm{k}{z} [AVX512F]
+// * VCVTTPS2UDQ {sae}, zmm, zmm{k}{z} [AVX512F]
+// * VCVTTPS2UDQ zmm, zmm{k}{z} [AVX512F]
+// * VCVTTPS2UDQ m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTTPS2UDQ m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VCVTTPS2UDQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTTPS2UDQ ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VCVTTPS2UDQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTTPS2UDQ", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTTPS2UDQ", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTTPS2UDQ takes 2 or 3 operands")
+ }
+ // VCVTTPS2UDQ m512/m32bcst, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x78)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VCVTTPS2UDQ {sae}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTTPS2UDQ zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTPS2UDQ m128/m32bcst, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x78)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTTPS2UDQ m256/m32bcst, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x78)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTTPS2UDQ xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTPS2UDQ ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTTPS2UDQ")
+ }
+ return p
+}
+
+// VCVTTPS2UQQ performs "Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Unsigned Quadword Integer Values".
+//
+// Mnemonic : VCVTTPS2UQQ
+// Supported forms : (7 forms)
+//
+// * VCVTTPS2UQQ m256/m32bcst, zmm{k}{z} [AVX512DQ]
+// * VCVTTPS2UQQ {sae}, ymm, zmm{k}{z} [AVX512DQ]
+// * VCVTTPS2UQQ ymm, zmm{k}{z} [AVX512DQ]
+// * VCVTTPS2UQQ m64/m32bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTTPS2UQQ m128/m32bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTTPS2UQQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTTPS2UQQ xmm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VCVTTPS2UQQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTTPS2UQQ", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTTPS2UQQ", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTTPS2UQQ takes 2 or 3 operands")
+ }
+ // VCVTTPS2UQQ m256/m32bcst, zmm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x78)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTTPS2UQQ {sae}, ymm, zmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isEVEXYMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTTPS2UQQ ymm, zmm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTPS2UQQ m64/m32bcst, xmm{k}{z}
+ if len(vv) == 0 && isM64M32bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x78)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VCVTTPS2UQQ m128/m32bcst, ymm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x78)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTTPS2UQQ xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTPS2UQQ xmm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTTPS2UQQ")
+ }
+ return p
+}
+
+// VCVTTSD2SI performs "Convert with Truncation Scalar Double-Precision FP Value to Signed Integer".
+//
+// Mnemonic : VCVTTSD2SI
+// Supported forms : (10 forms)
+//
+// * VCVTTSD2SI xmm, r32 [AVX]
+// * VCVTTSD2SI m64, r32 [AVX]
+// * VCVTTSD2SI xmm, r64 [AVX]
+// * VCVTTSD2SI m64, r64 [AVX]
+// * VCVTTSD2SI m64, r32 [AVX512F]
+// * VCVTTSD2SI m64, r64 [AVX512F]
+// * VCVTTSD2SI {sae}, xmm, r32 [AVX512F]
+// * VCVTTSD2SI {sae}, xmm, r64 [AVX512F]
+// * VCVTTSD2SI xmm, r32 [AVX512F]
+// * VCVTTSD2SI xmm, r64 [AVX512F]
+//
+func (self *Program) VCVTTSD2SI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTTSD2SI", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTTSD2SI", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTTSD2SI takes 2 or 3 operands")
+ }
+ // VCVTTSD2SI xmm, r32
+ if len(vv) == 0 && isXMM(v0) && isReg32(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[1]), v[0], 0)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTSD2SI m64, r32
+ if len(vv) == 0 && isM64(v0) && isReg32(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x2c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTTSD2SI xmm, r64
+ if len(vv) == 0 && isXMM(v0) && isReg64(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfb)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTSD2SI m64, r64
+ if len(vv) == 0 && isM64(v0) && isReg64(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b1, 0x83, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x2c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTTSD2SI m64, r32
+ if len(vv) == 0 && isM64(v0) && isReg32(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x07, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x2c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VCVTTSD2SI m64, r64
+ if len(vv) == 0 && isM64(v0) && isReg64(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x2c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VCVTTSD2SI {sae}, xmm, r32
+ if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg32(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7f)
+ m.emit(0x18)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTTSD2SI {sae}, xmm, r64
+ if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg64(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xff)
+ m.emit(0x18)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTTSD2SI xmm, r32
+ if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7f)
+ m.emit(0x48)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTSD2SI xmm, r64
+ if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xff)
+ m.emit(0x48)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTTSD2SI")
+ }
+ return p
+}
+
+// VCVTTSD2USI performs "Convert with Truncation Scalar Double-Precision Floating-Point Value to Unsigned Integer".
+//
+// Mnemonic : VCVTTSD2USI
+// Supported forms : (6 forms)
+//
+// * VCVTTSD2USI m64, r32 [AVX512F]
+// * VCVTTSD2USI m64, r64 [AVX512F]
+// * VCVTTSD2USI {sae}, xmm, r32 [AVX512F]
+// * VCVTTSD2USI {sae}, xmm, r64 [AVX512F]
+// * VCVTTSD2USI xmm, r32 [AVX512F]
+// * VCVTTSD2USI xmm, r64 [AVX512F]
+//
+func (self *Program) VCVTTSD2USI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTTSD2USI", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTTSD2USI", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTTSD2USI takes 2 or 3 operands")
+ }
+ // VCVTTSD2USI m64, r32
+ if len(vv) == 0 && isM64(v0) && isReg32(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x07, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x78)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VCVTTSD2USI m64, r64
+ if len(vv) == 0 && isM64(v0) && isReg64(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x78)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VCVTTSD2USI {sae}, xmm, r32
+ if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg32(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7f)
+ m.emit(0x18)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTTSD2USI {sae}, xmm, r64
+ if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg64(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xff)
+ m.emit(0x18)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTTSD2USI xmm, r32
+ if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7f)
+ m.emit(0x48)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTSD2USI xmm, r64
+ if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xff)
+ m.emit(0x48)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTTSD2USI")
+ }
+ return p
+}
+
+// VCVTTSS2SI performs "Convert with Truncation Scalar Single-Precision FP Value to Dword Integer".
+//
+// Mnemonic : VCVTTSS2SI
+// Supported forms : (10 forms)
+//
+// * VCVTTSS2SI xmm, r32 [AVX]
+// * VCVTTSS2SI m32, r32 [AVX]
+// * VCVTTSS2SI xmm, r64 [AVX]
+// * VCVTTSS2SI m32, r64 [AVX]
+// * VCVTTSS2SI m32, r32 [AVX512F]
+// * VCVTTSS2SI m32, r64 [AVX512F]
+// * VCVTTSS2SI {sae}, xmm, r32 [AVX512F]
+// * VCVTTSS2SI {sae}, xmm, r64 [AVX512F]
+// * VCVTTSS2SI xmm, r32 [AVX512F]
+// * VCVTTSS2SI xmm, r64 [AVX512F]
+//
+func (self *Program) VCVTTSS2SI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTTSS2SI", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTTSS2SI", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTTSS2SI takes 2 or 3 operands")
+ }
+ // VCVTTSS2SI xmm, r32
+ if len(vv) == 0 && isXMM(v0) && isReg32(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[1]), v[0], 0)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTSS2SI m32, r32
+ if len(vv) == 0 && isM32(v0) && isReg32(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x2c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTTSS2SI xmm, r64
+ if len(vv) == 0 && isXMM(v0) && isReg64(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfa)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTSS2SI m32, r64
+ if len(vv) == 0 && isM32(v0) && isReg64(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b1, 0x82, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x2c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VCVTTSS2SI m32, r32
+ if len(vv) == 0 && isM32(v0) && isReg32(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x2c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VCVTTSS2SI m32, r64
+ if len(vv) == 0 && isM32(v0) && isReg64(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x2c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VCVTTSS2SI {sae}, xmm, r32
+ if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg32(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7e)
+ m.emit(0x18)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTTSS2SI {sae}, xmm, r64
+ if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg64(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfe)
+ m.emit(0x18)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTTSS2SI xmm, r32
+ if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit(0x48)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTSS2SI xmm, r64
+ if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit(0x48)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTTSS2SI")
+ }
+ return p
+}
+
+// VCVTTSS2USI performs "Convert with Truncation Scalar Single-Precision Floating-Point Value to Unsigned Integer".
+//
+// Mnemonic : VCVTTSS2USI
+// Supported forms : (6 forms)
+//
+// * VCVTTSS2USI m32, r32 [AVX512F]
+// * VCVTTSS2USI m32, r64 [AVX512F]
+// * VCVTTSS2USI {sae}, xmm, r32 [AVX512F]
+// * VCVTTSS2USI {sae}, xmm, r64 [AVX512F]
+// * VCVTTSS2USI xmm, r32 [AVX512F]
+// * VCVTTSS2USI xmm, r64 [AVX512F]
+//
+func (self *Program) VCVTTSS2USI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTTSS2USI", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTTSS2USI", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTTSS2USI takes 2 or 3 operands")
+ }
+ // VCVTTSS2USI m32, r32
+ if len(vv) == 0 && isM32(v0) && isReg32(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x78)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VCVTTSS2USI m32, r64
+ if len(vv) == 0 && isM32(v0) && isReg64(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x78)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VCVTTSS2USI {sae}, xmm, r32
+ if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg32(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7e)
+ m.emit(0x18)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTTSS2USI {sae}, xmm, r64
+ if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg64(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfe)
+ m.emit(0x18)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTTSS2USI xmm, r32
+ if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit(0x48)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTTSS2USI xmm, r64
+ if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit(0x48)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTTSS2USI")
+ }
+ return p
+}
+
+// VCVTUDQ2PD performs "Convert Packed Unsigned Doubleword Integers to Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VCVTUDQ2PD
+// Supported forms : (6 forms)
+//
+// * VCVTUDQ2PD m256/m32bcst, zmm{k}{z} [AVX512F]
+// * VCVTUDQ2PD ymm, zmm{k}{z} [AVX512F]
+// * VCVTUDQ2PD m64/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTUDQ2PD m128/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VCVTUDQ2PD xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTUDQ2PD xmm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VCVTUDQ2PD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VCVTUDQ2PD", 2, Operands { v0, v1 })
+ // VCVTUDQ2PD m256/m32bcst, zmm{k}{z}
+ if isM256M32bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x7a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTUDQ2PD ymm, zmm{k}{z}
+ if isEVEXYMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTUDQ2PD m64/m32bcst, xmm{k}{z}
+ if isM64M32bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x7a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VCVTUDQ2PD m128/m32bcst, ymm{k}{z}
+ if isM128M32bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x7a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTUDQ2PD xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTUDQ2PD xmm, ymm{k}{z}
+ if isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTUDQ2PD")
+ }
+ return p
+}
+
+// VCVTUDQ2PS performs "Convert Packed Unsigned Doubleword Integers to Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VCVTUDQ2PS
+// Supported forms : (7 forms)
+//
+// * VCVTUDQ2PS m512/m32bcst, zmm{k}{z} [AVX512F]
+// * VCVTUDQ2PS {er}, zmm, zmm{k}{z} [AVX512F]
+// * VCVTUDQ2PS zmm, zmm{k}{z} [AVX512F]
+// * VCVTUDQ2PS m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTUDQ2PS m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VCVTUDQ2PS xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VCVTUDQ2PS ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
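+// As with the {sae} forms elsewhere in this file, the optional {er}
+// (embedded rounding) operand is passed first when used. A minimal usage
+// sketch (illustrative, not generated code; RN_SAE stands in for whatever
+// rounding-control operand this package defines — an assumption, not a
+// confirmed identifier):
+//
+//	p.VCVTUDQ2PS(ZMM1, ZMM2)          // zmm, zmm{k}{z} form
+//	p.VCVTUDQ2PS(RN_SAE, ZMM1, ZMM2)  // {er}, zmm, zmm{k}{z} form
+//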
+func (self *Program) VCVTUDQ2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTUDQ2PS", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTUDQ2PS", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTUDQ2PS takes 2 or 3 operands")
+ }
+ // VCVTUDQ2PS m512/m32bcst, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x07, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x7a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VCVTUDQ2PS {er}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7f)
+ m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTUDQ2PS zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7f)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTUDQ2PS m128/m32bcst, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x07, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x7a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTUDQ2PS m256/m32bcst, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x07, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x7a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTUDQ2PS xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7f)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTUDQ2PS ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7f)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTUDQ2PS")
+ }
+ return p
+}
+
+// VCVTUQQ2PD performs "Convert Packed Unsigned Quadword Integers to Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VCVTUQQ2PD
+// Supported forms : (7 forms)
+//
+// * VCVTUQQ2PD m512/m64bcst, zmm{k}{z} [AVX512DQ]
+// * VCVTUQQ2PD {er}, zmm, zmm{k}{z} [AVX512DQ]
+// * VCVTUQQ2PD zmm, zmm{k}{z} [AVX512DQ]
+// * VCVTUQQ2PD m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTUQQ2PD m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTUQQ2PD xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTUQQ2PD ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VCVTUQQ2PD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTUQQ2PD", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTUQQ2PD", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTUQQ2PD takes 2 or 3 operands")
+ }
+ // VCVTUQQ2PD m512/m64bcst, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x86, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x7a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VCVTUQQ2PD {er}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfe)
+ m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTUQQ2PD zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTUQQ2PD m128/m64bcst, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x7a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTUQQ2PD m256/m64bcst, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x86, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x7a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTUQQ2PD xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTUQQ2PD ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTUQQ2PD")
+ }
+ return p
+}
+
+// VCVTUQQ2PS performs "Convert Packed Unsigned Quadword Integers to Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VCVTUQQ2PS
+// Supported forms : (7 forms)
+//
+// * VCVTUQQ2PS m512/m64bcst, ymm{k}{z} [AVX512DQ]
+// * VCVTUQQ2PS {er}, zmm, ymm{k}{z} [AVX512DQ]
+// * VCVTUQQ2PS zmm, ymm{k}{z} [AVX512DQ]
+// * VCVTUQQ2PS m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTUQQ2PS m256/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTUQQ2PS xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VCVTUQQ2PS ymm, xmm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VCVTUQQ2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTUQQ2PS", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VCVTUQQ2PS", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VCVTUQQ2PS takes 2 or 3 operands")
+ }
+ // VCVTUQQ2PS m512/m64bcst, ymm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x7a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VCVTUQQ2PS {er}, zmm, ymm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isYMMkz(vv[0]) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xff)
+ m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTUQQ2PS zmm, ymm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xff)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTUQQ2PS m128/m64bcst, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x7a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VCVTUQQ2PS m256/m64bcst, xmm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x7a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VCVTUQQ2PS xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xff)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTUQQ2PS ymm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xff)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTUQQ2PS")
+ }
+ return p
+}
+
+// VCVTUSI2SD performs "Convert Unsigned Integer to Scalar Double-Precision Floating-Point Value".
+//
+// Mnemonic : VCVTUSI2SD
+// Supported forms : (5 forms)
+//
+// * VCVTUSI2SD r32, xmm, xmm [AVX512F]
+// * VCVTUSI2SD m32, xmm, xmm [AVX512F]
+// * VCVTUSI2SD m64, xmm, xmm [AVX512F]
+// * VCVTUSI2SD {er}, r64, xmm, xmm [AVX512F]
+// * VCVTUSI2SD r64, xmm, xmm [AVX512F]
+//
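+// This method takes three operands, or four when the optional {er} operand
+// leads. A minimal usage sketch (illustrative, not generated code; the
+// register and rounding-operand identifiers are assumptions):
+//
+//	p.VCVTUSI2SD(RAX, XMM1, XMM2)          // r64, xmm, xmm form
+//	p.VCVTUSI2SD(RN_SAE, RAX, XMM1, XMM2)  // {er}, r64, xmm, xmm form
+//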
+func (self *Program) VCVTUSI2SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTUSI2SD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VCVTUSI2SD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VCVTUSI2SD takes 3 or 4 operands")
+ }
+ // VCVTUSI2SD r32, xmm, xmm
+ if len(vv) == 0 && isReg32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7f ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x00)
+ m.emit(0x7b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTUSI2SD m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x07, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
+ m.emit(0x7b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VCVTUSI2SD m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
+ m.emit(0x7b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VCVTUSI2SD {er}, r64, xmm, xmm
+ if len(vv) == 1 && isER(v0) && isReg64(v1) && isEVEXXMM(v2) && isEVEXXMM(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xff ^ (hlcode(v[2]) << 3))
+ m.emit((vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | 0x10)
+ m.emit(0x7b)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTUSI2SD r64, xmm, xmm
+ if len(vv) == 0 && isReg64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xff ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40)
+ m.emit(0x7b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTUSI2SD")
+ }
+ return p
+}
+
+// VCVTUSI2SS performs "Convert Unsigned Integer to Scalar Single-Precision Floating-Point Value".
+//
+// Mnemonic : VCVTUSI2SS
+// Supported forms : (6 forms)
+//
+// * VCVTUSI2SS m32, xmm, xmm [AVX512F]
+// * VCVTUSI2SS m64, xmm, xmm [AVX512F]
+// * VCVTUSI2SS {er}, r32, xmm, xmm [AVX512F]
+// * VCVTUSI2SS {er}, r64, xmm, xmm [AVX512F]
+// * VCVTUSI2SS r32, xmm, xmm [AVX512F]
+// * VCVTUSI2SS r64, xmm, xmm [AVX512F]
+//
+func (self *Program) VCVTUSI2SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VCVTUSI2SS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VCVTUSI2SS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VCVTUSI2SS takes 3 or 4 operands")
+ }
+ // VCVTUSI2SS m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
+ m.emit(0x7b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VCVTUSI2SS m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x86, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
+ m.emit(0x7b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VCVTUSI2SS {er}, r32, xmm, xmm
+ if len(vv) == 1 && isER(v0) && isReg32(v1) && isEVEXXMM(v2) && isEVEXXMM(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[2]) << 3))
+ m.emit((vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | 0x10)
+ m.emit(0x7b)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTUSI2SS {er}, r64, xmm, xmm
+ if len(vv) == 1 && isER(v0) && isReg64(v1) && isEVEXXMM(v2) && isEVEXXMM(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfe ^ (hlcode(v[2]) << 3))
+ m.emit((vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | 0x10)
+ m.emit(0x7b)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VCVTUSI2SS r32, xmm, xmm
+ if len(vv) == 0 && isReg32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40)
+ m.emit(0x7b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VCVTUSI2SS r64, xmm, xmm
+ if len(vv) == 0 && isReg64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfe ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40)
+ m.emit(0x7b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VCVTUSI2SS")
+ }
+ return p
+}
+
+// VDBPSADBW performs "Double Block Packed Sum-Absolute-Differences on Unsigned Bytes".
+//
+// Mnemonic : VDBPSADBW
+// Supported forms : (6 forms)
+//
+// * VDBPSADBW imm8, zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VDBPSADBW imm8, m512, zmm, zmm{k}{z} [AVX512BW]
+// * VDBPSADBW imm8, xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VDBPSADBW imm8, m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VDBPSADBW imm8, ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VDBPSADBW imm8, m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VDBPSADBW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VDBPSADBW", 4, Operands { v0, v1, v2, v3 })
+ // VDBPSADBW imm8, zmm, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VDBPSADBW imm8, m512, zmm, zmm{k}{z}
+ if isImm8(v0) && isM512(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x42)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VDBPSADBW imm8, xmm, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VDBPSADBW imm8, m128, xmm, xmm{k}{z}
+ if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x42)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VDBPSADBW imm8, ymm, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VDBPSADBW imm8, m256, ymm, ymm{k}{z}
+ if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x42)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VDBPSADBW")
+ }
+ return p
+}
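+
+// A usage sketch (assuming an existing *Program p; the immediate leads and the
+// destination trails, mirroring Intel's "VDBPSADBW zmm1, zmm2, zmm3, imm8"
+// with the operand list reversed):
+//
+//     p.VDBPSADBW(2, ZMM3, ZMM2, ZMM1)   // zmm1 = dbpsadbw(zmm2, zmm3), control byte 2
+//
+// A plain ZMM destination selects the unmasked encoding; {k}{z} forms take the
+// package's masked-register operands instead.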
+
+// VDIVPD performs "Divide Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VDIVPD
+// Supported forms : (11 forms)
+//
+// * VDIVPD xmm, xmm, xmm [AVX]
+// * VDIVPD m128, xmm, xmm [AVX]
+// * VDIVPD ymm, ymm, ymm [AVX]
+// * VDIVPD m256, ymm, ymm [AVX]
+// * VDIVPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VDIVPD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VDIVPD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VDIVPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VDIVPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VDIVPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VDIVPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VDIVPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VDIVPD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VDIVPD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VDIVPD takes 3 or 4 operands")
+ }
+ // VDIVPD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VDIVPD m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VDIVPD ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VDIVPD m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VDIVPD m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x5e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VDIVPD {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VDIVPD zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VDIVPD m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x5e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VDIVPD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VDIVPD m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x5e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VDIVPD ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VDIVPD")
+ }
+ return p
+}
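+
+// A usage sketch (assuming an existing *Program p): the encoding is chosen from
+// the operand types, so one method covers both the VEX and EVEX forms:
+//
+//     p.VDIVPD(XMM3, XMM2, XMM1)   // AVX:     xmm1 = xmm2 / xmm3
+//     p.VDIVPD(ZMM3, ZMM2, ZMM1)   // AVX512F: zmm1 = zmm2 / zmm3, unmasked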
+
+// VDIVPS performs "Divide Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VDIVPS
+// Supported forms : (11 forms)
+//
+// * VDIVPS xmm, xmm, xmm [AVX]
+// * VDIVPS m128, xmm, xmm [AVX]
+// * VDIVPS ymm, ymm, ymm [AVX]
+// * VDIVPS m256, ymm, ymm [AVX]
+// * VDIVPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VDIVPS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VDIVPS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VDIVPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VDIVPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VDIVPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VDIVPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VDIVPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VDIVPS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VDIVPS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VDIVPS takes 3 or 4 operands")
+ }
+ // VDIVPS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VDIVPS m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VDIVPS ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VDIVPS m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VDIVPS m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x5e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VDIVPS {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VDIVPS zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VDIVPS m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x5e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VDIVPS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VDIVPS m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x5e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VDIVPS ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VDIVPS")
+ }
+ return p
+}
+
+// VDIVSD performs "Divide Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : VDIVSD
+// Supported forms : (5 forms)
+//
+// * VDIVSD xmm, xmm, xmm [AVX]
+// * VDIVSD m64, xmm, xmm [AVX]
+// * VDIVSD m64, xmm, xmm{k}{z} [AVX512F]
+// * VDIVSD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VDIVSD xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VDIVSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VDIVSD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VDIVSD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VDIVSD takes 3 or 4 operands")
+ }
+ // VDIVSD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VDIVSD m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VDIVSD m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x5e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VDIVSD {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xff ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VDIVSD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xff ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VDIVSD")
+ }
+ return p
+}
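+
+// A usage sketch (assuming an existing *Program p):
+//
+//     p.VDIVSD(XMM2, XMM1, XMM0)   // xmm0[63:0] = xmm1[63:0] / xmm2[63:0], upper bits from xmm1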
+
+// VDIVSS performs "Divide Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VDIVSS
+// Supported forms : (5 forms)
+//
+// * VDIVSS xmm, xmm, xmm [AVX]
+// * VDIVSS m32, xmm, xmm [AVX]
+// * VDIVSS m32, xmm, xmm{k}{z} [AVX512F]
+// * VDIVSS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VDIVSS xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VDIVSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VDIVSS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VDIVSS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VDIVSS takes 3 or 4 operands")
+ }
+ // VDIVSS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VDIVSS m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VDIVSS m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x5e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VDIVSS {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VDIVSS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VDIVSS")
+ }
+ return p
+}
+
+// VDPPD performs "Dot Product of Packed Double Precision Floating-Point Values".
+//
+// Mnemonic : VDPPD
+// Supported forms : (2 forms)
+//
+// * VDPPD imm8, xmm, xmm, xmm [AVX]
+// * VDPPD imm8, m128, xmm, xmm [AVX]
+//
+func (self *Program) VDPPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VDPPD", 4, Operands { v0, v1, v2, v3 })
+ // VDPPD imm8, xmm, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x41)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VDPPD imm8, m128, xmm, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x41)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VDPPD")
+ }
+ return p
+}
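+
+// A usage sketch (assuming an existing *Program p; the imm8 selector comes
+// first, the destination last):
+//
+//     p.VDPPD(0x31, XMM2, XMM1, XMM0)   // xmm0 = dot(xmm1, xmm2) under control mask 0x31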
+
+// VDPPS performs "Dot Product of Packed Single Precision Floating-Point Values".
+//
+// Mnemonic : VDPPS
+// Supported forms : (4 forms)
+//
+// * VDPPS imm8, xmm, xmm, xmm [AVX]
+// * VDPPS imm8, m128, xmm, xmm [AVX]
+// * VDPPS imm8, ymm, ymm, ymm [AVX]
+// * VDPPS imm8, m256, ymm, ymm [AVX]
+//
+func (self *Program) VDPPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VDPPS", 4, Operands { v0, v1, v2, v3 })
+ // VDPPS imm8, xmm, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x40)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VDPPS imm8, m128, xmm, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x40)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VDPPS imm8, ymm, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x40)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VDPPS imm8, m256, ymm, ymm
+ if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x40)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VDPPS")
+ }
+ return p
+}
+
+// VEXP2PD performs "Approximation to the Exponential 2^x of Packed Double-Precision Floating-Point Values with Less Than 2^-23 Relative Error".
+//
+// Mnemonic : VEXP2PD
+// Supported forms : (3 forms)
+//
+// * VEXP2PD m512/m64bcst, zmm{k}{z} [AVX512ER]
+// * VEXP2PD {sae}, zmm, zmm{k}{z} [AVX512ER]
+// * VEXP2PD zmm, zmm{k}{z} [AVX512ER]
+//
+func (self *Program) VEXP2PD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VEXP2PD", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VEXP2PD", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VEXP2PD takes 2 or 3 operands")
+ }
+ // VEXP2PD m512/m64bcst, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0xc8)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VEXP2PD {sae}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
+ m.emit(0xc8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VEXP2PD zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0xc8)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VEXP2PD")
+ }
+ return p
+}
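+
+// A usage sketch (assuming an existing *Program p on AVX512ER hardware):
+//
+//     p.VEXP2PD(ZMM0, ZMM1)   // zmm1[i] ~ 2**zmm0[i] per double lane, < 2^-23 relative error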
+
+// VEXP2PS performs "Approximation to the Exponential 2^x of Packed Single-Precision Floating-Point Values with Less Than 2^-23 Relative Error".
+//
+// Mnemonic : VEXP2PS
+// Supported forms : (3 forms)
+//
+// * VEXP2PS m512/m32bcst, zmm{k}{z} [AVX512ER]
+// * VEXP2PS {sae}, zmm, zmm{k}{z} [AVX512ER]
+// * VEXP2PS zmm, zmm{k}{z} [AVX512ER]
+//
+func (self *Program) VEXP2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VEXP2PS", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VEXP2PS", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VEXP2PS takes 2 or 3 operands")
+ }
+ // VEXP2PS m512/m32bcst, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0xc8)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VEXP2PS {sae}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
+ m.emit(0xc8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VEXP2PS zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0xc8)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VEXP2PS")
+ }
+ return p
+}
+
+// VEXPANDPD performs "Load Sparse Packed Double-Precision Floating-Point Values from Dense Memory".
+//
+// Mnemonic : VEXPANDPD
+// Supported forms : (6 forms)
+//
+// * VEXPANDPD zmm, zmm{k}{z} [AVX512F]
+// * VEXPANDPD m512, zmm{k}{z} [AVX512F]
+// * VEXPANDPD xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VEXPANDPD ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VEXPANDPD m128, xmm{k}{z} [AVX512F,AVX512VL]
+// * VEXPANDPD m256, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VEXPANDPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VEXPANDPD", 2, Operands { v0, v1 })
+ // VEXPANDPD zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x88)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VEXPANDPD m512, zmm{k}{z}
+ if isM512(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x88)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VEXPANDPD xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x88)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VEXPANDPD ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x88)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VEXPANDPD m128, xmm{k}{z}
+ if isM128(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x88)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VEXPANDPD m256, ymm{k}{z}
+ if isM256(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x88)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VEXPANDPD")
+ }
+ return p
+}
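+
+// A usage sketch (assuming an existing *Program p). Without a writemask the
+// expand degenerates into a plain copy, so the instruction is normally paired
+// with a {k} mask on the destination:
+//
+//     p.VEXPANDPD(ZMM2, ZMM1)   // zmm1 = expand(zmm2); unmasked, i.e. a straight copy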
+
+// VEXPANDPS performs "Load Sparse Packed Single-Precision Floating-Point Values from Dense Memory".
+//
+// Mnemonic : VEXPANDPS
+// Supported forms : (6 forms)
+//
+// * VEXPANDPS zmm, zmm{k}{z} [AVX512F]
+// * VEXPANDPS m512, zmm{k}{z} [AVX512F]
+// * VEXPANDPS xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VEXPANDPS ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VEXPANDPS m128, xmm{k}{z} [AVX512F,AVX512VL]
+// * VEXPANDPS m256, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VEXPANDPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VEXPANDPS", 2, Operands { v0, v1 })
+ // VEXPANDPS zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x88)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VEXPANDPS m512, zmm{k}{z}
+ if isM512(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x88)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VEXPANDPS xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x88)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VEXPANDPS ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x88)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VEXPANDPS m128, xmm{k}{z}
+ if isM128(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x88)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VEXPANDPS m256, ymm{k}{z}
+ if isM256(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x88)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VEXPANDPS")
+ }
+ return p
+}
+
+// VEXTRACTF128 performs "Extract Packed Floating-Point Values".
+//
+// Mnemonic : VEXTRACTF128
+// Supported forms : (2 forms)
+//
+// * VEXTRACTF128 imm8, ymm, xmm [AVX]
+// * VEXTRACTF128 imm8, ymm, m128 [AVX]
+//
+func (self *Program) VEXTRACTF128(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VEXTRACTF128", 3, Operands { v0, v1, v2 })
+ // VEXTRACTF128 imm8, ymm, xmm
+ if isImm8(v0) && isYMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5))
+ m.emit(0x7d)
+ m.emit(0x19)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VEXTRACTF128 imm8, ymm, m128
+ if isImm8(v0) && isYMM(v1) && isM128(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[1]), addr(v[2]), 0)
+ m.emit(0x19)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VEXTRACTF128")
+ }
+ return p
+}
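+
+// A usage sketch (assuming an existing *Program p):
+//
+//     p.VEXTRACTF128(1, YMM2, XMM5)   // xmm5 = ymm2[255:128]; the imm8 picks the 128-bit lane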
+
+// VEXTRACTF32X4 performs "Extract 128 Bits of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VEXTRACTF32X4
+// Supported forms : (4 forms)
+//
+// * VEXTRACTF32X4 imm8, zmm, xmm{k}{z} [AVX512F]
+// * VEXTRACTF32X4 imm8, zmm, m128{k}{z} [AVX512F]
+// * VEXTRACTF32X4 imm8, ymm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VEXTRACTF32X4 imm8, ymm, m128{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VEXTRACTF32X4(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VEXTRACTF32X4", 3, Operands { v0, v1, v2 })
+ // VEXTRACTF32X4 imm8, zmm, xmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+ m.emit(0x19)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VEXTRACTF32X4 imm8, zmm, m128{k}{z}
+ if isImm8(v0) && isZMM(v1) && isM128kz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x19)
+ m.mrsd(lcode(v[1]), addr(v[2]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VEXTRACTF32X4 imm8, ymm, xmm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
+ m.emit(0x19)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VEXTRACTF32X4 imm8, ymm, m128{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isM128kz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b01, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x19)
+ m.mrsd(lcode(v[1]), addr(v[2]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VEXTRACTF32X4")
+ }
+ return p
+}
+
+// VEXTRACTF32X8 performs "Extract 256 Bits of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VEXTRACTF32X8
+// Supported forms : (2 forms)
+//
+// * VEXTRACTF32X8 imm8, zmm, ymm{k}{z} [AVX512DQ]
+// * VEXTRACTF32X8 imm8, zmm, m256{k}{z} [AVX512DQ]
+//
+func (self *Program) VEXTRACTF32X8(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VEXTRACTF32X8", 3, Operands { v0, v1, v2 })
+ // VEXTRACTF32X8 imm8, zmm, ymm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+ m.emit(0x1b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VEXTRACTF32X8 imm8, zmm, m256{k}{z}
+ if isImm8(v0) && isZMM(v1) && isM256kz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x1b)
+ m.mrsd(lcode(v[1]), addr(v[2]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VEXTRACTF32X8")
+ }
+ return p
+}
+
+// VEXTRACTF64X2 performs "Extract 128 Bits of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VEXTRACTF64X2
+// Supported forms : (4 forms)
+//
+// * VEXTRACTF64X2 imm8, zmm, xmm{k}{z} [AVX512DQ]
+// * VEXTRACTF64X2 imm8, zmm, m128{k}{z} [AVX512DQ]
+// * VEXTRACTF64X2 imm8, ymm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VEXTRACTF64X2 imm8, ymm, m128{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VEXTRACTF64X2(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VEXTRACTF64X2", 3, Operands { v0, v1, v2 })
+ // VEXTRACTF64X2 imm8, zmm, xmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+ m.emit(0x19)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VEXTRACTF64X2 imm8, zmm, m128{k}{z}
+ if isImm8(v0) && isZMM(v1) && isM128kz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x19)
+ m.mrsd(lcode(v[1]), addr(v[2]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VEXTRACTF64X2 imm8, ymm, xmm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
+ m.emit(0x19)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VEXTRACTF64X2 imm8, ymm, m128{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isM128kz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b01, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x19)
+ m.mrsd(lcode(v[1]), addr(v[2]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VEXTRACTF64X2")
+ }
+ return p
+}
+
+// VEXTRACTF64X4 performs "Extract 256 Bits of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VEXTRACTF64X4
+// Supported forms : (2 forms)
+//
+// * VEXTRACTF64X4 imm8, zmm, ymm{k}{z} [AVX512F]
+// * VEXTRACTF64X4 imm8, zmm, m256{k}{z} [AVX512F]
+//
+func (self *Program) VEXTRACTF64X4(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VEXTRACTF64X4", 3, Operands { v0, v1, v2 })
+ // VEXTRACTF64X4 imm8, zmm, ymm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+ m.emit(0x1b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VEXTRACTF64X4 imm8, zmm, m256{k}{z}
+ if isImm8(v0) && isZMM(v1) && isM256kz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x1b)
+ m.mrsd(lcode(v[1]), addr(v[2]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VEXTRACTF64X4")
+ }
+ return p
+}
+
+// VEXTRACTI128 performs "Extract Packed Integer Values".
+//
+// Mnemonic : VEXTRACTI128
+// Supported forms : (2 forms)
+//
+// * VEXTRACTI128 imm8, ymm, xmm [AVX2]
+// * VEXTRACTI128 imm8, ymm, m128 [AVX2]
+//
+func (self *Program) VEXTRACTI128(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VEXTRACTI128", 3, Operands { v0, v1, v2 })
+ // VEXTRACTI128 imm8, ymm, xmm
+ if isImm8(v0) && isYMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5))
+ m.emit(0x7d)
+ m.emit(0x39)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VEXTRACTI128 imm8, ymm, m128
+ if isImm8(v0) && isYMM(v1) && isM128(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[1]), addr(v[2]), 0)
+ m.emit(0x39)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VEXTRACTI128")
+ }
+ return p
+}
+
+// VEXTRACTI32X4 performs "Extract 128 Bits of Packed Doubleword Integer Values".
+//
+// Mnemonic : VEXTRACTI32X4
+// Supported forms : (4 forms)
+//
+// * VEXTRACTI32X4 imm8, zmm, xmm{k}{z} [AVX512F]
+// * VEXTRACTI32X4 imm8, zmm, m128{k}{z} [AVX512F]
+// * VEXTRACTI32X4 imm8, ymm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VEXTRACTI32X4 imm8, ymm, m128{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VEXTRACTI32X4(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VEXTRACTI32X4", 3, Operands { v0, v1, v2 })
+ // VEXTRACTI32X4 imm8, zmm, xmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+ m.emit(0x39)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VEXTRACTI32X4 imm8, zmm, m128{k}{z}
+ if isImm8(v0) && isZMM(v1) && isM128kz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x39)
+ m.mrsd(lcode(v[1]), addr(v[2]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VEXTRACTI32X4 imm8, ymm, xmm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
+ m.emit(0x39)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VEXTRACTI32X4 imm8, ymm, m128{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isM128kz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b01, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x39)
+ m.mrsd(lcode(v[1]), addr(v[2]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VEXTRACTI32X4")
+ }
+ return p
+}
+
+// VEXTRACTI32X8 performs "Extract 256 Bits of Packed Doubleword Integer Values".
+//
+// Mnemonic : VEXTRACTI32X8
+// Supported forms : (2 forms)
+//
+// * VEXTRACTI32X8 imm8, zmm, ymm{k}{z} [AVX512DQ]
+// * VEXTRACTI32X8 imm8, zmm, m256{k}{z} [AVX512DQ]
+//
+func (self *Program) VEXTRACTI32X8(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VEXTRACTI32X8", 3, Operands { v0, v1, v2 })
+ // VEXTRACTI32X8 imm8, zmm, ymm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+ m.emit(0x3b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VEXTRACTI32X8 imm8, zmm, m256{k}{z}
+ if isImm8(v0) && isZMM(v1) && isM256kz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x3b)
+ m.mrsd(lcode(v[1]), addr(v[2]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VEXTRACTI32X8")
+ }
+ return p
+}
+
+// VEXTRACTI64X2 performs "Extract 128 Bits of Packed Quadword Integer Values".
+//
+// Mnemonic : VEXTRACTI64X2
+// Supported forms : (4 forms)
+//
+// * VEXTRACTI64X2 imm8, zmm, xmm{k}{z} [AVX512DQ]
+// * VEXTRACTI64X2 imm8, zmm, m128{k}{z} [AVX512DQ]
+// * VEXTRACTI64X2 imm8, ymm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VEXTRACTI64X2 imm8, ymm, m128{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VEXTRACTI64X2(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VEXTRACTI64X2", 3, Operands { v0, v1, v2 })
+ // VEXTRACTI64X2 imm8, zmm, xmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+ m.emit(0x39)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VEXTRACTI64X2 imm8, zmm, m128{k}{z}
+ if isImm8(v0) && isZMM(v1) && isM128kz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x39)
+ m.mrsd(lcode(v[1]), addr(v[2]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VEXTRACTI64X2 imm8, ymm, xmm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
+ m.emit(0x39)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VEXTRACTI64X2 imm8, ymm, m128{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isM128kz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b01, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x39)
+ m.mrsd(lcode(v[1]), addr(v[2]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VEXTRACTI64X2")
+ }
+ return p
+}
+
+// VEXTRACTI64X4 performs "Extract 256 Bits of Packed Quadword Integer Values".
+//
+// Mnemonic : VEXTRACTI64X4
+// Supported forms : (2 forms)
+//
+// * VEXTRACTI64X4 imm8, zmm, ymm{k}{z} [AVX512F]
+// * VEXTRACTI64X4 imm8, zmm, m256{k}{z} [AVX512F]
+//
+func (self *Program) VEXTRACTI64X4(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VEXTRACTI64X4", 3, Operands { v0, v1, v2 })
+ // VEXTRACTI64X4 imm8, zmm, ymm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+ m.emit(0x3b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VEXTRACTI64X4 imm8, zmm, m256{k}{z}
+ if isImm8(v0) && isZMM(v1) && isM256kz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x3b)
+ m.mrsd(lcode(v[1]), addr(v[2]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VEXTRACTI64X4")
+ }
+ return p
+}
+
+// VEXTRACTPS performs "Extract Packed Single Precision Floating-Point Value".
+//
+// Mnemonic : VEXTRACTPS
+// Supported forms : (4 forms)
+//
+// * VEXTRACTPS imm8, xmm, r32 [AVX]
+// * VEXTRACTPS imm8, xmm, m32 [AVX]
+// * VEXTRACTPS imm8, xmm, r32 [AVX512F]
+// * VEXTRACTPS imm8, xmm, m32 [AVX512F]
+//
+func (self *Program) VEXTRACTPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VEXTRACTPS", 3, Operands { v0, v1, v2 })
+ // VEXTRACTPS imm8, xmm, r32
+ if isImm8(v0) && isXMM(v1) && isReg32(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5))
+ m.emit(0x79)
+ m.emit(0x17)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VEXTRACTPS imm8, xmm, m32
+ if isImm8(v0) && isXMM(v1) && isM32(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[1]), addr(v[2]), 0)
+ m.emit(0x17)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VEXTRACTPS imm8, xmm, r32
+ if isImm8(v0) && isEVEXXMM(v1) && isReg32(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit(0x08)
+ m.emit(0x17)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VEXTRACTPS imm8, xmm, m32
+ if isImm8(v0) && isEVEXXMM(v1) && isM32(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[1]), addr(v[2]), 0, 0, 0, 0)
+ m.emit(0x17)
+ m.mrsd(lcode(v[1]), addr(v[2]), 4)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VEXTRACTPS")
+ }
+ return p
+}
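+
+// A usage sketch (assuming an existing *Program p):
+//
+//     p.VEXTRACTPS(3, XMM1, EAX)   // eax = raw bits of xmm1's single-precision lane 3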
+
+// VFIXUPIMMPD performs "Fix Up Special Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFIXUPIMMPD
+// Supported forms : (7 forms)
+//
+// * VFIXUPIMMPD imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFIXUPIMMPD imm8, {sae}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFIXUPIMMPD imm8, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFIXUPIMMPD imm8, m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFIXUPIMMPD imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFIXUPIMMPD imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFIXUPIMMPD imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFIXUPIMMPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFIXUPIMMPD", 4, Operands { v0, v1, v2, v3 })
+ case 1 : p = self.alloc("VFIXUPIMMPD", 5, Operands { v0, v1, v2, v3, vv[0] })
+ default : panic("instruction VFIXUPIMMPD takes 4 or 5 operands")
+ }
+ // VFIXUPIMMPD imm8, m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x54)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFIXUPIMMPD imm8, {sae}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMM(v3) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[3]) << 3))
+ m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
+ m.emit(0x54)
+ m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFIXUPIMMPD imm8, zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x54)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFIXUPIMMPD imm8, m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x54)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFIXUPIMMPD imm8, xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+ m.emit(0x54)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFIXUPIMMPD imm8, m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x54)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFIXUPIMMPD imm8, ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x54)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFIXUPIMMPD")
+ }
+ return p
+}
+
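+// Illustrative call for the 4-operand register form above (register names
+// are assumptions): the imm8 control table is passed as a plain integer and
+// the last operand receives the result. The 5-operand variant instead takes
+// the package's {sae} operand second, per the form list above.
+//
+//     p.VFIXUPIMMPD(0x21, ZMM1, ZMM2, ZMM3)   // zmm, zmm, zmm{k}{z} form
+//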
+// VFIXUPIMMPS performs "Fix Up Special Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFIXUPIMMPS
+// Supported forms : (7 forms)
+//
+// * VFIXUPIMMPS imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFIXUPIMMPS imm8, {sae}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFIXUPIMMPS imm8, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFIXUPIMMPS imm8, m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFIXUPIMMPS imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFIXUPIMMPS imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFIXUPIMMPS imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFIXUPIMMPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFIXUPIMMPS", 4, Operands { v0, v1, v2, v3 })
+ case 1 : p = self.alloc("VFIXUPIMMPS", 5, Operands { v0, v1, v2, v3, vv[0] })
+ default : panic("instruction VFIXUPIMMPS takes 4 or 5 operands")
+ }
+ // VFIXUPIMMPS imm8, m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x54)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFIXUPIMMPS imm8, {sae}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMM(v3) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[3]) << 3))
+ m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
+ m.emit(0x54)
+ m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFIXUPIMMPS imm8, zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x54)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFIXUPIMMPS imm8, m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x54)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFIXUPIMMPS imm8, xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+ m.emit(0x54)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFIXUPIMMPS imm8, m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x54)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFIXUPIMMPS imm8, ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x54)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFIXUPIMMPS")
+ }
+ return p
+}
+
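+// Hedged sketch of the 128-bit VL form above, with the memory source spelled
+// as a plain Ptr operand (Ptr and register names are assumptions; the
+// m32bcst broadcast wrapper, if any, is package-specific and omitted):
+//
+//     p.VFIXUPIMMPS(0x21, Ptr(RAX, 0), XMM2, XMM3)   // m128/m32bcst, xmm, xmm{k}{z}
+//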
+// VFIXUPIMMSD performs "Fix Up Special Scalar Double-Precision Floating-Point Value".
+//
+// Mnemonic : VFIXUPIMMSD
+// Supported forms : (3 forms)
+//
+// * VFIXUPIMMSD imm8, m64, xmm, xmm{k}{z} [AVX512F]
+// * VFIXUPIMMSD imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFIXUPIMMSD imm8, xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VFIXUPIMMSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFIXUPIMMSD", 4, Operands { v0, v1, v2, v3 })
+ case 1 : p = self.alloc("VFIXUPIMMSD", 5, Operands { v0, v1, v2, v3, vv[0] })
+ default : panic("instruction VFIXUPIMMSD takes 4 or 5 operands")
+ }
+ // VFIXUPIMMSD imm8, m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x55)
+ m.mrsd(lcode(v[3]), addr(v[1]), 8)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFIXUPIMMSD imm8, {sae}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[3]) << 3))
+ m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
+ m.emit(0x55)
+ m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFIXUPIMMSD imm8, xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x55)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFIXUPIMMSD")
+ }
+ return p
+}
+
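+// Sketch of the scalar m64 form above (names assumed): only the low double
+// of the destination is fixed up, with the m64 source read from memory.
+//
+//     p.VFIXUPIMMSD(0x55, Ptr(RSI, 8), XMM1, XMM2)
+//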
+// VFIXUPIMMSS performs "Fix Up Special Scalar Single-Precision Floating-Point Value".
+//
+// Mnemonic : VFIXUPIMMSS
+// Supported forms : (3 forms)
+//
+// * VFIXUPIMMSS imm8, m32, xmm, xmm{k}{z} [AVX512F]
+// * VFIXUPIMMSS imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFIXUPIMMSS imm8, xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VFIXUPIMMSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFIXUPIMMSS", 4, Operands { v0, v1, v2, v3 })
+ case 1 : p = self.alloc("VFIXUPIMMSS", 5, Operands { v0, v1, v2, v3, vv[0] })
+ default : panic("instruction VFIXUPIMMSS takes 4 or 5 operands")
+ }
+ // VFIXUPIMMSS imm8, m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x55)
+ m.mrsd(lcode(v[3]), addr(v[1]), 4)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFIXUPIMMSS imm8, {sae}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[3]) << 3))
+ m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
+ m.emit(0x55)
+ m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFIXUPIMMSS imm8, xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x55)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFIXUPIMMSS")
+ }
+ return p
+}
+
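+// Sketch of the all-register scalar form above (register names assumed); the
+// {sae} variant instead takes the package's suppress-all-exceptions operand
+// as the second argument, matching the 5-operand form in the comment.
+//
+//     p.VFIXUPIMMSS(0x55, XMM1, XMM2, XMM3)
+//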
+// VFMADD132PD performs "Fused Multiply-Add of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFMADD132PD
+// Supported forms : (11 forms)
+//
+// * VFMADD132PD xmm, xmm, xmm [FMA3]
+// * VFMADD132PD m128, xmm, xmm [FMA3]
+// * VFMADD132PD ymm, ymm, ymm [FMA3]
+// * VFMADD132PD m256, ymm, ymm [FMA3]
+// * VFMADD132PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFMADD132PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMADD132PD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMADD132PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMADD132PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMADD132PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFMADD132PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFMADD132PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMADD132PD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMADD132PD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMADD132PD takes 3 or 4 operands")
+ }
+ // VFMADD132PD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0x98)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD132PD m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x98)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADD132PD ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0x98)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD132PD m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x98)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADD132PD m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x98)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFMADD132PD {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x98)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMADD132PD zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x98)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD132PD m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x98)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFMADD132PD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x98)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD132PD m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x98)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFMADD132PD ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x98)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMADD132PD")
+ }
+ return p
+}
+
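+// In the 132 variant the last operand is both multiplicand and destination:
+// per the encoding above, dst = dst*arg0 + arg1 in this package's operand
+// order. A hedged example of the legacy FMA3 register form (names assumed):
+//
+//     p.VFMADD132PD(XMM0, XMM1, XMM2)   // XMM2 = XMM2*XMM0 + XMM1
+//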
+// VFMADD132PS performs "Fused Multiply-Add of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFMADD132PS
+// Supported forms : (11 forms)
+//
+// * VFMADD132PS xmm, xmm, xmm [FMA3]
+// * VFMADD132PS m128, xmm, xmm [FMA3]
+// * VFMADD132PS ymm, ymm, ymm [FMA3]
+// * VFMADD132PS m256, ymm, ymm [FMA3]
+// * VFMADD132PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFMADD132PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMADD132PS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMADD132PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMADD132PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMADD132PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFMADD132PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFMADD132PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMADD132PS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMADD132PS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMADD132PS takes 3 or 4 operands")
+ }
+ // VFMADD132PS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x98)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD132PS m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x98)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADD132PS ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x98)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD132PS m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x98)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADD132PS m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x98)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFMADD132PS {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x98)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMADD132PS zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x98)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD132PS m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x98)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFMADD132PS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x98)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD132PS m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x98)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFMADD132PS ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x98)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMADD132PS")
+ }
+ return p
+}
+
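+// Same 132 dataflow for the packed single-precision form; the m256 variant
+// reads the multiplier from memory (Ptr and register names assumed):
+//
+//     p.VFMADD132PS(Ptr(RDX, 0), YMM1, YMM2)   // YMM2 = YMM2*[RDX] + YMM1
+//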
+// VFMADD132SD performs "Fused Multiply-Add of Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFMADD132SD
+// Supported forms : (5 forms)
+//
+// * VFMADD132SD xmm, xmm, xmm [FMA3]
+// * VFMADD132SD m64, xmm, xmm [FMA3]
+// * VFMADD132SD m64, xmm, xmm{k}{z} [AVX512F]
+// * VFMADD132SD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFMADD132SD xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VFMADD132SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMADD132SD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMADD132SD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMADD132SD takes 3 or 4 operands")
+ }
+ // VFMADD132SD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0x99)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD132SD m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x99)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADD132SD m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x99)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VFMADD132SD {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x99)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMADD132SD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x99)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMADD132SD")
+ }
+ return p
+}
+
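+// Scalar 132 sketch (names assumed): only the low double participates, so
+// the m64 operand supplies a single multiplier value.
+//
+//     p.VFMADD132SD(Ptr(RAX, 0), XMM1, XMM2)   // low lane: XMM2 = XMM2*[RAX] + XMM1
+//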
+// VFMADD132SS performs "Fused Multiply-Add of Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFMADD132SS
+// Supported forms : (5 forms)
+//
+// * VFMADD132SS xmm, xmm, xmm [FMA3]
+// * VFMADD132SS m32, xmm, xmm [FMA3]
+// * VFMADD132SS m32, xmm, xmm{k}{z} [AVX512F]
+// * VFMADD132SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFMADD132SS xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VFMADD132SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMADD132SS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMADD132SS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMADD132SS takes 3 or 4 operands")
+ }
+ // VFMADD132SS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x99)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD132SS m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x99)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADD132SS m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x99)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VFMADD132SS {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x99)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMADD132SS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x99)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMADD132SS")
+ }
+ return p
+}
+
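+// Scalar single-precision 132 sketch (register names assumed); the {er} form
+// instead takes the package's rounding-control operand first, per the form
+// list above.
+//
+//     p.VFMADD132SS(XMM0, XMM1, XMM2)   // low lane: XMM2 = XMM2*XMM0 + XMM1
+//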
+// VFMADD213PD performs "Fused Multiply-Add of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFMADD213PD
+// Supported forms : (11 forms)
+//
+// * VFMADD213PD xmm, xmm, xmm [FMA3]
+// * VFMADD213PD m128, xmm, xmm [FMA3]
+// * VFMADD213PD ymm, ymm, ymm [FMA3]
+// * VFMADD213PD m256, ymm, ymm [FMA3]
+// * VFMADD213PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFMADD213PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMADD213PD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMADD213PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMADD213PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMADD213PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFMADD213PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFMADD213PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMADD213PD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMADD213PD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMADD213PD takes 3 or 4 operands")
+ }
+ // VFMADD213PD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0xa8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD213PD m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xa8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADD213PD ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0xa8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD213PD m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xa8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADD213PD m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xa8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFMADD213PD {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xa8)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMADD213PD zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xa8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD213PD m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xa8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFMADD213PD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xa8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD213PD m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xa8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFMADD213PD ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xa8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMADD213PD")
+ }
+ return p
+}
+
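+// The 213 variant multiplies the destination by the middle operand and adds
+// the first: dst = arg1*dst + arg0 here, per the encoding above. Sketch with
+// assumed register names:
+//
+//     p.VFMADD213PD(ZMM0, ZMM1, ZMM2)   // ZMM2 = ZMM1*ZMM2 + ZMM0
+//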
+// VFMADD213PS performs "Fused Multiply-Add of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFMADD213PS
+// Supported forms : (11 forms)
+//
+// * VFMADD213PS xmm, xmm, xmm [FMA3]
+// * VFMADD213PS m128, xmm, xmm [FMA3]
+// * VFMADD213PS ymm, ymm, ymm [FMA3]
+// * VFMADD213PS m256, ymm, ymm [FMA3]
+// * VFMADD213PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFMADD213PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMADD213PS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMADD213PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMADD213PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMADD213PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFMADD213PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFMADD213PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMADD213PS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMADD213PS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMADD213PS takes 3 or 4 operands")
+ }
+ // VFMADD213PS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0xa8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD213PS m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xa8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADD213PS ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0xa8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD213PS m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xa8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADD213PS m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xa8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFMADD213PS {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xa8)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMADD213PS zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xa8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD213PS m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xa8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFMADD213PS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xa8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD213PS m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xa8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFMADD213PS ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xa8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMADD213PS")
+ }
+ return p
+}
+
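+// Packed single 213 sketch using the EVEX 512-bit memory form (Ptr and
+// register names assumed); note the memory operand is the addend in this
+// variant, and the m32bcst broadcast spelling is package-specific.
+//
+//     p.VFMADD213PS(Ptr(RCX, 0), ZMM1, ZMM2)   // ZMM2 = ZMM1*ZMM2 + [RCX]
+//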
+// VFMADD213SD performs "Fused Multiply-Add of Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFMADD213SD
+// Supported forms : (5 forms)
+//
+// * VFMADD213SD xmm, xmm, xmm [FMA3]
+// * VFMADD213SD m64, xmm, xmm [FMA3]
+// * VFMADD213SD m64, xmm, xmm{k}{z} [AVX512F]
+// * VFMADD213SD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFMADD213SD xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VFMADD213SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMADD213SD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMADD213SD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMADD213SD takes 3 or 4 operands")
+ }
+ // VFMADD213SD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0xa9)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD213SD m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xa9)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADD213SD m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xa9)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VFMADD213SD {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xa9)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMADD213SD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xa9)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMADD213SD")
+ }
+ return p
+}
+
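+// Scalar 213 sketch (names assumed): the m64 operand is the addend here, not
+// the multiplier as in the 132 variant.
+//
+//     p.VFMADD213SD(Ptr(RDI, 0), XMM1, XMM2)   // low lane: XMM2 = XMM1*XMM2 + [RDI]
+//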
+// VFMADD213SS performs "Fused Multiply-Add of Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFMADD213SS
+// Supported forms : (5 forms)
+//
+// * VFMADD213SS xmm, xmm, xmm [FMA3]
+// * VFMADD213SS m32, xmm, xmm [FMA3]
+// * VFMADD213SS m32, xmm, xmm{k}{z} [AVX512F]
+// * VFMADD213SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFMADD213SS xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VFMADD213SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMADD213SS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMADD213SS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMADD213SS takes 3 or 4 operands")
+ }
+ // VFMADD213SS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0xa9)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD213SS m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xa9)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADD213SS m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xa9)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VFMADD213SS {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xa9)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMADD213SS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xa9)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMADD213SS")
+ }
+ return p
+}
+
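+// Register-only scalar 213 sketch (register names assumed):
+//
+//     p.VFMADD213SS(XMM0, XMM1, XMM2)   // low lane: XMM2 = XMM1*XMM2 + XMM0
+//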
+// VFMADD231PD performs "Fused Multiply-Add of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFMADD231PD
+// Supported forms : (11 forms)
+//
+// * VFMADD231PD xmm, xmm, xmm [FMA3]
+// * VFMADD231PD m128, xmm, xmm [FMA3]
+// * VFMADD231PD ymm, ymm, ymm [FMA3]
+// * VFMADD231PD m256, ymm, ymm [FMA3]
+// * VFMADD231PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFMADD231PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMADD231PD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMADD231PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMADD231PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMADD231PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFMADD231PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFMADD231PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMADD231PD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMADD231PD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMADD231PD takes 3 or 4 operands")
+ }
+ // VFMADD231PD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0xb8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD231PD m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xb8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADD231PD ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0xb8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD231PD m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xb8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADD231PD m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xb8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFMADD231PD {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xb8)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMADD231PD zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xb8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD231PD m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xb8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFMADD231PD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xb8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD231PD m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xb8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFMADD231PD ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xb8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMADD231PD")
+ }
+ return p
+}
+
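+// The 231 variant accumulates into the destination: dst = arg1*arg0 + dst,
+// per the encoding above, which is the usual accumulator shape for reductions.
+// Sketch with assumed register names:
+//
+//     p.VFMADD231PD(ZMM0, ZMM1, ZMM2)   // ZMM2 += ZMM1*ZMM0
+//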
+// VFMADD231PS performs "Fused Multiply-Add of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFMADD231PS
+// Supported forms : (11 forms)
+//
+// * VFMADD231PS xmm, xmm, xmm [FMA3]
+// * VFMADD231PS m128, xmm, xmm [FMA3]
+// * VFMADD231PS ymm, ymm, ymm [FMA3]
+// * VFMADD231PS m256, ymm, ymm [FMA3]
+// * VFMADD231PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFMADD231PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMADD231PS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMADD231PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMADD231PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMADD231PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFMADD231PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFMADD231PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMADD231PS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMADD231PS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMADD231PS takes 3 or 4 operands")
+ }
+ // VFMADD231PS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0xb8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD231PS m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xb8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADD231PS ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0xb8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD231PS m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xb8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADD231PS m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xb8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFMADD231PS {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xb8)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMADD231PS zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xb8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD231PS m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xb8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFMADD231PS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xb8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD231PS m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xb8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFMADD231PS ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xb8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMADD231PS")
+ }
+ return p
+}
+
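+// Accumulating packed-single sketch (Ptr and register names assumed), with
+// the multiplier taken from memory:
+//
+//     p.VFMADD231PS(Ptr(RSI, 0), YMM1, YMM2)   // YMM2 += YMM1*[RSI]
+//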
+// VFMADD231SD performs "Fused Multiply-Add of Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFMADD231SD
+// Supported forms : (5 forms)
+//
+// * VFMADD231SD xmm, xmm, xmm [FMA3]
+// * VFMADD231SD m64, xmm, xmm [FMA3]
+// * VFMADD231SD m64, xmm, xmm{k}{z} [AVX512F]
+// * VFMADD231SD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFMADD231SD xmm, xmm, xmm{k}{z} [AVX512F]
+//
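+// Example (a minimal sketch, assuming p is a *Program value from this package
+// and the destination-last operand order used throughout this file):
+//
+//     p.VFMADD231SD(XMM0, XMM1, XMM2)    // xmm2 = xmm1*xmm0 + xmm2 (low float64)
+//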
+func (self *Program) VFMADD231SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMADD231SD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMADD231SD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMADD231SD takes 3 or 4 operands")
+ }
+ // VFMADD231SD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0xb9)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD231SD m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xb9)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADD231SD m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xb9)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VFMADD231SD {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xb9)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMADD231SD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xb9)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMADD231SD")
+ }
+ return p
+}
+
+// VFMADD231SS performs "Fused Multiply-Add of Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFMADD231SS
+// Supported forms : (5 forms)
+//
+// * VFMADD231SS xmm, xmm, xmm [FMA3]
+// * VFMADD231SS m32, xmm, xmm [FMA3]
+// * VFMADD231SS m32, xmm, xmm{k}{z} [AVX512F]
+// * VFMADD231SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFMADD231SS xmm, xmm, xmm{k}{z} [AVX512F]
+//
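+// Example (a minimal sketch, assuming p is a *Program value from this package
+// and the destination-last operand order used throughout this file):
+//
+//     p.VFMADD231SS(XMM0, XMM1, XMM2)    // xmm2 = xmm1*xmm0 + xmm2 (low float32)
+//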
+func (self *Program) VFMADD231SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMADD231SS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMADD231SS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMADD231SS takes 3 or 4 operands")
+ }
+ // VFMADD231SS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0xb9)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADD231SS m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xb9)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADD231SS m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xb9)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VFMADD231SS {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xb9)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMADD231SS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xb9)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMADD231SS")
+ }
+ return p
+}
+
+// VFMADDPD performs "Fused Multiply-Add of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFMADDPD
+// Supported forms : (6 forms)
+//
+// * VFMADDPD xmm, xmm, xmm, xmm [FMA4]
+// * VFMADDPD m128, xmm, xmm, xmm [FMA4]
+// * VFMADDPD xmm, m128, xmm, xmm [FMA4]
+// * VFMADDPD ymm, ymm, ymm, ymm [FMA4]
+// * VFMADDPD m256, ymm, ymm, ymm [FMA4]
+// * VFMADDPD ymm, m256, ymm, ymm [FMA4]
+//
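+// Example (a minimal sketch, assuming p is a *Program value from this package
+// and the destination-last operand order used throughout this file):
+//
+//     p.VFMADDPD(XMM0, XMM1, XMM2, XMM3) // xmm3 = xmm2*xmm1 + xmm0, per float64 lane
+//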
+func (self *Program) VFMADDPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VFMADDPD", 4, Operands { v0, v1, v2, v3 })
+ // VFMADDPD xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[2]) << 3))
+ m.emit(0x69)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x69)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMADDPD m128, xmm, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x69)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFMADDPD xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x69)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMADDPD ymm, ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit(0x69)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x69)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMADDPD m256, ymm, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x69)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFMADDPD ymm, m256, ymm, ymm
+ if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x69)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMADDPD")
+ }
+ return p
+}
+
+// VFMADDPS performs "Fused Multiply-Add of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFMADDPS
+// Supported forms : (6 forms)
+//
+// * VFMADDPS xmm, xmm, xmm, xmm [FMA4]
+// * VFMADDPS m128, xmm, xmm, xmm [FMA4]
+// * VFMADDPS xmm, m128, xmm, xmm [FMA4]
+// * VFMADDPS ymm, ymm, ymm, ymm [FMA4]
+// * VFMADDPS m256, ymm, ymm, ymm [FMA4]
+// * VFMADDPS ymm, m256, ymm, ymm [FMA4]
+//
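+// Example (a minimal sketch, assuming p is a *Program value from this package
+// and the destination-last operand order used throughout this file):
+//
+//     p.VFMADDPS(YMM0, YMM1, YMM2, YMM3) // ymm3 = ymm2*ymm1 + ymm0, per float32 lane
+//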
+func (self *Program) VFMADDPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VFMADDPS", 4, Operands { v0, v1, v2, v3 })
+ // VFMADDPS xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[2]) << 3))
+ m.emit(0x68)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x68)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMADDPS m128, xmm, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x68)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFMADDPS xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x68)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMADDPS ymm, ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit(0x68)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x68)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMADDPS m256, ymm, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x68)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFMADDPS ymm, m256, ymm, ymm
+ if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x68)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMADDPS")
+ }
+ return p
+}
+
+// VFMADDSD performs "Fused Multiply-Add of Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFMADDSD
+// Supported forms : (3 forms)
+//
+// * VFMADDSD xmm, xmm, xmm, xmm [FMA4]
+// * VFMADDSD m64, xmm, xmm, xmm [FMA4]
+// * VFMADDSD xmm, m64, xmm, xmm [FMA4]
+//
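+// Example (a minimal sketch, assuming p is a *Program value from this package
+// and the destination-last operand order used throughout this file):
+//
+//     p.VFMADDSD(XMM0, XMM1, XMM2, XMM3) // xmm3 = xmm2*xmm1 + xmm0 (low float64)
+//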
+func (self *Program) VFMADDSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VFMADDSD", 4, Operands { v0, v1, v2, v3 })
+ // VFMADDSD xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[2]) << 3))
+ m.emit(0x6b)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x6b)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMADDSD m64, xmm, xmm, xmm
+ if isM64(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x6b)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFMADDSD xmm, m64, xmm, xmm
+ if isXMM(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x6b)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMADDSD")
+ }
+ return p
+}
+
+// VFMADDSS performs "Fused Multiply-Add of Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFMADDSS
+// Supported forms : (3 forms)
+//
+// * VFMADDSS xmm, xmm, xmm, xmm [FMA4]
+// * VFMADDSS m32, xmm, xmm, xmm [FMA4]
+// * VFMADDSS xmm, m32, xmm, xmm [FMA4]
+//
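+// Example (a minimal sketch, assuming p is a *Program value from this package
+// and the destination-last operand order used throughout this file):
+//
+//     p.VFMADDSS(XMM0, XMM1, XMM2, XMM3) // xmm3 = xmm2*xmm1 + xmm0 (low float32)
+//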
+func (self *Program) VFMADDSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VFMADDSS", 4, Operands { v0, v1, v2, v3 })
+ // VFMADDSS xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[2]) << 3))
+ m.emit(0x6a)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x6a)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMADDSS m32, xmm, xmm, xmm
+ if isM32(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x6a)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFMADDSS xmm, m32, xmm, xmm
+ if isXMM(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x6a)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMADDSS")
+ }
+ return p
+}
+
+// VFMADDSUB132PD performs "Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFMADDSUB132PD
+// Supported forms : (11 forms)
+//
+// * VFMADDSUB132PD xmm, xmm, xmm [FMA3]
+// * VFMADDSUB132PD m128, xmm, xmm [FMA3]
+// * VFMADDSUB132PD ymm, ymm, ymm [FMA3]
+// * VFMADDSUB132PD m256, ymm, ymm [FMA3]
+// * VFMADDSUB132PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFMADDSUB132PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMADDSUB132PD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMADDSUB132PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMADDSUB132PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMADDSUB132PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFMADDSUB132PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
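+// Example (a minimal sketch, assuming p is a *Program value from this package
+// and the destination-last operand order used throughout this file):
+//
+//     p.VFMADDSUB132PD(XMM0, XMM1, XMM2) // xmm2 = xmm2*xmm0 +/- xmm1 (add odd, subtract even float64 lanes)
+//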
+func (self *Program) VFMADDSUB132PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMADDSUB132PD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMADDSUB132PD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMADDSUB132PD takes 3 or 4 operands")
+ }
+ // VFMADDSUB132PD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0x96)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADDSUB132PD m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x96)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADDSUB132PD ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0x96)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADDSUB132PD m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x96)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADDSUB132PD m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x96)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFMADDSUB132PD {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x96)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMADDSUB132PD zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x96)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADDSUB132PD m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x96)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFMADDSUB132PD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x96)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADDSUB132PD m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x96)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFMADDSUB132PD ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x96)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMADDSUB132PD")
+ }
+ return p
+}
+
+// VFMADDSUB132PS performs "Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFMADDSUB132PS
+// Supported forms : (11 forms)
+//
+// * VFMADDSUB132PS xmm, xmm, xmm [FMA3]
+// * VFMADDSUB132PS m128, xmm, xmm [FMA3]
+// * VFMADDSUB132PS ymm, ymm, ymm [FMA3]
+// * VFMADDSUB132PS m256, ymm, ymm [FMA3]
+// * VFMADDSUB132PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFMADDSUB132PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMADDSUB132PS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMADDSUB132PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMADDSUB132PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMADDSUB132PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFMADDSUB132PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
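+// Example (a minimal sketch, assuming p is a *Program value from this package
+// and the destination-last operand order used throughout this file):
+//
+//     p.VFMADDSUB132PS(YMM0, YMM1, YMM2) // ymm2 = ymm2*ymm0 +/- ymm1 (add odd, subtract even float32 lanes)
+//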
+func (self *Program) VFMADDSUB132PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMADDSUB132PS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMADDSUB132PS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMADDSUB132PS takes 3 or 4 operands")
+ }
+ // VFMADDSUB132PS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x96)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADDSUB132PS m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x96)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADDSUB132PS ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x96)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADDSUB132PS m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x96)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADDSUB132PS m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x96)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFMADDSUB132PS {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x96)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMADDSUB132PS zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x96)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADDSUB132PS m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x96)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFMADDSUB132PS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x96)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADDSUB132PS m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x96)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFMADDSUB132PS ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x96)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMADDSUB132PS")
+ }
+ return p
+}
+
+// VFMADDSUB213PD performs "Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFMADDSUB213PD
+// Supported forms : (11 forms)
+//
+// * VFMADDSUB213PD xmm, xmm, xmm [FMA3]
+// * VFMADDSUB213PD m128, xmm, xmm [FMA3]
+// * VFMADDSUB213PD ymm, ymm, ymm [FMA3]
+// * VFMADDSUB213PD m256, ymm, ymm [FMA3]
+// * VFMADDSUB213PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFMADDSUB213PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMADDSUB213PD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMADDSUB213PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMADDSUB213PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMADDSUB213PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFMADDSUB213PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
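+// Example (a minimal sketch, assuming p is a *Program value from this package
+// and the destination-last operand order used throughout this file):
+//
+//     p.VFMADDSUB213PD(XMM0, XMM1, XMM2) // xmm2 = xmm1*xmm2 +/- xmm0 (add odd, subtract even float64 lanes)
+//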
+func (self *Program) VFMADDSUB213PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMADDSUB213PD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMADDSUB213PD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMADDSUB213PD takes 3 or 4 operands")
+ }
+ // VFMADDSUB213PD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0xa6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADDSUB213PD m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xa6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADDSUB213PD ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0xa6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADDSUB213PD m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xa6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADDSUB213PD m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xa6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFMADDSUB213PD {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xa6)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMADDSUB213PD zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xa6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADDSUB213PD m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xa6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFMADDSUB213PD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xa6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADDSUB213PD m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xa6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFMADDSUB213PD ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xa6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMADDSUB213PD")
+ }
+ return p
+}
+
+// VFMADDSUB213PS performs "Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFMADDSUB213PS
+// Supported forms : (11 forms)
+//
+// * VFMADDSUB213PS xmm, xmm, xmm [FMA3]
+// * VFMADDSUB213PS m128, xmm, xmm [FMA3]
+// * VFMADDSUB213PS ymm, ymm, ymm [FMA3]
+// * VFMADDSUB213PS m256, ymm, ymm [FMA3]
+// * VFMADDSUB213PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFMADDSUB213PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMADDSUB213PS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMADDSUB213PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMADDSUB213PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMADDSUB213PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFMADDSUB213PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
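+// Example (a minimal sketch, assuming p is a *Program value from this package
+// and the destination-last operand order used throughout this file):
+//
+//     p.VFMADDSUB213PS(YMM0, YMM1, YMM2) // ymm2 = ymm1*ymm2 +/- ymm0 (add odd, subtract even float32 lanes)
+//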
+func (self *Program) VFMADDSUB213PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMADDSUB213PS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMADDSUB213PS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMADDSUB213PS takes 3 or 4 operands")
+ }
+ // VFMADDSUB213PS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0xa6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADDSUB213PS m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xa6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADDSUB213PS ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0xa6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADDSUB213PS m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xa6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADDSUB213PS m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xa6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFMADDSUB213PS {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xa6)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMADDSUB213PS zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xa6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADDSUB213PS m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xa6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFMADDSUB213PS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xa6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADDSUB213PS m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xa6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFMADDSUB213PS ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xa6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMADDSUB213PS")
+ }
+ return p
+}
+
+// VFMADDSUB231PD performs "Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFMADDSUB231PD
+// Supported forms : (11 forms)
+//
+// * VFMADDSUB231PD xmm, xmm, xmm [FMA3]
+// * VFMADDSUB231PD m128, xmm, xmm [FMA3]
+// * VFMADDSUB231PD ymm, ymm, ymm [FMA3]
+// * VFMADDSUB231PD m256, ymm, ymm [FMA3]
+// * VFMADDSUB231PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFMADDSUB231PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMADDSUB231PD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMADDSUB231PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMADDSUB231PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMADDSUB231PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFMADDSUB231PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
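+// Example (a minimal sketch, assuming p is a *Program value from this package
+// and the destination-last operand order used throughout this file):
+//
+//     p.VFMADDSUB231PD(XMM0, XMM1, XMM2) // xmm2 = xmm1*xmm0 +/- xmm2 (add odd, subtract even float64 lanes)
+//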
+func (self *Program) VFMADDSUB231PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMADDSUB231PD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMADDSUB231PD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMADDSUB231PD takes 3 or 4 operands")
+ }
+ // VFMADDSUB231PD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0xb6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADDSUB231PD m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xb6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADDSUB231PD ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0xb6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADDSUB231PD m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xb6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADDSUB231PD m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xb6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFMADDSUB231PD {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xb6)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMADDSUB231PD zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xb6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADDSUB231PD m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xb6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFMADDSUB231PD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xb6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADDSUB231PD m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xb6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFMADDSUB231PD ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xb6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMADDSUB231PD")
+ }
+ return p
+}
+
+// VFMADDSUB231PS performs "Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFMADDSUB231PS
+// Supported forms : (11 forms)
+//
+// * VFMADDSUB231PS xmm, xmm, xmm [FMA3]
+// * VFMADDSUB231PS m128, xmm, xmm [FMA3]
+// * VFMADDSUB231PS ymm, ymm, ymm [FMA3]
+// * VFMADDSUB231PS m256, ymm, ymm [FMA3]
+// * VFMADDSUB231PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFMADDSUB231PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMADDSUB231PS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMADDSUB231PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMADDSUB231PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMADDSUB231PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFMADDSUB231PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
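+// Example (a minimal sketch, assuming p is a *Program value from this package
+// and the destination-last operand order used throughout this file):
+//
+//     p.VFMADDSUB231PS(YMM0, YMM1, YMM2) // ymm2 = ymm1*ymm0 +/- ymm2 (add odd, subtract even float32 lanes)
+//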
+func (self *Program) VFMADDSUB231PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMADDSUB231PS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMADDSUB231PS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMADDSUB231PS takes 3 or 4 operands")
+ }
+ // VFMADDSUB231PS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0xb6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADDSUB231PS m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xb6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADDSUB231PS ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0xb6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADDSUB231PS m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xb6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMADDSUB231PS m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xb6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFMADDSUB231PS {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xb6)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMADDSUB231PS zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xb6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADDSUB231PS m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xb6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFMADDSUB231PS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xb6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMADDSUB231PS m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xb6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFMADDSUB231PS ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xb6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMADDSUB231PS")
+ }
+ return p
+}
+
+// VFMADDSUBPD performs "Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFMADDSUBPD
+// Supported forms : (6 forms)
+//
+// * VFMADDSUBPD xmm, xmm, xmm, xmm [FMA4]
+// * VFMADDSUBPD m128, xmm, xmm, xmm [FMA4]
+// * VFMADDSUBPD xmm, m128, xmm, xmm [FMA4]
+// * VFMADDSUBPD ymm, ymm, ymm, ymm [FMA4]
+// * VFMADDSUBPD m256, ymm, ymm, ymm [FMA4]
+// * VFMADDSUBPD ymm, m256, ymm, ymm [FMA4]
+//
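+// Example (a minimal sketch, assuming p is a *Program value from this package
+// and the destination-last operand order used throughout this file):
+//
+//     p.VFMADDSUBPD(XMM0, XMM1, XMM2, XMM3) // xmm3 = xmm2*xmm1 +/- xmm0 (add odd, subtract even float64 lanes)
+//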
+func (self *Program) VFMADDSUBPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VFMADDSUBPD", 4, Operands { v0, v1, v2, v3 })
+ // VFMADDSUBPD xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[2]) << 3))
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMADDSUBPD m128, xmm, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x5d)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFMADDSUBPD xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x5d)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMADDSUBPD ymm, ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMADDSUBPD m256, ymm, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x5d)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFMADDSUBPD ymm, m256, ymm, ymm
+ if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x5d)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMADDSUBPD")
+ }
+ return p
+}
+
+// VFMADDSUBPS performs "Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFMADDSUBPS
+// Supported forms : (6 forms)
+//
+// * VFMADDSUBPS xmm, xmm, xmm, xmm [FMA4]
+// * VFMADDSUBPS m128, xmm, xmm, xmm [FMA4]
+// * VFMADDSUBPS xmm, m128, xmm, xmm [FMA4]
+// * VFMADDSUBPS ymm, ymm, ymm, ymm [FMA4]
+// * VFMADDSUBPS m256, ymm, ymm, ymm [FMA4]
+// * VFMADDSUBPS ymm, m256, ymm, ymm [FMA4]
+//
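+// Example (a minimal sketch, assuming p is a *Program value from this package
+// and the destination-last operand order used throughout this file):
+//
+//     p.VFMADDSUBPS(YMM0, YMM1, YMM2, YMM3) // ymm3 = ymm2*ymm1 +/- ymm0 (add odd, subtract even float32 lanes)
+//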
+func (self *Program) VFMADDSUBPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VFMADDSUBPS", 4, Operands { v0, v1, v2, v3 })
+ // VFMADDSUBPS xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[2]) << 3))
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMADDSUBPS m128, xmm, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x5c)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFMADDSUBPS xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x5c)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMADDSUBPS ymm, ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMADDSUBPS m256, ymm, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x5c)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFMADDSUBPS ymm, m256, ymm, ymm
+ if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x5c)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMADDSUBPS")
+ }
+ return p
+}
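+
+// Note: VFMADDSUBPS shares VFMADDSUBPD's operand matching; the encodings
+// differ only in the opcode byte (0x5c vs. 0x5d), while the xmm and ymm
+// forms differ in VEX.L (the 0x79/0xf9 vs. 0x7d/0xfd prefix bytes).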
+
+// VFMSUB132PD performs "Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFMSUB132PD
+// Supported forms : (11 forms)
+//
+// * VFMSUB132PD xmm, xmm, xmm [FMA3]
+// * VFMSUB132PD m128, xmm, xmm [FMA3]
+// * VFMSUB132PD ymm, ymm, ymm [FMA3]
+// * VFMSUB132PD m256, ymm, ymm [FMA3]
+// * VFMSUB132PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUB132PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUB132PD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUB132PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUB132PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUB132PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUB132PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFMSUB132PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMSUB132PD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMSUB132PD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMSUB132PD takes 3 or 4 operands")
+ }
+ // VFMSUB132PD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0x9a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB132PD m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x9a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUB132PD ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0x9a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB132PD m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x9a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUB132PD m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x9a)
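+ // The final mrsd argument is the EVEX disp8*N compression factor:
+ // 64 for a full 512-bit memory operand (the 128- and 256-bit forms
+ // below use 16 and 32).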
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFMSUB132PD {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x9a)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMSUB132PD zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x9a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB132PD m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x9a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFMSUB132PD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x9a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB132PD m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x9a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFMSUB132PD ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x9a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMSUB132PD")
+ }
+ return p
+}
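+
+// A minimal usage sketch for the FMA3 form, assuming the exported XMM
+// register constants (illustrative only):
+//
+//    p.VFMSUB132PD(XMM3, XMM2, XMM1)
+//
+// With this file's reversed operand order this matches Intel's
+// "VFMSUB132PD xmm1, xmm2, xmm3", i.e. xmm1 = xmm1*xmm3 - xmm2.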
+
+// VFMSUB132PS performs "Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFMSUB132PS
+// Supported forms : (11 forms)
+//
+// * VFMSUB132PS xmm, xmm, xmm [FMA3]
+// * VFMSUB132PS m128, xmm, xmm [FMA3]
+// * VFMSUB132PS ymm, ymm, ymm [FMA3]
+// * VFMSUB132PS m256, ymm, ymm [FMA3]
+// * VFMSUB132PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUB132PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUB132PS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUB132PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUB132PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUB132PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUB132PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFMSUB132PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMSUB132PS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMSUB132PS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMSUB132PS takes 3 or 4 operands")
+ }
+ // VFMSUB132PS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x9a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB132PS m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x9a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUB132PS ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x9a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB132PS m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x9a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUB132PS m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x9a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFMSUB132PS {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x9a)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMSUB132PS zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x9a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB132PS m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x9a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFMSUB132PS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x9a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB132PS m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x9a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFMSUB132PS ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x9a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMSUB132PS")
+ }
+ return p
+}
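+
+// Note: the EVEX forms are matched through the isEVEXXMM/isXMMkz (and YMM)
+// predicates; beyond masking and broadcast, they additionally reach the
+// extended registers XMM16-XMM31, which the VEX-encoded FMA3 forms cannot
+// encode.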
+
+// VFMSUB132SD performs "Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFMSUB132SD
+// Supported forms : (5 forms)
+//
+// * VFMSUB132SD xmm, xmm, xmm [FMA3]
+// * VFMSUB132SD m64, xmm, xmm [FMA3]
+// * VFMSUB132SD m64, xmm, xmm{k}{z} [AVX512F]
+// * VFMSUB132SD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFMSUB132SD xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VFMSUB132SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMSUB132SD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMSUB132SD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMSUB132SD takes 3 or 4 operands")
+ }
+ // VFMSUB132SD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0x9b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB132SD m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x9b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUB132SD m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x9b)
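+ // Scalar EVEX form: disp8*N compression uses N=8, the width of the
+ // m64 operand (the SS variant uses N=4).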
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VFMSUB132SD {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x9b)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMSUB132SD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x9b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMSUB132SD")
+ }
+ return p
+}
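+
+// A scalar usage sketch (illustrative only, assuming the exported XMM
+// constants):
+//
+//    p.VFMSUB132SD(XMM3, XMM2, XMM1)
+//
+// Only the low double-precision lane participates; per Intel's scalar FMA
+// semantics the destination's bits 127:64 are left unchanged.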
+
+// VFMSUB132SS performs "Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFMSUB132SS
+// Supported forms : (5 forms)
+//
+// * VFMSUB132SS xmm, xmm, xmm [FMA3]
+// * VFMSUB132SS m32, xmm, xmm [FMA3]
+// * VFMSUB132SS m32, xmm, xmm{k}{z} [AVX512F]
+// * VFMSUB132SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFMSUB132SS xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VFMSUB132SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMSUB132SS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMSUB132SS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMSUB132SS takes 3 or 4 operands")
+ }
+ // VFMSUB132SS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x9b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB132SS m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x9b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUB132SS m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x9b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VFMSUB132SS {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x9b)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMSUB132SS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x9b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMSUB132SS")
+ }
+ return p
+}
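+
+// Note: the {er} form takes the rounding mode as an extra first operand
+// (four operands in total); it is matched via isER(v0) and encodes the
+// rounding control as vcode(v[0]) in the fourth EVEX prefix byte, with
+// EVEX.b set (the OR-ed 0x10).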
+
+// VFMSUB213PD performs "Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFMSUB213PD
+// Supported forms : (11 forms)
+//
+// * VFMSUB213PD xmm, xmm, xmm [FMA3]
+// * VFMSUB213PD m128, xmm, xmm [FMA3]
+// * VFMSUB213PD ymm, ymm, ymm [FMA3]
+// * VFMSUB213PD m256, ymm, ymm [FMA3]
+// * VFMSUB213PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUB213PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUB213PD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUB213PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUB213PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUB213PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUB213PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFMSUB213PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMSUB213PD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMSUB213PD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMSUB213PD takes 3 or 4 operands")
+ }
+ // VFMSUB213PD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0xaa)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB213PD m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xaa)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUB213PD ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0xaa)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB213PD m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xaa)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUB213PD m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xaa)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFMSUB213PD {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xaa)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMSUB213PD zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xaa)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB213PD m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xaa)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFMSUB213PD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xaa)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB213PD m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xaa)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFMSUB213PD ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xaa)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMSUB213PD")
+ }
+ return p
+}
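+
+// A usage sketch for the 213 ordering (illustrative only):
+//
+//    p.VFMSUB213PD(XMM3, XMM2, XMM1)
+//
+// matching Intel's "VFMSUB213PD xmm1, xmm2, xmm3", i.e.
+// xmm1 = xmm2*xmm1 - xmm3.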
+
+// VFMSUB213PS performs "Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFMSUB213PS
+// Supported forms : (11 forms)
+//
+// * VFMSUB213PS xmm, xmm, xmm [FMA3]
+// * VFMSUB213PS m128, xmm, xmm [FMA3]
+// * VFMSUB213PS ymm, ymm, ymm [FMA3]
+// * VFMSUB213PS m256, ymm, ymm [FMA3]
+// * VFMSUB213PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUB213PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUB213PS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUB213PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUB213PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUB213PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUB213PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFMSUB213PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMSUB213PS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMSUB213PS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMSUB213PS takes 3 or 4 operands")
+ }
+ // VFMSUB213PS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0xaa)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB213PS m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xaa)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUB213PS ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0xaa)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB213PS m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xaa)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUB213PS m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xaa)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFMSUB213PS {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xaa)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMSUB213PS zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xaa)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB213PS m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xaa)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFMSUB213PS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xaa)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB213PS m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xaa)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFMSUB213PS ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xaa)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMSUB213PS")
+ }
+ return p
+}
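+
+// Note: the 132/213/231 variants share identical operand matching and differ
+// only in the opcode byte: 0x9a for 132, 0xaa for 213 (as here), and 0xba
+// for 231; the corresponding scalar forms use 0x9b/0xab/0xbb.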
+
+// VFMSUB213SD performs "Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFMSUB213SD
+// Supported forms : (5 forms)
+//
+// * VFMSUB213SD xmm, xmm, xmm [FMA3]
+// * VFMSUB213SD m64, xmm, xmm [FMA3]
+// * VFMSUB213SD m64, xmm, xmm{k}{z} [AVX512F]
+// * VFMSUB213SD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFMSUB213SD xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VFMSUB213SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMSUB213SD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMSUB213SD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMSUB213SD takes 3 or 4 operands")
+ }
+ // VFMSUB213SD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0xab)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB213SD m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xab)
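+ // VEX encodings have no disp8*N compression, hence the displacement
+ // scale of 1 below.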
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUB213SD m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xab)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VFMSUB213SD {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xab)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMSUB213SD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xab)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMSUB213SD")
+ }
+ return p
+}
+
+// VFMSUB213SS performs "Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFMSUB213SS
+// Supported forms : (5 forms)
+//
+// * VFMSUB213SS xmm, xmm, xmm [FMA3]
+// * VFMSUB213SS m32, xmm, xmm [FMA3]
+// * VFMSUB213SS m32, xmm, xmm{k}{z} [AVX512F]
+// * VFMSUB213SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFMSUB213SS xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VFMSUB213SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMSUB213SS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMSUB213SS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMSUB213SS takes 3 or 4 operands")
+ }
+ // VFMSUB213SS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0xab)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB213SS m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xab)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUB213SS m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xab)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VFMSUB213SS {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xab)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMSUB213SS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xab)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMSUB213SS")
+ }
+ return p
+}
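+
+// A scalar single-precision sketch (illustrative only):
+//
+//    p.VFMSUB213SS(XMM2, XMM1, XMM0)
+//
+// i.e. in Intel order "VFMSUB213SS xmm0, xmm1, xmm2":
+// xmm0 = xmm1*xmm0 - xmm2 in the low lane.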
+
+// VFMSUB231PD performs "Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFMSUB231PD
+// Supported forms : (11 forms)
+//
+// * VFMSUB231PD xmm, xmm, xmm [FMA3]
+// * VFMSUB231PD m128, xmm, xmm [FMA3]
+// * VFMSUB231PD ymm, ymm, ymm [FMA3]
+// * VFMSUB231PD m256, ymm, ymm [FMA3]
+// * VFMSUB231PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUB231PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUB231PD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUB231PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUB231PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUB231PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUB231PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFMSUB231PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMSUB231PD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMSUB231PD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMSUB231PD takes 3 or 4 operands")
+ }
+ // VFMSUB231PD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0xba)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB231PD m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xba)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUB231PD ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0xba)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB231PD m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xba)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUB231PD m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xba)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFMSUB231PD {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xba)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMSUB231PD zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xba)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB231PD m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xba)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFMSUB231PD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xba)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB231PD m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xba)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFMSUB231PD ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xba)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMSUB231PD")
+ }
+ return p
+}
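+
+// A usage sketch for the 231 ordering (illustrative only):
+//
+//    p.VFMSUB231PD(YMM3, YMM2, YMM1)
+//
+// matching Intel's "VFMSUB231PD ymm1, ymm2, ymm3", i.e.
+// ymm1 = ymm2*ymm3 - ymm1.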
+
+// VFMSUB231PS performs "Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFMSUB231PS
+// Supported forms : (11 forms)
+//
+// * VFMSUB231PS xmm, xmm, xmm [FMA3]
+// * VFMSUB231PS m128, xmm, xmm [FMA3]
+// * VFMSUB231PS ymm, ymm, ymm [FMA3]
+// * VFMSUB231PS m256, ymm, ymm [FMA3]
+// * VFMSUB231PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUB231PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUB231PS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUB231PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUB231PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUB231PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUB231PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFMSUB231PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMSUB231PS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMSUB231PS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMSUB231PS takes 3 or 4 operands")
+ }
+ // VFMSUB231PS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0xba)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB231PS m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xba)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUB231PS ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0xba)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB231PS m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xba)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUB231PS m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xba)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFMSUB231PS {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xba)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMSUB231PS zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xba)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB231PS m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xba)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFMSUB231PS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xba)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB231PS m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xba)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFMSUB231PS ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xba)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMSUB231PS")
+ }
+ return p
+}
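+
+// Note: the three digits in 132/213/231 name the operand roles in Intel
+// order (the destination is operand 1): 132 computes op1*op3 - op2,
+// 213 computes op2*op1 - op3, and 231 computes op2*op3 - op1.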
+
+// VFMSUB231SD performs "Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFMSUB231SD
+// Supported forms : (5 forms)
+//
+// * VFMSUB231SD xmm, xmm, xmm [FMA3]
+// * VFMSUB231SD m64, xmm, xmm [FMA3]
+// * VFMSUB231SD m64, xmm, xmm{k}{z} [AVX512F]
+// * VFMSUB231SD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFMSUB231SD xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VFMSUB231SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMSUB231SD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMSUB231SD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMSUB231SD takes 3 or 4 operands")
+ }
+ // VFMSUB231SD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0xbb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB231SD m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xbb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUB231SD m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xbb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VFMSUB231SD {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xbb)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMSUB231SD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xbb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMSUB231SD")
+ }
+ return p
+}
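+
+// Note: as with every emitter in this file, if no form matched then p.len
+// stays 0 and the function panics rather than silently emitting nothing.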
+
+// VFMSUB231SS performs "Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFMSUB231SS
+// Supported forms : (5 forms)
+//
+// * VFMSUB231SS xmm, xmm, xmm [FMA3]
+// * VFMSUB231SS m32, xmm, xmm [FMA3]
+// * VFMSUB231SS m32, xmm, xmm{k}{z} [AVX512F]
+// * VFMSUB231SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFMSUB231SS xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VFMSUB231SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMSUB231SS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMSUB231SS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMSUB231SS takes 3 or 4 operands")
+ }
+ // VFMSUB231SS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0xbb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUB231SS m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xbb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUB231SS m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xbb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VFMSUB231SS {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xbb)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMSUB231SS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xbb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMSUB231SS")
+ }
+ return p
+}
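+
+// Note: in the packed EVEX register forms the OR-ed trailing constant sets
+// EVEX.L'L (0x00 = 128-bit, 0x20 = 256-bit, 0x40 = 512-bit); the scalar
+// register forms also emit 0x40, as vector length is ignored (LIG) for
+// scalar EVEX instructions.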
+
+// VFMSUBADD132PD performs "Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFMSUBADD132PD
+// Supported forms : (11 forms)
+//
+// * VFMSUBADD132PD xmm, xmm, xmm [FMA3]
+// * VFMSUBADD132PD m128, xmm, xmm [FMA3]
+// * VFMSUBADD132PD ymm, ymm, ymm [FMA3]
+// * VFMSUBADD132PD m256, ymm, ymm [FMA3]
+// * VFMSUBADD132PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUBADD132PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUBADD132PD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUBADD132PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUBADD132PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUBADD132PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUBADD132PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFMSUBADD132PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMSUBADD132PD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMSUBADD132PD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMSUBADD132PD takes 3 or 4 operands")
+ }
+ // VFMSUBADD132PD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0x97)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUBADD132PD m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x97)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUBADD132PD ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0x97)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUBADD132PD m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x97)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUBADD132PD m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x97)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFMSUBADD132PD {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x97)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMSUBADD132PD zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x97)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUBADD132PD m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x97)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFMSUBADD132PD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x97)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUBADD132PD m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x97)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFMSUBADD132PD ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x97)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMSUBADD132PD")
+ }
+ return p
+}
+
+// VFMSUBADD132PS performs "Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFMSUBADD132PS
+// Supported forms : (11 forms)
+//
+// * VFMSUBADD132PS xmm, xmm, xmm [FMA3]
+// * VFMSUBADD132PS m128, xmm, xmm [FMA3]
+// * VFMSUBADD132PS ymm, ymm, ymm [FMA3]
+// * VFMSUBADD132PS m256, ymm, ymm [FMA3]
+// * VFMSUBADD132PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUBADD132PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUBADD132PS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUBADD132PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUBADD132PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUBADD132PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUBADD132PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
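+// A hedged example, assuming a *Program value p and the package's YMM
+// register constants (a sketch, not generated documentation):
+//
+//     p.VFMSUBADD132PS(YMM0, YMM1, YMM2)   // FMA3 256-bit register form
+//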
+func (self *Program) VFMSUBADD132PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMSUBADD132PS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMSUBADD132PS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMSUBADD132PS takes 3 or 4 operands")
+ }
+ // VFMSUBADD132PS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x97)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUBADD132PS m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x97)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUBADD132PS ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x97)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUBADD132PS m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x97)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUBADD132PS m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x97)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFMSUBADD132PS {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x97)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMSUBADD132PS zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x97)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUBADD132PS m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x97)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFMSUBADD132PS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x97)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUBADD132PS m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x97)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFMSUBADD132PS ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x97)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMSUBADD132PS")
+ }
+ return p
+}
+
+// VFMSUBADD213PD performs "Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFMSUBADD213PD
+// Supported forms : (11 forms)
+//
+// * VFMSUBADD213PD xmm, xmm, xmm [FMA3]
+// * VFMSUBADD213PD m128, xmm, xmm [FMA3]
+// * VFMSUBADD213PD ymm, ymm, ymm [FMA3]
+// * VFMSUBADD213PD m256, ymm, ymm [FMA3]
+// * VFMSUBADD213PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUBADD213PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUBADD213PD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUBADD213PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUBADD213PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUBADD213PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUBADD213PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
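+// A usage sketch (assumes a *Program value p and the XMM register
+// constants; the 132/213/231 suffixes follow Intel's convention for which
+// operands are multiplied before the alternating subtract/add):
+//
+//     p.VFMSUBADD213PD(XMM0, XMM1, XMM2)
+//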
+func (self *Program) VFMSUBADD213PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMSUBADD213PD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMSUBADD213PD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMSUBADD213PD takes 3 or 4 operands")
+ }
+ // VFMSUBADD213PD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0xa7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUBADD213PD m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xa7)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUBADD213PD ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0xa7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUBADD213PD m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xa7)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUBADD213PD m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xa7)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFMSUBADD213PD {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xa7)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMSUBADD213PD zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xa7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUBADD213PD m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xa7)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFMSUBADD213PD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xa7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUBADD213PD m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xa7)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFMSUBADD213PD ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xa7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMSUBADD213PD")
+ }
+ return p
+}
+
+// VFMSUBADD213PS performs "Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFMSUBADD213PS
+// Supported forms : (11 forms)
+//
+// * VFMSUBADD213PS xmm, xmm, xmm [FMA3]
+// * VFMSUBADD213PS m128, xmm, xmm [FMA3]
+// * VFMSUBADD213PS ymm, ymm, ymm [FMA3]
+// * VFMSUBADD213PS m256, ymm, ymm [FMA3]
+// * VFMSUBADD213PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUBADD213PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUBADD213PS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUBADD213PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUBADD213PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUBADD213PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUBADD213PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
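+// Illustrative only (assumes a *Program value p; YMM3 through YMM5 are
+// the package's register constants):
+//
+//     p.VFMSUBADD213PS(YMM3, YMM4, YMM5)
+//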
+func (self *Program) VFMSUBADD213PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMSUBADD213PS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMSUBADD213PS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMSUBADD213PS takes 3 or 4 operands")
+ }
+ // VFMSUBADD213PS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0xa7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUBADD213PS m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xa7)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUBADD213PS ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0xa7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUBADD213PS m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xa7)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUBADD213PS m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xa7)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFMSUBADD213PS {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xa7)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMSUBADD213PS zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xa7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUBADD213PS m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xa7)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFMSUBADD213PS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xa7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUBADD213PS m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xa7)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFMSUBADD213PS ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xa7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMSUBADD213PS")
+ }
+ return p
+}
+
+// VFMSUBADD231PD performs "Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFMSUBADD231PD
+// Supported forms : (11 forms)
+//
+// * VFMSUBADD231PD xmm, xmm, xmm [FMA3]
+// * VFMSUBADD231PD m128, xmm, xmm [FMA3]
+// * VFMSUBADD231PD ymm, ymm, ymm [FMA3]
+// * VFMSUBADD231PD m256, ymm, ymm [FMA3]
+// * VFMSUBADD231PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUBADD231PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUBADD231PD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUBADD231PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUBADD231PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUBADD231PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUBADD231PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
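+// A sketch of the AVX-512 register form, assuming a *Program value p and
+// that plain ZMM register constants are accepted for the zmm{k}{z} slot:
+//
+//     p.VFMSUBADD231PD(ZMM0, ZMM1, ZMM2)   // EVEX-encoded zmm form
+//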
+func (self *Program) VFMSUBADD231PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMSUBADD231PD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMSUBADD231PD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMSUBADD231PD takes 3 or 4 operands")
+ }
+ // VFMSUBADD231PD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0xb7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUBADD231PD m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xb7)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUBADD231PD ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0xb7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUBADD231PD m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xb7)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUBADD231PD m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xb7)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFMSUBADD231PD {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xb7)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMSUBADD231PD zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xb7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUBADD231PD m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xb7)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFMSUBADD231PD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xb7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUBADD231PD m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xb7)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFMSUBADD231PD ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xb7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMSUBADD231PD")
+ }
+ return p
+}
+
+// VFMSUBADD231PS performs "Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFMSUBADD231PS
+// Supported forms : (11 forms)
+//
+// * VFMSUBADD231PS xmm, xmm, xmm [FMA3]
+// * VFMSUBADD231PS m128, xmm, xmm [FMA3]
+// * VFMSUBADD231PS ymm, ymm, ymm [FMA3]
+// * VFMSUBADD231PS m256, ymm, ymm [FMA3]
+// * VFMSUBADD231PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUBADD231PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUBADD231PS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFMSUBADD231PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUBADD231PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUBADD231PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFMSUBADD231PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
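+// A minimal sketch (assumes a *Program value p and the XMM register
+// constants; illustrative only):
+//
+//     p.VFMSUBADD231PS(XMM0, XMM1, XMM2)
+//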
+func (self *Program) VFMSUBADD231PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFMSUBADD231PS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFMSUBADD231PS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFMSUBADD231PS takes 3 or 4 operands")
+ }
+ // VFMSUBADD231PS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0xb7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUBADD231PS m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xb7)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUBADD231PS ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0xb7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUBADD231PS m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xb7)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFMSUBADD231PS m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xb7)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFMSUBADD231PS {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xb7)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFMSUBADD231PS zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xb7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUBADD231PS m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xb7)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFMSUBADD231PS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xb7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFMSUBADD231PS m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xb7)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFMSUBADD231PS ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xb7)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMSUBADD231PS")
+ }
+ return p
+}
+
+// VFMSUBADDPD performs "Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFMSUBADDPD
+// Supported forms : (6 forms)
+//
+// * VFMSUBADDPD xmm, xmm, xmm, xmm [FMA4]
+// * VFMSUBADDPD m128, xmm, xmm, xmm [FMA4]
+// * VFMSUBADDPD xmm, m128, xmm, xmm [FMA4]
+// * VFMSUBADDPD ymm, ymm, ymm, ymm [FMA4]
+// * VFMSUBADDPD m256, ymm, ymm, ymm [FMA4]
+// * VFMSUBADDPD ymm, m256, ymm, ymm [FMA4]
+//
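+// A sketch of the four-operand FMA4 form (assumes a *Program value p and
+// the XMM register constants; not generated documentation):
+//
+//     p.VFMSUBADDPD(XMM0, XMM1, XMM2, XMM3)
+//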
+func (self *Program) VFMSUBADDPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VFMSUBADDPD", 4, Operands { v0, v1, v2, v3 })
+ // VFMSUBADDPD xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[2]) << 3))
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMSUBADDPD m128, xmm, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x5f)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFMSUBADDPD xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x5f)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMSUBADDPD ymm, ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMSUBADDPD m256, ymm, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x5f)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFMSUBADDPD ymm, m256, ymm, ymm
+ if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x5f)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMSUBADDPD")
+ }
+ return p
+}
+
+// VFMSUBADDPS performs "Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFMSUBADDPS
+// Supported forms : (6 forms)
+//
+// * VFMSUBADDPS xmm, xmm, xmm, xmm [FMA4]
+// * VFMSUBADDPS m128, xmm, xmm, xmm [FMA4]
+// * VFMSUBADDPS xmm, m128, xmm, xmm [FMA4]
+// * VFMSUBADDPS ymm, ymm, ymm, ymm [FMA4]
+// * VFMSUBADDPS m256, ymm, ymm, ymm [FMA4]
+// * VFMSUBADDPS ymm, m256, ymm, ymm [FMA4]
+//
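+// Illustrative only, assuming a *Program value p and the YMM register
+// constants:
+//
+//     p.VFMSUBADDPS(YMM0, YMM1, YMM2, YMM3)
+//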
+func (self *Program) VFMSUBADDPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VFMSUBADDPS", 4, Operands { v0, v1, v2, v3 })
+ // VFMSUBADDPS xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[2]) << 3))
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMSUBADDPS m128, xmm, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x5e)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFMSUBADDPS xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x5e)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMSUBADDPS ymm, ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x5e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMSUBADDPS m256, ymm, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x5e)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFMSUBADDPS ymm, m256, ymm, ymm
+ if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x5e)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMSUBADDPS")
+ }
+ return p
+}
+
+// VFMSUBPD performs "Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFMSUBPD
+// Supported forms : (6 forms)
+//
+// * VFMSUBPD xmm, xmm, xmm, xmm [FMA4]
+// * VFMSUBPD m128, xmm, xmm, xmm [FMA4]
+// * VFMSUBPD xmm, m128, xmm, xmm [FMA4]
+// * VFMSUBPD ymm, ymm, ymm, ymm [FMA4]
+// * VFMSUBPD m256, ymm, ymm, ymm [FMA4]
+// * VFMSUBPD ymm, m256, ymm, ymm [FMA4]
+//
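+// A minimal sketch (assumes a *Program value p and the XMM register
+// constants); note that the register-only form below has two equivalent
+// VEX encodings, both of which are registered by this encoder:
+//
+//     p.VFMSUBPD(XMM0, XMM1, XMM2, XMM3)
+//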
+func (self *Program) VFMSUBPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VFMSUBPD", 4, Operands { v0, v1, v2, v3 })
+ // VFMSUBPD xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[2]) << 3))
+ m.emit(0x6d)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x6d)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMSUBPD m128, xmm, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x6d)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFMSUBPD xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x6d)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMSUBPD ymm, ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit(0x6d)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x6d)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMSUBPD m256, ymm, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x6d)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFMSUBPD ymm, m256, ymm, ymm
+ if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x6d)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMSUBPD")
+ }
+ return p
+}
+
+// VFMSUBPS performs "Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFMSUBPS
+// Supported forms : (6 forms)
+//
+// * VFMSUBPS xmm, xmm, xmm, xmm [FMA4]
+// * VFMSUBPS m128, xmm, xmm, xmm [FMA4]
+// * VFMSUBPS xmm, m128, xmm, xmm [FMA4]
+// * VFMSUBPS ymm, ymm, ymm, ymm [FMA4]
+// * VFMSUBPS m256, ymm, ymm, ymm [FMA4]
+// * VFMSUBPS ymm, m256, ymm, ymm [FMA4]
+//
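+// Illustrative only (assumes a *Program value p and the YMM register
+// constants):
+//
+//     p.VFMSUBPS(YMM0, YMM1, YMM2, YMM3)
+//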
+func (self *Program) VFMSUBPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VFMSUBPS", 4, Operands { v0, v1, v2, v3 })
+ // VFMSUBPS xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[2]) << 3))
+ m.emit(0x6c)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x6c)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMSUBPS m128, xmm, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x6c)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFMSUBPS xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x6c)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMSUBPS ymm, ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit(0x6c)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x6c)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMSUBPS m256, ymm, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x6c)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFMSUBPS ymm, m256, ymm, ymm
+ if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x6c)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMSUBPS")
+ }
+ return p
+}
+
+// VFMSUBSD performs "Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFMSUBSD
+// Supported forms : (3 forms)
+//
+// * VFMSUBSD xmm, xmm, xmm, xmm [FMA4]
+// * VFMSUBSD m64, xmm, xmm, xmm [FMA4]
+// * VFMSUBSD xmm, m64, xmm, xmm [FMA4]
+//
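+// A sketch of the scalar register form, assuming a *Program value p and
+// the XMM register constants; the m64 forms additionally take a memory
+// operand, whose constructor is defined elsewhere in this package:
+//
+//     p.VFMSUBSD(XMM0, XMM1, XMM2, XMM3)
+//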
+func (self *Program) VFMSUBSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VFMSUBSD", 4, Operands { v0, v1, v2, v3 })
+ // VFMSUBSD xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[2]) << 3))
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMSUBSD m64, xmm, xmm, xmm
+ if isM64(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x6f)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFMSUBSD xmm, m64, xmm, xmm
+ if isXMM(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x6f)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMSUBSD")
+ }
+ return p
+}
+
+// VFMSUBSS performs "Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFMSUBSS
+// Supported forms : (3 forms)
+//
+// * VFMSUBSS xmm, xmm, xmm, xmm [FMA4]
+// * VFMSUBSS m32, xmm, xmm, xmm [FMA4]
+// * VFMSUBSS xmm, m32, xmm, xmm [FMA4]
+//
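+// Illustrative only (assumes a *Program value p and the XMM register
+// constants):
+//
+//     p.VFMSUBSS(XMM0, XMM1, XMM2, XMM3)
+//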
+func (self *Program) VFMSUBSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VFMSUBSS", 4, Operands { v0, v1, v2, v3 })
+ // VFMSUBSS xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[2]) << 3))
+ m.emit(0x6e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x6e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFMSUBSS m32, xmm, xmm, xmm
+ if isM32(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x6e)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFMSUBSS xmm, m32, xmm, xmm
+ if isXMM(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x6e)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFMSUBSS")
+ }
+ return p
+}
+
+// VFNMADD132PD performs "Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMADD132PD
+// Supported forms : (11 forms)
+//
+// * VFNMADD132PD xmm, xmm, xmm [FMA3]
+// * VFNMADD132PD m128, xmm, xmm [FMA3]
+// * VFNMADD132PD ymm, ymm, ymm [FMA3]
+// * VFNMADD132PD m256, ymm, ymm [FMA3]
+// * VFNMADD132PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFNMADD132PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFNMADD132PD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFNMADD132PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFNMADD132PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFNMADD132PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFNMADD132PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
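+// A minimal sketch (assumes a *Program value p and the XMM register
+// constants; FNMADD computes the negated product plus the addend):
+//
+//     p.VFNMADD132PD(XMM0, XMM1, XMM2)
+//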
+func (self *Program) VFNMADD132PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFNMADD132PD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFNMADD132PD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFNMADD132PD takes 3 or 4 operands")
+ }
+ // VFNMADD132PD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0x9c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD132PD m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x9c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMADD132PD ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0x9c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD132PD m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x9c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMADD132PD m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x9c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFNMADD132PD {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x9c)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFNMADD132PD zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x9c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD132PD m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x9c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFNMADD132PD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x9c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD132PD m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x9c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFNMADD132PD ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x9c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMADD132PD")
+ }
+ return p
+}
+
+// VFNMADD132PS performs "Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMADD132PS
+// Supported forms : (11 forms)
+//
+// * VFNMADD132PS xmm, xmm, xmm [FMA3]
+// * VFNMADD132PS m128, xmm, xmm [FMA3]
+// * VFNMADD132PS ymm, ymm, ymm [FMA3]
+// * VFNMADD132PS m256, ymm, ymm [FMA3]
+// * VFNMADD132PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFNMADD132PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFNMADD132PS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFNMADD132PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFNMADD132PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFNMADD132PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFNMADD132PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
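+// Illustrative only (assumes a *Program value p and the YMM register
+// constants):
+//
+//     p.VFNMADD132PS(YMM0, YMM1, YMM2)
+//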
+func (self *Program) VFNMADD132PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFNMADD132PS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFNMADD132PS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFNMADD132PS takes 3 or 4 operands")
+ }
+ // VFNMADD132PS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x9c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD132PS m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x9c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMADD132PS ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x9c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD132PS m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x9c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMADD132PS m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x9c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFNMADD132PS {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
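+ // Same EVEX layout as above, but in this {er} form the rounding mode
+ // travels in the L'L bits (vcode(v[0]) << 5) and the 0x10 term sets
+ // EVEX.b, selecting static rounding with suppress-all-exceptions.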
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x9c)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFNMADD132PS zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x9c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD132PS m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x9c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFNMADD132PS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x9c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD132PS m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x9c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFNMADD132PS ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x9c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMADD132PS")
+ }
+ return p
+}
+
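+// The generated methods take their operands in AT&T order, so the last
+// argument is the destination. For the 132 variant the operation is
+// dst = -(dst * src3) + src2. A minimal usage sketch follows; the helper
+// is illustrative only and not part of the generated API:
+func exampleVFNMADD132PS(p *Program) {
+ p.VFNMADD132PS(XMM2, XMM1, XMM0) // xmm0 = -(xmm0 * xmm2) + xmm1
+ p.VFNMADD132PS(YMM2, YMM1, YMM0) // 256-bit VEX form, same operand roles
+}
+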
+// VFNMADD132SD performs "Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMADD132SD
+// Supported forms : (5 forms)
+//
+// * VFNMADD132SD xmm, xmm, xmm [FMA3]
+// * VFNMADD132SD m64, xmm, xmm [FMA3]
+// * VFNMADD132SD m64, xmm, xmm{k}{z} [AVX512F]
+// * VFNMADD132SD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFNMADD132SD xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VFNMADD132SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFNMADD132SD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFNMADD132SD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFNMADD132SD takes 3 or 4 operands")
+ }
+ // VFNMADD132SD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0x9d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD132SD m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x9d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMADD132SD m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x9d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VFNMADD132SD {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x9d)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFNMADD132SD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x9d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMADD132SD")
+ }
+ return p
+}
+
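+// The scalar variants touch only the low lane: VFNMADD132SD computes
+// -(dst * src3) + src2 over bits 63:0, preserves bits 127:64 of the
+// destination, and zeroes everything above (standard VEX behaviour).
+// Illustrative sketch, not part of the generated API:
+func exampleVFNMADD132SD(p *Program) {
+ p.VFNMADD132SD(XMM2, XMM1, XMM0) // xmm0[63:0] = -(xmm0[63:0]*xmm2[63:0]) + xmm1[63:0]
+}
+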
+// VFNMADD132SS performs "Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMADD132SS
+// Supported forms : (5 forms)
+//
+// * VFNMADD132SS xmm, xmm, xmm [FMA3]
+// * VFNMADD132SS m32, xmm, xmm [FMA3]
+// * VFNMADD132SS m32, xmm, xmm{k}{z} [AVX512F]
+// * VFNMADD132SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFNMADD132SS xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VFNMADD132SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFNMADD132SS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFNMADD132SS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFNMADD132SS takes 3 or 4 operands")
+ }
+ // VFNMADD132SS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x9d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD132SS m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x9d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMADD132SS m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x9d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VFNMADD132SS {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x9d)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFNMADD132SS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x9d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMADD132SS")
+ }
+ return p
+}
+
+// VFNMADD213PD performs "Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMADD213PD
+// Supported forms : (11 forms)
+//
+// * VFNMADD213PD xmm, xmm, xmm [FMA3]
+// * VFNMADD213PD m128, xmm, xmm [FMA3]
+// * VFNMADD213PD ymm, ymm, ymm [FMA3]
+// * VFNMADD213PD m256, ymm, ymm [FMA3]
+// * VFNMADD213PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFNMADD213PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFNMADD213PD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFNMADD213PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFNMADD213PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFNMADD213PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFNMADD213PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFNMADD213PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFNMADD213PD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFNMADD213PD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFNMADD213PD takes 3 or 4 operands")
+ }
+ // VFNMADD213PD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0xac)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD213PD m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xac)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMADD213PD ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0xac)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD213PD m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xac)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMADD213PD m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xac)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFNMADD213PD {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xac)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFNMADD213PD zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xac)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD213PD m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xac)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFNMADD213PD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xac)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD213PD m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xac)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFNMADD213PD ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xac)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMADD213PD")
+ }
+ return p
+}
+
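+// The 132/213/231 suffixes only reorder which operands feed the multiply
+// and which one is added. For a call (a, b, c) in AT&T order, where c is
+// the destination:
+//
+// 132: c = -(c * a) + b
+// 213: c = -(b * c) + a
+// 231: c = -(b * a) + c
+//
+// Picking the variant whose destination already holds the right term
+// avoids extra register moves. Illustrative sketch:
+func exampleVFNMADD213PD(p *Program) {
+ p.VFNMADD213PD(XMM2, XMM1, XMM0) // xmm0 = -(xmm1 * xmm0) + xmm2
+}
+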
+// VFNMADD213PS performs "Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMADD213PS
+// Supported forms : (11 forms)
+//
+// * VFNMADD213PS xmm, xmm, xmm [FMA3]
+// * VFNMADD213PS m128, xmm, xmm [FMA3]
+// * VFNMADD213PS ymm, ymm, ymm [FMA3]
+// * VFNMADD213PS m256, ymm, ymm [FMA3]
+// * VFNMADD213PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFNMADD213PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFNMADD213PS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFNMADD213PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFNMADD213PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFNMADD213PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFNMADD213PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFNMADD213PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFNMADD213PS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFNMADD213PS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFNMADD213PS takes 3 or 4 operands")
+ }
+ // VFNMADD213PS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0xac)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD213PS m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xac)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMADD213PS ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0xac)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD213PS m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xac)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMADD213PS m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xac)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFNMADD213PS {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xac)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFNMADD213PS zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xac)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD213PS m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xac)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFNMADD213PS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xac)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD213PS m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xac)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFNMADD213PS ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xac)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMADD213PS")
+ }
+ return p
+}
+
+// VFNMADD213SD performs "Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMADD213SD
+// Supported forms : (5 forms)
+//
+// * VFNMADD213SD xmm, xmm, xmm [FMA3]
+// * VFNMADD213SD m64, xmm, xmm [FMA3]
+// * VFNMADD213SD m64, xmm, xmm{k}{z} [AVX512F]
+// * VFNMADD213SD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFNMADD213SD xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VFNMADD213SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFNMADD213SD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFNMADD213SD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFNMADD213SD takes 3 or 4 operands")
+ }
+ // VFNMADD213SD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0xad)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD213SD m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xad)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMADD213SD m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xad)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VFNMADD213SD {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xad)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFNMADD213SD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xad)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMADD213SD")
+ }
+ return p
+}
+
+// VFNMADD213SS performs "Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMADD213SS
+// Supported forms : (5 forms)
+//
+// * VFNMADD213SS xmm, xmm, xmm [FMA3]
+// * VFNMADD213SS m32, xmm, xmm [FMA3]
+// * VFNMADD213SS m32, xmm, xmm{k}{z} [AVX512F]
+// * VFNMADD213SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFNMADD213SS xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VFNMADD213SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFNMADD213SS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFNMADD213SS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFNMADD213SS takes 3 or 4 operands")
+ }
+ // VFNMADD213SS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0xad)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD213SS m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xad)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMADD213SS m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xad)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VFNMADD213SS {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xad)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFNMADD213SS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xad)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMADD213SS")
+ }
+ return p
+}
+
+// VFNMADD231PD performs "Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMADD231PD
+// Supported forms : (11 forms)
+//
+// * VFNMADD231PD xmm, xmm, xmm [FMA3]
+// * VFNMADD231PD m128, xmm, xmm [FMA3]
+// * VFNMADD231PD ymm, ymm, ymm [FMA3]
+// * VFNMADD231PD m256, ymm, ymm [FMA3]
+// * VFNMADD231PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFNMADD231PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFNMADD231PD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFNMADD231PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFNMADD231PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFNMADD231PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFNMADD231PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFNMADD231PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFNMADD231PD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFNMADD231PD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFNMADD231PD takes 3 or 4 operands")
+ }
+ // VFNMADD231PD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0xbc)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD231PD m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xbc)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMADD231PD ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0xbc)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD231PD m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xbc)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMADD231PD m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xbc)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFNMADD231PD {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xbc)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFNMADD231PD zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xbc)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD231PD m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xbc)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFNMADD231PD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xbc)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD231PD m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xbc)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFNMADD231PD ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xbc)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMADD231PD")
+ }
+ return p
+}
+
+// VFNMADD231PS performs "Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMADD231PS
+// Supported forms : (11 forms)
+//
+// * VFNMADD231PS xmm, xmm, xmm [FMA3]
+// * VFNMADD231PS m128, xmm, xmm [FMA3]
+// * VFNMADD231PS ymm, ymm, ymm [FMA3]
+// * VFNMADD231PS m256, ymm, ymm [FMA3]
+// * VFNMADD231PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFNMADD231PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFNMADD231PS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFNMADD231PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFNMADD231PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFNMADD231PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFNMADD231PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFNMADD231PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFNMADD231PS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFNMADD231PS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFNMADD231PS takes 3 or 4 operands")
+ }
+ // VFNMADD231PS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0xbc)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD231PS m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xbc)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMADD231PS ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0xbc)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD231PS m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xbc)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMADD231PS m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xbc)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFNMADD231PS {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xbc)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFNMADD231PS zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xbc)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD231PS m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xbc)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFNMADD231PS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xbc)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD231PS m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xbc)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFNMADD231PS ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xbc)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMADD231PS")
+ }
+ return p
+}
+
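+// The 231 form adds into the destination (dst = -(src2 * src3) + dst),
+// making it the natural choice for a fused negative multiply-accumulate
+// loop. Illustrative sketch; plain ZMM operands select the EVEX encoding
+// and thus require AVX-512F:
+func exampleVFNMADD231PS(p *Program) {
+ p.VFNMADD231PS(ZMM1, ZMM2, ZMM0) // zmm0 = -(zmm2 * zmm1) + zmm0
+}
+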
+// VFNMADD231SD performs "Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMADD231SD
+// Supported forms : (5 forms)
+//
+// * VFNMADD231SD xmm, xmm, xmm [FMA3]
+// * VFNMADD231SD m64, xmm, xmm [FMA3]
+// * VFNMADD231SD m64, xmm, xmm{k}{z} [AVX512F]
+// * VFNMADD231SD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFNMADD231SD xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VFNMADD231SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFNMADD231SD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFNMADD231SD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFNMADD231SD takes 3 or 4 operands")
+ }
+ // VFNMADD231SD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0xbd)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD231SD m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xbd)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMADD231SD m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xbd)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VFNMADD231SD {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xbd)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFNMADD231SD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xbd)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMADD231SD")
+ }
+ return p
+}
+
+// VFNMADD231SS performs "Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMADD231SS
+// Supported forms : (5 forms)
+//
+// * VFNMADD231SS xmm, xmm, xmm [FMA3]
+// * VFNMADD231SS m32, xmm, xmm [FMA3]
+// * VFNMADD231SS m32, xmm, xmm{k}{z} [AVX512F]
+// * VFNMADD231SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFNMADD231SS xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VFNMADD231SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFNMADD231SS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFNMADD231SS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFNMADD231SS takes 3 or 4 operands")
+ }
+ // VFNMADD231SS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0xbd)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMADD231SS m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xbd)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMADD231SS m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xbd)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VFNMADD231SS {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xbd)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFNMADD231SS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xbd)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMADD231SS")
+ }
+ return p
+}
+
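+// Every generated method panics with "invalid operands for ..." when no
+// encoding matches. Callers driven by dynamic input may prefer an error;
+// the guard below is a dependency-free sketch (tryEncode and
+// invalidOperandsError are illustrative names, not part of this package):
+type invalidOperandsError string
+
+func (e invalidOperandsError) Error() string { return string(e) }
+
+func tryEncode(encode func()) (err error) {
+ defer func() {
+ if r := recover(); r != nil {
+ if s, ok := r.(string); ok {
+ err = invalidOperandsError(s)
+ } else {
+ panic(r) // re-raise anything unexpected
+ }
+ }
+ }()
+ encode()
+ return
+}
+
+// e.g. err := tryEncode(func() { p.VFNMADD231SS(XMM2, XMM1, XMM0) })
+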
+// VFNMADDPD performs "Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMADDPD
+// Supported forms : (6 forms)
+//
+// * VFNMADDPD xmm, xmm, xmm, xmm [FMA4]
+// * VFNMADDPD m128, xmm, xmm, xmm [FMA4]
+// * VFNMADDPD xmm, m128, xmm, xmm [FMA4]
+// * VFNMADDPD ymm, ymm, ymm, ymm [FMA4]
+// * VFNMADDPD m256, ymm, ymm, ymm [FMA4]
+// * VFNMADDPD ymm, m256, ymm, ymm [FMA4]
+//
+func (self *Program) VFNMADDPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VFNMADDPD", 4, Operands { v0, v1, v2, v3 })
+ // VFNMADDPD xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[2]) << 3))
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFNMADDPD m128, xmm, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x79)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFNMADDPD xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x79)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFNMADDPD ymm, ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFNMADDPD m256, ymm, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x79)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFNMADDPD ymm, m256, ymm, ymm
+ if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x79)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMADDPD")
+ }
+ return p
+}
+
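+// The FMA4 forms (implemented only on certain AMD CPUs) are
+// non-destructive: the destination is a separate fourth operand, again
+// last in AT&T order. The register-register case registers two encodings
+// above because one multiplicand may travel either in ModRM.rm or in the
+// /is4 immediate nibble. Illustrative sketch:
+func exampleVFNMADDPD(p *Program) {
+ p.VFNMADDPD(XMM3, XMM2, XMM1, XMM0) // xmm0 = -(xmm1 * xmm2) + xmm3
+}
+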
+// VFNMADDPS performs "Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMADDPS
+// Supported forms : (6 forms)
+//
+// * VFNMADDPS xmm, xmm, xmm, xmm [FMA4]
+// * VFNMADDPS m128, xmm, xmm, xmm [FMA4]
+// * VFNMADDPS xmm, m128, xmm, xmm [FMA4]
+// * VFNMADDPS ymm, ymm, ymm, ymm [FMA4]
+// * VFNMADDPS m256, ymm, ymm, ymm [FMA4]
+// * VFNMADDPS ymm, m256, ymm, ymm [FMA4]
+//
+func (self *Program) VFNMADDPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VFNMADDPS", 4, Operands { v0, v1, v2, v3 })
+ // VFNMADDPS xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[2]) << 3))
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFNMADDPS m128, xmm, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x78)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFNMADDPS xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x78)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFNMADDPS ymm, ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFNMADDPS m256, ymm, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x78)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFNMADDPS ymm, m256, ymm, ymm
+ if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x78)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMADDPS")
+ }
+ return p
+}
+
+// VFNMADDSD performs "Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMADDSD
+// Supported forms : (3 forms)
+//
+// * VFNMADDSD xmm, xmm, xmm, xmm [FMA4]
+// * VFNMADDSD m64, xmm, xmm, xmm [FMA4]
+// * VFNMADDSD xmm, m64, xmm, xmm [FMA4]
+//
+func (self *Program) VFNMADDSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VFNMADDSD", 4, Operands { v0, v1, v2, v3 })
+ // VFNMADDSD xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[2]) << 3))
+ m.emit(0x7b)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x7b)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFNMADDSD m64, xmm, xmm, xmm
+ if isM64(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x7b)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFNMADDSD xmm, m64, xmm, xmm
+ if isXMM(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x7b)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMADDSD")
+ }
+ return p
+}
+
+// VFNMADDSS performs "Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMADDSS
+// Supported forms : (3 forms)
+//
+// * VFNMADDSS xmm, xmm, xmm, xmm [FMA4]
+// * VFNMADDSS m32, xmm, xmm, xmm [FMA4]
+// * VFNMADDSS xmm, m32, xmm, xmm [FMA4]
+//
+func (self *Program) VFNMADDSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VFNMADDSS", 4, Operands { v0, v1, v2, v3 })
+ // VFNMADDSS xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[2]) << 3))
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFNMADDSS m32, xmm, xmm, xmm
+ if isM32(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x7a)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFNMADDSS xmm, m32, xmm, xmm
+ if isXMM(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x7a)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMADDSS")
+ }
+ return p
+}
+
+// VFNMSUB132PD performs "Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMSUB132PD
+// Supported forms : (11 forms)
+//
+// * VFNMSUB132PD xmm, xmm, xmm [FMA3]
+// * VFNMSUB132PD m128, xmm, xmm [FMA3]
+// * VFNMSUB132PD ymm, ymm, ymm [FMA3]
+// * VFNMSUB132PD m256, ymm, ymm [FMA3]
+// * VFNMSUB132PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFNMSUB132PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFNMSUB132PD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFNMSUB132PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFNMSUB132PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFNMSUB132PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFNMSUB132PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFNMSUB132PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFNMSUB132PD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFNMSUB132PD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFNMSUB132PD takes 3 or 4 operands")
+ }
+ // VFNMSUB132PD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0x9e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB132PD m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x9e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMSUB132PD ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0x9e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB132PD m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x9e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMSUB132PD m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x9e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFNMSUB132PD {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x9e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFNMSUB132PD zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x9e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB132PD m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x9e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFNMSUB132PD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x9e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB132PD m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x9e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFNMSUB132PD ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x9e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMSUB132PD")
+ }
+ return p
+}
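+
+// Note: the variadic tail distinguishes the 3-operand forms from the 4-operand
+// embedded-rounding ({er}) form; any other arity panics. A hedged sketch,
+// assuming a *Program p and this package's XMM/ZMM register constants
+// (illustrative only):
+//
+//     p.VFNMSUB132PD(XMM0, XMM1, XMM2)   // FMA3 xmm, xmm, xmm
+//     p.VFNMSUB132PD(ZMM0, ZMM1, ZMM2)   // AVX-512 zmm, zmm, zmm{k}{z}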
+
+// VFNMSUB132PS performs "Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMSUB132PS
+// Supported forms : (11 forms)
+//
+// * VFNMSUB132PS xmm, xmm, xmm [FMA3]
+// * VFNMSUB132PS m128, xmm, xmm [FMA3]
+// * VFNMSUB132PS ymm, ymm, ymm [FMA3]
+// * VFNMSUB132PS m256, ymm, ymm [FMA3]
+// * VFNMSUB132PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFNMSUB132PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFNMSUB132PS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFNMSUB132PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFNMSUB132PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFNMSUB132PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFNMSUB132PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFNMSUB132PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFNMSUB132PS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFNMSUB132PS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFNMSUB132PS takes 3 or 4 operands")
+ }
+ // VFNMSUB132PS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x9e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB132PS m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x9e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMSUB132PS ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x9e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB132PS m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x9e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMSUB132PS m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x9e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFNMSUB132PS {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x9e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFNMSUB132PS zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x9e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB132PS m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x9e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFNMSUB132PS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x9e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB132PS m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x9e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFNMSUB132PS ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x9e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMSUB132PS")
+ }
+ return p
+}
+
+// VFNMSUB132SD performs "Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMSUB132SD
+// Supported forms : (5 forms)
+//
+// * VFNMSUB132SD xmm, xmm, xmm [FMA3]
+// * VFNMSUB132SD m64, xmm, xmm [FMA3]
+// * VFNMSUB132SD m64, xmm, xmm{k}{z} [AVX512F]
+// * VFNMSUB132SD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFNMSUB132SD xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VFNMSUB132SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFNMSUB132SD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFNMSUB132SD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFNMSUB132SD takes 3 or 4 operands")
+ }
+ // VFNMSUB132SD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0x9f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB132SD m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x9f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMSUB132SD m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x9f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VFNMSUB132SD {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x9f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFNMSUB132SD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x9f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMSUB132SD")
+ }
+ return p
+}
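+
+// Note: in the EVEX memory form above, m.mrsd(..., 8) applies the AVX-512
+// compressed-disp8*N convention, scaling the 8-bit displacement by the m64
+// element size (the m32 scalar forms below use 4), whereas the VEX form keeps
+// a scale of 1.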
+
+// VFNMSUB132SS performs "Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMSUB132SS
+// Supported forms : (5 forms)
+//
+// * VFNMSUB132SS xmm, xmm, xmm [FMA3]
+// * VFNMSUB132SS m32, xmm, xmm [FMA3]
+// * VFNMSUB132SS m32, xmm, xmm{k}{z} [AVX512F]
+// * VFNMSUB132SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFNMSUB132SS xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VFNMSUB132SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFNMSUB132SS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFNMSUB132SS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFNMSUB132SS takes 3 or 4 operands")
+ }
+ // VFNMSUB132SS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x9f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB132SS m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x9f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMSUB132SS m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x9f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VFNMSUB132SS {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x9f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFNMSUB132SS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x9f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMSUB132SS")
+ }
+ return p
+}
+
+// VFNMSUB213PD performs "Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMSUB213PD
+// Supported forms : (11 forms)
+//
+// * VFNMSUB213PD xmm, xmm, xmm [FMA3]
+// * VFNMSUB213PD m128, xmm, xmm [FMA3]
+// * VFNMSUB213PD ymm, ymm, ymm [FMA3]
+// * VFNMSUB213PD m256, ymm, ymm [FMA3]
+// * VFNMSUB213PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFNMSUB213PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFNMSUB213PD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFNMSUB213PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFNMSUB213PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFNMSUB213PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFNMSUB213PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFNMSUB213PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFNMSUB213PD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFNMSUB213PD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFNMSUB213PD takes 3 or 4 operands")
+ }
+ // VFNMSUB213PD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0xae)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB213PD m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xae)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMSUB213PD ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0xae)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB213PD m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xae)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMSUB213PD m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xae)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFNMSUB213PD {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xae)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFNMSUB213PD zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xae)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB213PD m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xae)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFNMSUB213PD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xae)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB213PD m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xae)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFNMSUB213PD ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xae)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMSUB213PD")
+ }
+ return p
+}
+
+// VFNMSUB213PS performs "Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMSUB213PS
+// Supported forms : (11 forms)
+//
+// * VFNMSUB213PS xmm, xmm, xmm [FMA3]
+// * VFNMSUB213PS m128, xmm, xmm [FMA3]
+// * VFNMSUB213PS ymm, ymm, ymm [FMA3]
+// * VFNMSUB213PS m256, ymm, ymm [FMA3]
+// * VFNMSUB213PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFNMSUB213PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFNMSUB213PS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFNMSUB213PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFNMSUB213PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFNMSUB213PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFNMSUB213PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFNMSUB213PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFNMSUB213PS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFNMSUB213PS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFNMSUB213PS takes 3 or 4 operands")
+ }
+ // VFNMSUB213PS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0xae)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB213PS m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xae)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMSUB213PS ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0xae)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB213PS m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xae)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMSUB213PS m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xae)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFNMSUB213PS {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xae)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFNMSUB213PS zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xae)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB213PS m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xae)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFNMSUB213PS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xae)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB213PS m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xae)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFNMSUB213PS ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xae)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMSUB213PS")
+ }
+ return p
+}
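+
+// Note: in the EVEX forms above, the 512-bit variants require only AVX512F,
+// while the 128-/256-bit variants additionally need AVX512VL, hence the
+// ISA_AVX512VL | ISA_AVX512F union passed to self.require.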
+
+// VFNMSUB213SD performs "Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMSUB213SD
+// Supported forms : (5 forms)
+//
+// * VFNMSUB213SD xmm, xmm, xmm [FMA3]
+// * VFNMSUB213SD m64, xmm, xmm [FMA3]
+// * VFNMSUB213SD m64, xmm, xmm{k}{z} [AVX512F]
+// * VFNMSUB213SD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFNMSUB213SD xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VFNMSUB213SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFNMSUB213SD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFNMSUB213SD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFNMSUB213SD takes 3 or 4 operands")
+ }
+ // VFNMSUB213SD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0xaf)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB213SD m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xaf)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMSUB213SD m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xaf)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VFNMSUB213SD {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xaf)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFNMSUB213SD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xaf)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMSUB213SD")
+ }
+ return p
+}
+
+// VFNMSUB213SS performs "Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMSUB213SS
+// Supported forms : (5 forms)
+//
+// * VFNMSUB213SS xmm, xmm, xmm [FMA3]
+// * VFNMSUB213SS m32, xmm, xmm [FMA3]
+// * VFNMSUB213SS m32, xmm, xmm{k}{z} [AVX512F]
+// * VFNMSUB213SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFNMSUB213SS xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VFNMSUB213SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFNMSUB213SS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFNMSUB213SS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFNMSUB213SS takes 3 or 4 operands")
+ }
+ // VFNMSUB213SS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0xaf)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB213SS m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xaf)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMSUB213SS m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xaf)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VFNMSUB213SS {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xaf)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFNMSUB213SS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xaf)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMSUB213SS")
+ }
+ return p
+}
+
+// VFNMSUB231PD performs "Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMSUB231PD
+// Supported forms : (11 forms)
+//
+// * VFNMSUB231PD xmm, xmm, xmm [FMA3]
+// * VFNMSUB231PD m128, xmm, xmm [FMA3]
+// * VFNMSUB231PD ymm, ymm, ymm [FMA3]
+// * VFNMSUB231PD m256, ymm, ymm [FMA3]
+// * VFNMSUB231PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFNMSUB231PD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFNMSUB231PD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFNMSUB231PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFNMSUB231PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFNMSUB231PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFNMSUB231PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFNMSUB231PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFNMSUB231PD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFNMSUB231PD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFNMSUB231PD takes 3 or 4 operands")
+ }
+ // VFNMSUB231PD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0xbe)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB231PD m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xbe)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMSUB231PD ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0xbe)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB231PD m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xbe)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMSUB231PD m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xbe)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFNMSUB231PD {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xbe)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFNMSUB231PD zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xbe)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB231PD m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xbe)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFNMSUB231PD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xbe)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB231PD m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xbe)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFNMSUB231PD ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xbe)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMSUB231PD")
+ }
+ return p
+}
+
+// VFNMSUB231PS performs "Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMSUB231PS
+// Supported forms : (11 forms)
+//
+// * VFNMSUB231PS xmm, xmm, xmm [FMA3]
+// * VFNMSUB231PS m128, xmm, xmm [FMA3]
+// * VFNMSUB231PS ymm, ymm, ymm [FMA3]
+// * VFNMSUB231PS m256, ymm, ymm [FMA3]
+// * VFNMSUB231PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFNMSUB231PS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFNMSUB231PS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFNMSUB231PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFNMSUB231PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFNMSUB231PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFNMSUB231PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFNMSUB231PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFNMSUB231PS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFNMSUB231PS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFNMSUB231PS takes 3 or 4 operands")
+ }
+ // VFNMSUB231PS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0xbe)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB231PS m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xbe)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMSUB231PS ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0xbe)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB231PS m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xbe)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMSUB231PS m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xbe)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VFNMSUB231PS {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xbe)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFNMSUB231PS zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xbe)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB231PS m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xbe)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VFNMSUB231PS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xbe)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB231PS m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xbe)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VFNMSUB231PS ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xbe)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMSUB231PS")
+ }
+ return p
+}
+
+// VFNMSUB231SD performs "Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMSUB231SD
+// Supported forms : (5 forms)
+//
+// * VFNMSUB231SD xmm, xmm, xmm [FMA3]
+// * VFNMSUB231SD m64, xmm, xmm [FMA3]
+// * VFNMSUB231SD m64, xmm, xmm{k}{z} [AVX512F]
+// * VFNMSUB231SD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFNMSUB231SD xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VFNMSUB231SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFNMSUB231SD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFNMSUB231SD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFNMSUB231SD takes 3 or 4 operands")
+ }
+ // VFNMSUB231SD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0xbf)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB231SD m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xbf)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMSUB231SD m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xbf)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VFNMSUB231SD {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xbf)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFNMSUB231SD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xbf)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMSUB231SD")
+ }
+ return p
+}
+
+// VFNMSUB231SS performs "Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMSUB231SS
+// Supported forms : (5 forms)
+//
+// * VFNMSUB231SS xmm, xmm, xmm [FMA3]
+// * VFNMSUB231SS m32, xmm, xmm [FMA3]
+// * VFNMSUB231SS m32, xmm, xmm{k}{z} [AVX512F]
+// * VFNMSUB231SS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VFNMSUB231SS xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VFNMSUB231SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VFNMSUB231SS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VFNMSUB231SS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VFNMSUB231SS takes 3 or 4 operands")
+ }
+ // VFNMSUB231SS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0xbf)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFNMSUB231SS m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_FMA3)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xbf)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VFNMSUB231SS m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xbf)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VFNMSUB231SS {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xbf)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VFNMSUB231SS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xbf)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMSUB231SS")
+ }
+ return p
+}
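+
+// Note on the VFNMSUB family above: the 132/213/231 operand orders use the
+// opcode pairs 0x9e/0x9f, 0xae/0xaf and 0xbe/0xbf, with the even opcode for the
+// packed (PS/PD) forms and the odd one for the scalar (SS/SD) forms; within
+// each pair, double precision sets VEX.W (prefix bytes 0xf9/0xfd) and single
+// precision clears it (0x79/0x7d).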
+
+// VFNMSUBPD performs "Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMSUBPD
+// Supported forms : (6 forms)
+//
+// * VFNMSUBPD xmm, xmm, xmm, xmm [FMA4]
+// * VFNMSUBPD m128, xmm, xmm, xmm [FMA4]
+// * VFNMSUBPD xmm, m128, xmm, xmm [FMA4]
+// * VFNMSUBPD ymm, ymm, ymm, ymm [FMA4]
+// * VFNMSUBPD m256, ymm, ymm, ymm [FMA4]
+// * VFNMSUBPD ymm, m256, ymm, ymm [FMA4]
+//
+func (self *Program) VFNMSUBPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VFNMSUBPD", 4, Operands { v0, v1, v2, v3 })
+ // VFNMSUBPD xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[2]) << 3))
+ m.emit(0x7d)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x7d)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFNMSUBPD m128, xmm, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x7d)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFNMSUBPD xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x7d)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFNMSUBPD ymm, ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit(0x7d)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x7d)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFNMSUBPD m256, ymm, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x7d)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFNMSUBPD ymm, m256, ymm, ymm
+ if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x7d)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMSUBPD")
+ }
+ return p
+}
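+
+// Note: FMA4 accepts the memory operand as either multiplicand, which is why
+// both the (m128, xmm, xmm, xmm) and (xmm, m128, xmm, xmm) forms exist above;
+// the two differ only in VEX.W (0x81 vs 0x01 in the vex3 call) and in which
+// register lands in the trailing imm8[7:4] selector byte.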
+
+// VFNMSUBPS performs "Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMSUBPS
+// Supported forms : (6 forms)
+//
+// * VFNMSUBPS xmm, xmm, xmm, xmm [FMA4]
+// * VFNMSUBPS m128, xmm, xmm, xmm [FMA4]
+// * VFNMSUBPS xmm, m128, xmm, xmm [FMA4]
+// * VFNMSUBPS ymm, ymm, ymm, ymm [FMA4]
+// * VFNMSUBPS m256, ymm, ymm, ymm [FMA4]
+// * VFNMSUBPS ymm, m256, ymm, ymm [FMA4]
+//
+func (self *Program) VFNMSUBPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VFNMSUBPS", 4, Operands { v0, v1, v2, v3 })
+ // VFNMSUBPS xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[2]) << 3))
+ m.emit(0x7c)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x7c)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFNMSUBPS m128, xmm, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x7c)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFNMSUBPS xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x7c)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFNMSUBPS ymm, ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit(0x7c)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x7c)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFNMSUBPS m256, ymm, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x7c)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFNMSUBPS ymm, m256, ymm, ymm
+ if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x7c)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMSUBPS")
+ }
+ return p
+}
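+
+// A usage sketch for the FMA4 forms above, assuming a *Program value p from
+// this package and its exported XMM registers (operands run source to
+// destination, so the last argument is the one written):
+//
+//     p.VFNMSUBPS(XMM0, XMM1, XMM2, XMM3)
+//
+// The all-register form registers two candidate encodings: they differ only
+// in VEX.W (the 0xf9 vs 0x79 byte), which swaps whether the first or second
+// source travels in ModRM.rm or in the trailing /is4 byte.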
+
+// VFNMSUBSD performs "Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMSUBSD
+// Supported forms : (3 forms)
+//
+// * VFNMSUBSD xmm, xmm, xmm, xmm [FMA4]
+// * VFNMSUBSD m64, xmm, xmm, xmm [FMA4]
+// * VFNMSUBSD xmm, m64, xmm, xmm [FMA4]
+//
+func (self *Program) VFNMSUBSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VFNMSUBSD", 4, Operands { v0, v1, v2, v3 })
+ // VFNMSUBSD xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[2]) << 3))
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFNMSUBSD m64, xmm, xmm, xmm
+ if isM64(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x7f)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFNMSUBSD xmm, m64, xmm, xmm
+ if isXMM(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x7f)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMSUBSD")
+ }
+ return p
+}
+
+// VFNMSUBSS performs "Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFNMSUBSS
+// Supported forms : (3 forms)
+//
+// * VFNMSUBSS xmm, xmm, xmm, xmm [FMA4]
+// * VFNMSUBSS m32, xmm, xmm, xmm [FMA4]
+// * VFNMSUBSS xmm, m32, xmm, xmm [FMA4]
+//
+func (self *Program) VFNMSUBSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VFNMSUBSS", 4, Operands { v0, v1, v2, v3 })
+ // VFNMSUBSS xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[2]) << 3))
+ m.emit(0x7e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x7e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VFNMSUBSS m32, xmm, xmm, xmm
+ if isM32(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0x7e)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VFNMSUBSS xmm, m32, xmm, xmm
+ if isXMM(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_FMA4)
+ p.domain = DomainFMA
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x7e)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFNMSUBSS")
+ }
+ return p
+}
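+
+// Each FMA4 scalar variant has two memory forms because either of the first
+// two sources may come from memory; the encoder picks the VEX.W variant
+// (0x81 vs 0x01 in the vex3 calls above) matching the operand position. A
+// sketch, with vm standing for a MemoryOperand built by the package's
+// addressing helpers (construction elided):
+//
+//     p.VFNMSUBSS(vm, XMM1, XMM2, XMM3)   // m32 in the first slot
+//     p.VFNMSUBSS(XMM1, vm, XMM2, XMM3)   // m32 in the second slot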
+
+// VFPCLASSPD performs "Test Class of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VFPCLASSPD
+// Supported forms : (6 forms)
+//
+// * VFPCLASSPD imm8, m512/m64bcst, k{k} [AVX512DQ]
+// * VFPCLASSPD imm8, zmm, k{k} [AVX512DQ]
+// * VFPCLASSPD imm8, m128/m64bcst, k{k} [AVX512DQ,AVX512VL]
+// * VFPCLASSPD imm8, m256/m64bcst, k{k} [AVX512DQ,AVX512VL]
+// * VFPCLASSPD imm8, xmm, k{k} [AVX512DQ,AVX512VL]
+// * VFPCLASSPD imm8, ymm, k{k} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VFPCLASSPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VFPCLASSPD", 3, Operands { v0, v1, v2 })
+ // VFPCLASSPD imm8, m512/m64bcst, k{k}
+ if isImm8(v0) && isM512M64bcst(v1) && isKk(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, bcode(v[1]))
+ m.emit(0x66)
+ m.mrsd(lcode(v[2]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFPCLASSPD imm8, zmm, k{k}
+ if isImm8(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit(kcode(v[2]) | 0x48)
+ m.emit(0x66)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFPCLASSPD imm8, m128/m64bcst, k{k}
+ if isImm8(v0) && isM128M64bcst(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, bcode(v[1]))
+ m.emit(0x66)
+ m.mrsd(lcode(v[2]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFPCLASSPD imm8, m256/m64bcst, k{k}
+ if isImm8(v0) && isM256M64bcst(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, bcode(v[1]))
+ m.emit(0x66)
+ m.mrsd(lcode(v[2]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFPCLASSPD imm8, xmm, k{k}
+ if isImm8(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit(kcode(v[2]) | 0x08)
+ m.emit(0x66)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFPCLASSPD imm8, ymm, k{k}
+ if isImm8(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit(kcode(v[2]) | 0x28)
+ m.emit(0x66)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFPCLASSPD")
+ }
+ return p
+}
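+
+// For the VFPCLASS family the imm8 is a class bitmask (per the Intel SDM:
+// bit 0 QNaN, 1 positive zero, 2 negative zero, 3 +Inf, 4 -Inf, 5 denormal,
+// 6 finite negative, 7 SNaN) and the result lands in a mask register. A
+// sketch, assuming the exported K registers satisfy the k{k} operand class:
+//
+//     p.VFPCLASSPD(0x81, ZMM1, K1)   // K1[i] set iff lane i is any NaN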
+
+// VFPCLASSPS performs "Test Class of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFPCLASSPS
+// Supported forms : (6 forms)
+//
+// * VFPCLASSPS imm8, m512/m32bcst, k{k} [AVX512DQ]
+// * VFPCLASSPS imm8, zmm, k{k} [AVX512DQ]
+// * VFPCLASSPS imm8, m128/m32bcst, k{k} [AVX512DQ,AVX512VL]
+// * VFPCLASSPS imm8, m256/m32bcst, k{k} [AVX512DQ,AVX512VL]
+// * VFPCLASSPS imm8, xmm, k{k} [AVX512DQ,AVX512VL]
+// * VFPCLASSPS imm8, ymm, k{k} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VFPCLASSPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VFPCLASSPS", 3, Operands { v0, v1, v2 })
+ // VFPCLASSPS imm8, m512/m32bcst, k{k}
+ if isImm8(v0) && isM512M32bcst(v1) && isKk(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, bcode(v[1]))
+ m.emit(0x66)
+ m.mrsd(lcode(v[2]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFPCLASSPS imm8, zmm, k{k}
+ if isImm8(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit(kcode(v[2]) | 0x48)
+ m.emit(0x66)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFPCLASSPS imm8, m128/m32bcst, k{k}
+ if isImm8(v0) && isM128M32bcst(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, bcode(v[1]))
+ m.emit(0x66)
+ m.mrsd(lcode(v[2]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFPCLASSPS imm8, m256/m32bcst, k{k}
+ if isImm8(v0) && isM256M32bcst(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, bcode(v[1]))
+ m.emit(0x66)
+ m.mrsd(lcode(v[2]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFPCLASSPS imm8, xmm, k{k}
+ if isImm8(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit(kcode(v[2]) | 0x08)
+ m.emit(0x66)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFPCLASSPS imm8, ymm, k{k}
+ if isImm8(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit(kcode(v[2]) | 0x28)
+ m.emit(0x66)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFPCLASSPS")
+ }
+ return p
+}
+
+// VFPCLASSSD performs "Test Class of Scalar Double-Precision Floating-Point Value".
+//
+// Mnemonic : VFPCLASSSD
+// Supported forms : (2 forms)
+//
+// * VFPCLASSSD imm8, xmm, k{k} [AVX512DQ]
+// * VFPCLASSSD imm8, m64, k{k} [AVX512DQ]
+//
+func (self *Program) VFPCLASSSD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VFPCLASSSD", 3, Operands { v0, v1, v2 })
+ // VFPCLASSSD imm8, xmm, k{k}
+ if isImm8(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit(kcode(v[2]) | 0x08)
+ m.emit(0x67)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFPCLASSSD imm8, m64, k{k}
+ if isImm8(v0) && isM64(v1) && isKk(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, 0)
+ m.emit(0x67)
+ m.mrsd(lcode(v[2]), addr(v[1]), 8)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFPCLASSSD")
+ }
+ return p
+}
+
+// VFPCLASSSS performs "Test Class of Scalar Single-Precision Floating-Point Value".
+//
+// Mnemonic : VFPCLASSSS
+// Supported forms : (2 forms)
+//
+// * VFPCLASSSS imm8, xmm, k{k} [AVX512DQ]
+// * VFPCLASSSS imm8, m32, k{k} [AVX512DQ]
+//
+func (self *Program) VFPCLASSSS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VFPCLASSSS", 3, Operands { v0, v1, v2 })
+ // VFPCLASSSS imm8, xmm, k{k}
+ if isImm8(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit(kcode(v[2]) | 0x08)
+ m.emit(0x67)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VFPCLASSSS imm8, m32, k{k}
+ if isImm8(v0) && isM32(v1) && isKk(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, 0)
+ m.emit(0x67)
+ m.mrsd(lcode(v[2]), addr(v[1]), 4)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFPCLASSSS")
+ }
+ return p
+}
+
+// VFRCZPD performs "Extract Fraction Packed Double-Precision Floating-Point".
+//
+// Mnemonic : VFRCZPD
+// Supported forms : (4 forms)
+//
+// * VFRCZPD xmm, xmm [XOP]
+// * VFRCZPD m128, xmm [XOP]
+// * VFRCZPD ymm, ymm [XOP]
+// * VFRCZPD m256, ymm [XOP]
+//
+func (self *Program) VFRCZPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VFRCZPD", 2, Operands { v0, v1 })
+ // VFRCZPD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x78)
+ m.emit(0x81)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFRCZPD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x81)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VFRCZPD ymm, ymm
+ if isYMM(v0) && isYMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7c)
+ m.emit(0x81)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFRCZPD m256, ymm
+ if isM256(v0) && isYMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x04, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x81)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFRCZPD")
+ }
+ return p
+}
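+
+// VFRCZPD and its three siblings below are AMD XOP instructions, hence the
+// 0x8f escape byte in the encodings and the DomainAMDSpecific tag; XOP only
+// ever shipped on AMD's Bulldozer line and is gone from Zen onward. A sketch
+// (source first, destination last):
+//
+//     p.VFRCZPD(XMM1, XMM0)   // XMM0 = fractional parts of XMM1's lanes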
+
+// VFRCZPS performs "Extract Fraction Packed Single-Precision Floating-Point".
+//
+// Mnemonic : VFRCZPS
+// Supported forms : (4 forms)
+//
+// * VFRCZPS xmm, xmm [XOP]
+// * VFRCZPS m128, xmm [XOP]
+// * VFRCZPS ymm, ymm [XOP]
+// * VFRCZPS m256, ymm [XOP]
+//
+func (self *Program) VFRCZPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VFRCZPS", 2, Operands { v0, v1 })
+ // VFRCZPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x78)
+ m.emit(0x80)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFRCZPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x80)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VFRCZPS ymm, ymm
+ if isYMM(v0) && isYMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7c)
+ m.emit(0x80)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFRCZPS m256, ymm
+ if isM256(v0) && isYMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x04, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x80)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFRCZPS")
+ }
+ return p
+}
+
+// VFRCZSD performs "Extract Fraction Scalar Double-Precision Floating-Point".
+//
+// Mnemonic : VFRCZSD
+// Supported forms : (2 forms)
+//
+// * VFRCZSD xmm, xmm [XOP]
+// * VFRCZSD m64, xmm [XOP]
+//
+func (self *Program) VFRCZSD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VFRCZSD", 2, Operands { v0, v1 })
+ // VFRCZSD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x78)
+ m.emit(0x83)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFRCZSD m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x83)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFRCZSD")
+ }
+ return p
+}
+
+// VFRCZSS performs "Extract Fraction Scalar Single-Precision Floating-Point".
+//
+// Mnemonic : VFRCZSS
+// Supported forms : (2 forms)
+//
+// * VFRCZSS xmm, xmm [XOP]
+// * VFRCZSS m32, xmm [XOP]
+//
+func (self *Program) VFRCZSS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VFRCZSS", 2, Operands { v0, v1 })
+ // VFRCZSS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x78)
+ m.emit(0x82)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VFRCZSS m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x82)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VFRCZSS")
+ }
+ return p
+}
+
+// VGATHERDPD performs "Gather Packed Double-Precision Floating-Point Values Using Signed Doubleword Indices".
+//
+// Mnemonic : VGATHERDPD
+// Supported forms : (5 forms)
+//
+// * VGATHERDPD xmm, vm32x, xmm [AVX2]
+// * VGATHERDPD ymm, vm32x, ymm [AVX2]
+// * VGATHERDPD vm32y, zmm{k} [AVX512F]
+// * VGATHERDPD vm32x, xmm{k} [AVX512F,AVX512VL]
+// * VGATHERDPD vm32x, ymm{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VGATHERDPD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VGATHERDPD", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VGATHERDPD", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VGATHERDPD takes 2 or 3 operands")
+ }
+ // VGATHERDPD xmm, vm32x, xmm
+ if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x92)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ // VGATHERDPD ymm, vm32x, ymm
+ if len(vv) == 1 && isYMM(v0) && isVMX(v1) && isYMM(vv[0]) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x92)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ // VGATHERDPD vm32y, zmm{k}
+ if len(vv) == 0 && isEVEXVMY(v0) && isZMMk(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x92)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VGATHERDPD vm32x, xmm{k}
+ if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x92)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VGATHERDPD vm32x, ymm{k}
+ if len(vv) == 0 && isEVEXVMX(v0) && isYMMk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x92)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VGATHERDPD")
+ }
+ return p
+}
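+
+// The gather builders are variadic because the AVX2 and AVX-512 forms differ
+// in arity: AVX2 takes an explicit mask vector plus a destination, while
+// EVEX folds the mask into the destination's {k} decoration. With vm again a
+// vector-indexed MemoryOperand (construction elided):
+//
+//     p.VGATHERDPD(XMM2, vm, XMM1)   // AVX2: XMM2 masks, XMM1 receives
+//     p.VGATHERDPD(vm, ZMM1)         // AVX512F: mask rides on ZMM1 as {k}
+//
+// The EVEX paths pass 8 as the mrsd scale, the disp8*N compression factor
+// for 64-bit elements.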
+
+// VGATHERDPS performs "Gather Packed Single-Precision Floating-Point Values Using Signed Doubleword Indices".
+//
+// Mnemonic : VGATHERDPS
+// Supported forms : (5 forms)
+//
+// * VGATHERDPS xmm, vm32x, xmm [AVX2]
+// * VGATHERDPS ymm, vm32y, ymm [AVX2]
+// * VGATHERDPS vm32z, zmm{k} [AVX512F]
+// * VGATHERDPS vm32x, xmm{k} [AVX512F,AVX512VL]
+// * VGATHERDPS vm32y, ymm{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VGATHERDPS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VGATHERDPS", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VGATHERDPS", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VGATHERDPS takes 2 or 3 operands")
+ }
+ // VGATHERDPS xmm, vm32x, xmm
+ if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x92)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ // VGATHERDPS ymm, vm32y, ymm
+ if len(vv) == 1 && isYMM(v0) && isVMY(v1) && isYMM(vv[0]) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x92)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ // VGATHERDPS vm32z, zmm{k}
+ if len(vv) == 0 && isVMZ(v0) && isZMMk(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x92)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VGATHERDPS vm32x, xmm{k}
+ if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x92)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VGATHERDPS vm32y, ymm{k}
+ if len(vv) == 0 && isEVEXVMY(v0) && isYMMk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x92)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VGATHERDPS")
+ }
+ return p
+}
+
+// VGATHERPF0DPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Doubleword Indices Using T0 Hint".
+//
+// Mnemonic : VGATHERPF0DPD
+// Supported forms : (1 form)
+//
+// * VGATHERPF0DPD vm32y{k} [AVX512PF]
+//
+func (self *Program) VGATHERPF0DPD(v0 interface{}) *Instruction {
+ p := self.alloc("VGATHERPF0DPD", 1, Operands { v0 })
+ // VGATHERPF0DPD vm32y{k}
+ if isVMYk(v0) {
+ self.require(ISA_AVX512PF)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
+ m.emit(0xc6)
+ m.mrsd(1, addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VGATHERPF0DPD")
+ }
+ return p
+}
+
+// VGATHERPF0DPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Doubleword Indices Using T0 Hint".
+//
+// Mnemonic : VGATHERPF0DPS
+// Supported forms : (1 form)
+//
+// * VGATHERPF0DPS vm32z{k} [AVX512PF]
+//
+func (self *Program) VGATHERPF0DPS(v0 interface{}) *Instruction {
+ p := self.alloc("VGATHERPF0DPS", 1, Operands { v0 })
+ // VGATHERPF0DPS vm32z{k}
+ if isVMZk(v0) {
+ self.require(ISA_AVX512PF)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
+ m.emit(0xc6)
+ m.mrsd(1, addr(v[0]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VGATHERPF0DPS")
+ }
+ return p
+}
+
+// VGATHERPF0QPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Quadword Indices Using T0 Hint".
+//
+// Mnemonic : VGATHERPF0QPD
+// Supported forms : (1 form)
+//
+// * VGATHERPF0QPD vm64z{k} [AVX512PF]
+//
+func (self *Program) VGATHERPF0QPD(v0 interface{}) *Instruction {
+ p := self.alloc("VGATHERPF0QPD", 1, Operands { v0 })
+ // VGATHERPF0QPD vm64z{k}
+ if isVMZk(v0) {
+ self.require(ISA_AVX512PF)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
+ m.emit(0xc7)
+ m.mrsd(1, addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VGATHERPF0QPD")
+ }
+ return p
+}
+
+// VGATHERPF0QPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Quadword Indices Using T0 Hint".
+//
+// Mnemonic : VGATHERPF0QPS
+// Supported forms : (1 form)
+//
+// * VGATHERPF0QPS vm64z{k} [AVX512PF]
+//
+func (self *Program) VGATHERPF0QPS(v0 interface{}) *Instruction {
+ p := self.alloc("VGATHERPF0QPS", 1, Operands { v0 })
+ // VGATHERPF0QPS vm64z{k}
+ if isVMZk(v0) {
+ self.require(ISA_AVX512PF)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
+ m.emit(0xc7)
+ m.mrsd(1, addr(v[0]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VGATHERPF0QPS")
+ }
+ return p
+}
+
+// VGATHERPF1DPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Doubleword Indices Using T1 Hint".
+//
+// Mnemonic : VGATHERPF1DPD
+// Supported forms : (1 form)
+//
+// * VGATHERPF1DPD vm32y{k} [AVX512PF]
+//
+func (self *Program) VGATHERPF1DPD(v0 interface{}) *Instruction {
+ p := self.alloc("VGATHERPF1DPD", 1, Operands { v0 })
+ // VGATHERPF1DPD vm32y{k}
+ if isVMYk(v0) {
+ self.require(ISA_AVX512PF)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
+ m.emit(0xc6)
+ m.mrsd(2, addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VGATHERPF1DPD")
+ }
+ return p
+}
+
+// VGATHERPF1DPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Doubleword Indices Using T1 Hint".
+//
+// Mnemonic : VGATHERPF1DPS
+// Supported forms : (1 form)
+//
+// * VGATHERPF1DPS vm32z{k} [AVX512PF]
+//
+func (self *Program) VGATHERPF1DPS(v0 interface{}) *Instruction {
+ p := self.alloc("VGATHERPF1DPS", 1, Operands { v0 })
+ // VGATHERPF1DPS vm32z{k}
+ if isVMZk(v0) {
+ self.require(ISA_AVX512PF)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
+ m.emit(0xc6)
+ m.mrsd(2, addr(v[0]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VGATHERPF1DPS")
+ }
+ return p
+}
+
+// VGATHERPF1QPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Quadword Indices Using T1 Hint".
+//
+// Mnemonic : VGATHERPF1QPD
+// Supported forms : (1 form)
+//
+// * VGATHERPF1QPD vm64z{k} [AVX512PF]
+//
+func (self *Program) VGATHERPF1QPD(v0 interface{}) *Instruction {
+ p := self.alloc("VGATHERPF1QPD", 1, Operands { v0 })
+ // VGATHERPF1QPD vm64z{k}
+ if isVMZk(v0) {
+ self.require(ISA_AVX512PF)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
+ m.emit(0xc7)
+ m.mrsd(2, addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VGATHERPF1QPD")
+ }
+ return p
+}
+
+// VGATHERPF1QPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Quadword Indices Using T1 Hint".
+//
+// Mnemonic : VGATHERPF1QPS
+// Supported forms : (1 form)
+//
+// * VGATHERPF1QPS vm64z{k} [AVX512PF]
+//
+func (self *Program) VGATHERPF1QPS(v0 interface{}) *Instruction {
+ p := self.alloc("VGATHERPF1QPS", 1, Operands { v0 })
+ // VGATHERPF1QPS vm64z{k}
+ if isVMZk(v0) {
+ self.require(ISA_AVX512PF)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
+ m.emit(0xc7)
+ m.mrsd(2, addr(v[0]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VGATHERPF1QPS")
+ }
+ return p
+}
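+
+// The eight VGATHERPF* prefetches above share one shape: a single masked
+// vm{k} operand and no register result. The T0/T1 hint is the ModRM.reg
+// opcode extension handed to mrsd (1 for PF0, 2 for PF1), dword vs qword
+// indices select opcode 0xc6 vs 0xc7, and PD vs PS shows up only as the
+// 8- vs 4-byte disp8*N scale. A sketch, with vm a vector-indexed
+// MemoryOperand carrying a {k} mask:
+//
+//     p.VGATHERPF0DPS(vm)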
+
+// VGATHERQPD performs "Gather Packed Double-Precision Floating-Point Values Using Signed Quadword Indices".
+//
+// Mnemonic : VGATHERQPD
+// Supported forms : (5 forms)
+//
+// * VGATHERQPD xmm, vm64x, xmm [AVX2]
+// * VGATHERQPD ymm, vm64y, ymm [AVX2]
+// * VGATHERQPD vm64z, zmm{k} [AVX512F]
+// * VGATHERQPD vm64x, xmm{k} [AVX512F,AVX512VL]
+// * VGATHERQPD vm64y, ymm{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VGATHERQPD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VGATHERQPD", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VGATHERQPD", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VGATHERQPD takes 2 or 3 operands")
+ }
+ // VGATHERQPD xmm, vm64x, xmm
+ if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x93)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ // VGATHERQPD ymm, vm64y, ymm
+ if len(vv) == 1 && isYMM(v0) && isVMY(v1) && isYMM(vv[0]) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x93)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ // VGATHERQPD vm64z, zmm{k}
+ if len(vv) == 0 && isVMZ(v0) && isZMMk(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x93)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VGATHERQPD vm64x, xmm{k}
+ if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x93)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VGATHERQPD vm64y, ymm{k}
+ if len(vv) == 0 && isEVEXVMY(v0) && isYMMk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x93)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VGATHERQPD")
+ }
+ return p
+}
+
+// VGATHERQPS performs "Gather Packed Single-Precision Floating-Point Values Using Signed Quadword Indices".
+//
+// Mnemonic : VGATHERQPS
+// Supported forms : (5 forms)
+//
+// * VGATHERQPS xmm, vm64x, xmm [AVX2]
+// * VGATHERQPS xmm, vm64y, xmm [AVX2]
+// * VGATHERQPS vm64z, ymm{k} [AVX512F]
+// * VGATHERQPS vm64x, xmm{k} [AVX512F,AVX512VL]
+// * VGATHERQPS vm64y, xmm{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VGATHERQPS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VGATHERQPS", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VGATHERQPS", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VGATHERQPS takes 2 or 3 operands")
+ }
+ // VGATHERQPS xmm, vm64x, xmm
+ if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x93)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ // VGATHERQPS xmm, vm64y, xmm
+ if len(vv) == 1 && isXMM(v0) && isVMY(v1) && isXMM(vv[0]) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x93)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ // VGATHERQPS vm64z, ymm{k}
+ if len(vv) == 0 && isVMZ(v0) && isYMMk(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x93)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VGATHERQPS vm64x, xmm{k}
+ if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x93)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VGATHERQPS vm64y, xmm{k}
+ if len(vv) == 0 && isEVEXVMY(v0) && isXMMk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x93)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VGATHERQPS")
+ }
+ return p
+}
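+
+// One quirk in the forms above: with quadword indices gathering 32-bit
+// singles, the destination is half the width of the index vector, so vm64y
+// pairs with an xmm result and vm64z with a ymm one. For example:
+//
+//     p.VGATHERQPS(XMM2, vm, XMM1)   // vm64x form: gathers two singles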
+
+// VGETEXPPD performs "Extract Exponents of Packed Double-Precision Floating-Point Values as Double-Precision Floating-Point Values".
+//
+// Mnemonic : VGETEXPPD
+// Supported forms : (7 forms)
+//
+// * VGETEXPPD m512/m64bcst, zmm{k}{z} [AVX512F]
+// * VGETEXPPD {sae}, zmm, zmm{k}{z} [AVX512F]
+// * VGETEXPPD zmm, zmm{k}{z} [AVX512F]
+// * VGETEXPPD m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VGETEXPPD m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VGETEXPPD xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VGETEXPPD ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VGETEXPPD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VGETEXPPD", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VGETEXPPD", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VGETEXPPD takes 2 or 3 operands")
+ }
+ // VGETEXPPD m512/m64bcst, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x42)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VGETEXPPD {sae}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VGETEXPPD zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VGETEXPPD m128/m64bcst, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x42)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VGETEXPPD m256/m64bcst, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x42)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VGETEXPPD xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VGETEXPPD ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VGETEXPPD")
+ }
+ return p
+}
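+
+// The {sae} forms grow the arity by one: when the first operand is the
+// suppress-all-exceptions marker, the encoder sets EVEX.b in place of the
+// 512-bit length bits (the 0x18 vs 0x48 constant in the fourth EVEX byte
+// above). A sketch, assuming the package exports an SAE marker value (the
+// operand accepted by isSAE; the exact identifier is an assumption here):
+//
+//     p.VGETEXPPD(SAE, ZMM1, ZMM2)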
+
+// VGETEXPPS performs "Extract Exponents of Packed Single-Precision Floating-Point Values as Single-Precision Floating-Point Values".
+//
+// Mnemonic : VGETEXPPS
+// Supported forms : (7 forms)
+//
+// * VGETEXPPS m512/m32bcst, zmm{k}{z} [AVX512F]
+// * VGETEXPPS {sae}, zmm, zmm{k}{z} [AVX512F]
+// * VGETEXPPS zmm, zmm{k}{z} [AVX512F]
+// * VGETEXPPS m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VGETEXPPS m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VGETEXPPS xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VGETEXPPS ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VGETEXPPS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VGETEXPPS", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VGETEXPPS", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VGETEXPPS takes 2 or 3 operands")
+ }
+ // VGETEXPPS m512/m32bcst, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x42)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VGETEXPPS {sae}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VGETEXPPS zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VGETEXPPS m128/m32bcst, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x42)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VGETEXPPS m256/m32bcst, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x42)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VGETEXPPS xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VGETEXPPS ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VGETEXPPS")
+ }
+ return p
+}
+
+// VGETEXPSD performs "Extract Exponent of Scalar Double-Precision Floating-Point Value as Double-Precision Floating-Point Value".
+//
+// Mnemonic : VGETEXPSD
+// Supported forms : (3 forms)
+//
+// * VGETEXPSD m64, xmm, xmm{k}{z} [AVX512F]
+// * VGETEXPSD {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VGETEXPSD xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VGETEXPSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VGETEXPSD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VGETEXPSD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VGETEXPSD takes 3 or 4 operands")
+ }
+ // VGETEXPSD m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x43)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VGETEXPSD {sae}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x43)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VGETEXPSD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x43)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VGETEXPSD")
+ }
+ return p
+}
+
+// VGETEXPSS performs "Extract Exponent of Scalar Single-Precision Floating-Point Value as Single-Precision Floating-Point Value".
+//
+// Mnemonic : VGETEXPSS
+// Supported forms : (3 forms)
+//
+// * VGETEXPSS m32, xmm, xmm{k}{z} [AVX512F]
+// * VGETEXPSS {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VGETEXPSS xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VGETEXPSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VGETEXPSS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VGETEXPSS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VGETEXPSS takes 3 or 4 operands")
+ }
+ // VGETEXPSS m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x43)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VGETEXPSS {sae}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x43)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VGETEXPSS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x43)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VGETEXPSS")
+ }
+ return p
+}
+
+// VGETMANTPD performs "Extract Normalized Mantissas from Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VGETMANTPD
+// Supported forms : (7 forms)
+//
+// * VGETMANTPD imm8, m512/m64bcst, zmm{k}{z} [AVX512F]
+// * VGETMANTPD imm8, {sae}, zmm, zmm{k}{z} [AVX512F]
+// * VGETMANTPD imm8, zmm, zmm{k}{z} [AVX512F]
+// * VGETMANTPD imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VGETMANTPD imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VGETMANTPD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VGETMANTPD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VGETMANTPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VGETMANTPD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VGETMANTPD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VGETMANTPD takes 3 or 4 operands")
+ }
+ // VGETMANTPD imm8, m512/m64bcst, zmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x26)
+ m.mrsd(lcode(v[2]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VGETMANTPD imm8, {sae}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[3]) << 7) | kcode(v[3]) | 0x18)
+ m.emit(0x26)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VGETMANTPD imm8, zmm, zmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+ m.emit(0x26)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VGETMANTPD imm8, m128/m64bcst, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x26)
+ m.mrsd(lcode(v[2]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VGETMANTPD imm8, m256/m64bcst, ymm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x26)
+ m.mrsd(lcode(v[2]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VGETMANTPD imm8, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
+ m.emit(0x26)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VGETMANTPD imm8, ymm, ymm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
+ m.emit(0x26)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VGETMANTPD")
+ }
+ return p
+}
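+
+// For the VGETMANT family the imm8 follows the Intel SDM layout: bits [1:0]
+// pick the normalization interval ([1,2), [1/2,2), [1/2,1) or [3/4,3/2))
+// and bits [3:2] the sign control. A sketch:
+//
+//     p.VGETMANTPD(0, ZMM1, ZMM2)   // mantissas of ZMM1 normalized to [1,2)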
+
+// VGETMANTPS performs "Extract Normalized Mantissas from Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VGETMANTPS
+// Supported forms : (7 forms)
+//
+// * VGETMANTPS imm8, m512/m32bcst, zmm{k}{z} [AVX512F]
+// * VGETMANTPS imm8, {sae}, zmm, zmm{k}{z} [AVX512F]
+// * VGETMANTPS imm8, zmm, zmm{k}{z} [AVX512F]
+// * VGETMANTPS imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VGETMANTPS imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VGETMANTPS imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VGETMANTPS imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VGETMANTPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VGETMANTPS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VGETMANTPS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VGETMANTPS takes 3 or 4 operands")
+ }
+ // VGETMANTPS imm8, m512/m32bcst, zmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x26)
+ m.mrsd(lcode(v[2]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VGETMANTPS imm8, {sae}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[3]) << 7) | kcode(v[3]) | 0x18)
+ m.emit(0x26)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VGETMANTPS imm8, zmm, zmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+ m.emit(0x26)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VGETMANTPS imm8, m128/m32bcst, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x26)
+ m.mrsd(lcode(v[2]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VGETMANTPS imm8, m256/m32bcst, ymm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x26)
+ m.mrsd(lcode(v[2]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VGETMANTPS imm8, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
+ m.emit(0x26)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VGETMANTPS imm8, ymm, ymm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
+ m.emit(0x26)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VGETMANTPS")
+ }
+ return p
+}
+
+// VGETMANTSD performs "Extract Normalized Mantissa from Scalar Double-Precision Floating-Point Value".
+//
+// Mnemonic : VGETMANTSD
+// Supported forms : (3 forms)
+//
+// * VGETMANTSD imm8, m64, xmm, xmm{k}{z} [AVX512F]
+// * VGETMANTSD imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VGETMANTSD imm8, xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VGETMANTSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VGETMANTSD", 4, Operands { v0, v1, v2, v3 })
+ case 1 : p = self.alloc("VGETMANTSD", 5, Operands { v0, v1, v2, v3, vv[0] })
+ default : panic("instruction VGETMANTSD takes 4 or 5 operands")
+ }
+ // VGETMANTSD imm8, m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x27)
+ m.mrsd(lcode(v[3]), addr(v[1]), 8)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VGETMANTSD imm8, {sae}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[3]) << 3))
+ m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
+ m.emit(0x27)
+ m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VGETMANTSD imm8, xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x27)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VGETMANTSD")
+ }
+ return p
+}
+
+// VGETMANTSS performs "Extract Normalized Mantissa from Scalar Single-Precision Floating-Point Value".
+//
+// Mnemonic : VGETMANTSS
+// Supported forms : (3 forms)
+//
+// * VGETMANTSS imm8, m32, xmm, xmm{k}{z} [AVX512F]
+// * VGETMANTSS imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VGETMANTSS imm8, xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VGETMANTSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VGETMANTSS", 4, Operands { v0, v1, v2, v3 })
+ case 1 : p = self.alloc("VGETMANTSS", 5, Operands { v0, v1, v2, v3, vv[0] })
+ default : panic("instruction VGETMANTSS takes 4 or 5 operands")
+ }
+ // VGETMANTSS imm8, m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x27)
+ m.mrsd(lcode(v[3]), addr(v[1]), 4)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VGETMANTSS imm8, {sae}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[3]) << 3))
+ m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
+ m.emit(0x27)
+ m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VGETMANTSS imm8, xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x27)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VGETMANTSS")
+ }
+ return p
+}
+
+// VHADDPD performs "Packed Double-FP Horizontal Add".
+//
+// Mnemonic : VHADDPD
+// Supported forms : (4 forms)
+//
+// * VHADDPD xmm, xmm, xmm [AVX]
+// * VHADDPD m128, xmm, xmm [AVX]
+// * VHADDPD ymm, ymm, ymm [AVX]
+// * VHADDPD m256, ymm, ymm [AVX]
+//
+func (self *Program) VHADDPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VHADDPD", 3, Operands { v0, v1, v2 })
+ // VHADDPD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x7c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VHADDPD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x7c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VHADDPD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x7c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VHADDPD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x7c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VHADDPD")
+ }
+ return p
+}
+
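+// Editorial note on the vex2 helper (inferred from the generated forms in
+// this file, not from separate documentation): its first argument packs the
+// VEX.pp prefix selector in bits 1:0 (0b00 = none, 0b01 = 0x66, 0b10 = 0xF3,
+// 0b11 = 0xF2) and VEX.L in bit 2. That is why the 128-bit VHADDPD forms
+// above pass 1 (0x66, L=0) and the 256-bit forms pass 5 (0x66, L=1), while
+// the F2-prefixed VHADDPS forms below pass 3 and 7.
+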
+// VHADDPS performs "Packed Single-FP Horizontal Add".
+//
+// Mnemonic : VHADDPS
+// Supported forms : (4 forms)
+//
+// * VHADDPS xmm, xmm, xmm [AVX]
+// * VHADDPS m128, xmm, xmm [AVX]
+// * VHADDPS ymm, ymm, ymm [AVX]
+// * VHADDPS m256, ymm, ymm [AVX]
+//
+func (self *Program) VHADDPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VHADDPS", 3, Operands { v0, v1, v2 })
+ // VHADDPS xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x7c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VHADDPS m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x7c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VHADDPS ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(7, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x7c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VHADDPS m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(7, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x7c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VHADDPS")
+ }
+ return p
+}
+
+// VHSUBPD performs "Packed Double-FP Horizontal Subtract".
+//
+// Mnemonic : VHSUBPD
+// Supported forms : (4 forms)
+//
+// * VHSUBPD xmm, xmm, xmm [AVX]
+// * VHSUBPD m128, xmm, xmm [AVX]
+// * VHSUBPD ymm, ymm, ymm [AVX]
+// * VHSUBPD m256, ymm, ymm [AVX]
+//
+func (self *Program) VHSUBPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VHSUBPD", 3, Operands { v0, v1, v2 })
+ // VHSUBPD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x7d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VHSUBPD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x7d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VHSUBPD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x7d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VHSUBPD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x7d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VHSUBPD")
+ }
+ return p
+}
+
+// VHSUBPS performs "Packed Single-FP Horizontal Subtract".
+//
+// Mnemonic : VHSUBPS
+// Supported forms : (4 forms)
+//
+// * VHSUBPS xmm, xmm, xmm [AVX]
+// * VHSUBPS m128, xmm, xmm [AVX]
+// * VHSUBPS ymm, ymm, ymm [AVX]
+// * VHSUBPS m256, ymm, ymm [AVX]
+//
+func (self *Program) VHSUBPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VHSUBPS", 3, Operands { v0, v1, v2 })
+ // VHSUBPS xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x7d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VHSUBPS m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x7d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VHSUBPS ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(7, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x7d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VHSUBPS m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(7, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x7d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VHSUBPS")
+ }
+ return p
+}
+
+// VINSERTF128 performs "Insert Packed Floating-Point Values".
+//
+// Mnemonic : VINSERTF128
+// Supported forms : (2 forms)
+//
+// * VINSERTF128 imm8, xmm, ymm, ymm [AVX]
+// * VINSERTF128 imm8, m128, ymm, ymm [AVX]
+//
+func (self *Program) VINSERTF128(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VINSERTF128", 4, Operands { v0, v1, v2, v3 })
+ // VINSERTF128 imm8, xmm, ymm, ymm
+ if isImm8(v0) && isXMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x18)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VINSERTF128 imm8, m128, ymm, ymm
+ if isImm8(v0) && isM128(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x18)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VINSERTF128")
+ }
+ return p
+}
+
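+// A hedged usage sketch (editorial, not generated; it assumes a *Program p
+// built with this package and the package-level register constants XMM1,
+// YMM2 and YMM0). As elsewhere in this file, operands are written
+// source-first with the destination last:
+//
+//     p.VINSERTF128(1, XMM1, YMM2, YMM0)   // YMM0 = YMM2 with its upper 128-bit lane replaced by XMM1
+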
+// VINSERTF32X4 performs "Insert 128 Bits of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VINSERTF32X4
+// Supported forms : (4 forms)
+//
+// * VINSERTF32X4 imm8, xmm, zmm, zmm{k}{z} [AVX512F]
+// * VINSERTF32X4 imm8, m128, zmm, zmm{k}{z} [AVX512F]
+// * VINSERTF32X4 imm8, xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VINSERTF32X4 imm8, m128, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VINSERTF32X4(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VINSERTF32X4", 4, Operands { v0, v1, v2, v3 })
+ // VINSERTF32X4 imm8, xmm, zmm, zmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x18)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VINSERTF32X4 imm8, m128, zmm, zmm{k}{z}
+ if isImm8(v0) && isM128(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x18)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VINSERTF32X4 imm8, xmm, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x18)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VINSERTF32X4 imm8, m128, ymm, ymm{k}{z}
+ if isImm8(v0) && isM128(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x18)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VINSERTF32X4")
+ }
+ return p
+}
+
+// VINSERTF32X8 performs "Insert 256 Bits of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VINSERTF32X8
+// Supported forms : (2 forms)
+//
+// * VINSERTF32X8 imm8, ymm, zmm, zmm{k}{z} [AVX512DQ]
+// * VINSERTF32X8 imm8, m256, zmm, zmm{k}{z} [AVX512DQ]
+//
+func (self *Program) VINSERTF32X8(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VINSERTF32X8", 4, Operands { v0, v1, v2, v3 })
+ // VINSERTF32X8 imm8, ymm, zmm, zmm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x1a)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VINSERTF32X8 imm8, m256, zmm, zmm{k}{z}
+ if isImm8(v0) && isM256(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x1a)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VINSERTF32X8")
+ }
+ return p
+}
+
+// VINSERTF64X2 performs "Insert 128 Bits of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VINSERTF64X2
+// Supported forms : (4 forms)
+//
+// * VINSERTF64X2 imm8, xmm, zmm, zmm{k}{z} [AVX512DQ]
+// * VINSERTF64X2 imm8, m128, zmm, zmm{k}{z} [AVX512DQ]
+// * VINSERTF64X2 imm8, xmm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VINSERTF64X2 imm8, m128, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VINSERTF64X2(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VINSERTF64X2", 4, Operands { v0, v1, v2, v3 })
+ // VINSERTF64X2 imm8, xmm, zmm, zmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x18)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VINSERTF64X2 imm8, m128, zmm, zmm{k}{z}
+ if isImm8(v0) && isM128(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x18)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VINSERTF64X2 imm8, xmm, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x18)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VINSERTF64X2 imm8, m128, ymm, ymm{k}{z}
+ if isImm8(v0) && isM128(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x18)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VINSERTF64X2")
+ }
+ return p
+}
+
+// VINSERTF64X4 performs "Insert 256 Bits of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VINSERTF64X4
+// Supported forms : (2 forms)
+//
+// * VINSERTF64X4 imm8, ymm, zmm, zmm{k}{z} [AVX512F]
+// * VINSERTF64X4 imm8, m256, zmm, zmm{k}{z} [AVX512F]
+//
+func (self *Program) VINSERTF64X4(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VINSERTF64X4", 4, Operands { v0, v1, v2, v3 })
+ // VINSERTF64X4 imm8, ymm, zmm, zmm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x1a)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VINSERTF64X4 imm8, m256, zmm, zmm{k}{z}
+ if isImm8(v0) && isM256(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x1a)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VINSERTF64X4")
+ }
+ return p
+}
+
+// VINSERTI128 performs "Insert Packed Integer Values".
+//
+// Mnemonic : VINSERTI128
+// Supported forms : (2 forms)
+//
+// * VINSERTI128 imm8, xmm, ymm, ymm [AVX2]
+// * VINSERTI128 imm8, m128, ymm, ymm [AVX2]
+//
+func (self *Program) VINSERTI128(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VINSERTI128", 4, Operands { v0, v1, v2, v3 })
+ // VINSERTI128 imm8, xmm, ymm, ymm
+ if isImm8(v0) && isXMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x38)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VINSERTI128 imm8, m128, ymm, ymm
+ if isImm8(v0) && isM128(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x38)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VINSERTI128")
+ }
+ return p
+}
+
+// VINSERTI32X4 performs "Insert 128 Bits of Packed Doubleword Integer Values".
+//
+// Mnemonic : VINSERTI32X4
+// Supported forms : (4 forms)
+//
+// * VINSERTI32X4 imm8, xmm, zmm, zmm{k}{z} [AVX512F]
+// * VINSERTI32X4 imm8, m128, zmm, zmm{k}{z} [AVX512F]
+// * VINSERTI32X4 imm8, xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VINSERTI32X4 imm8, m128, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VINSERTI32X4(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VINSERTI32X4", 4, Operands { v0, v1, v2, v3 })
+ // VINSERTI32X4 imm8, xmm, zmm, zmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x38)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VINSERTI32X4 imm8, m128, zmm, zmm{k}{z}
+ if isImm8(v0) && isM128(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x38)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VINSERTI32X4 imm8, xmm, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x38)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VINSERTI32X4 imm8, m128, ymm, ymm{k}{z}
+ if isImm8(v0) && isM128(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x38)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VINSERTI32X4")
+ }
+ return p
+}
+
+// VINSERTI32X8 performs "Insert 256 Bits of Packed Doubleword Integer Values".
+//
+// Mnemonic : VINSERTI32X8
+// Supported forms : (2 forms)
+//
+// * VINSERTI32X8 imm8, ymm, zmm, zmm{k}{z} [AVX512DQ]
+// * VINSERTI32X8 imm8, m256, zmm, zmm{k}{z} [AVX512DQ]
+//
+func (self *Program) VINSERTI32X8(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VINSERTI32X8", 4, Operands { v0, v1, v2, v3 })
+ // VINSERTI32X8 imm8, ymm, zmm, zmm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x3a)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VINSERTI32X8 imm8, m256, zmm, zmm{k}{z}
+ if isImm8(v0) && isM256(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x3a)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VINSERTI32X8")
+ }
+ return p
+}
+
+// VINSERTI64X2 performs "Insert 128 Bits of Packed Quadword Integer Values".
+//
+// Mnemonic : VINSERTI64X2
+// Supported forms : (4 forms)
+//
+// * VINSERTI64X2 imm8, xmm, zmm, zmm{k}{z} [AVX512DQ]
+// * VINSERTI64X2 imm8, m128, zmm, zmm{k}{z} [AVX512DQ]
+// * VINSERTI64X2 imm8, xmm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VINSERTI64X2 imm8, m128, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VINSERTI64X2(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VINSERTI64X2", 4, Operands { v0, v1, v2, v3 })
+ // VINSERTI64X2 imm8, xmm, zmm, zmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x38)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VINSERTI64X2 imm8, m128, zmm, zmm{k}{z}
+ if isImm8(v0) && isM128(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x38)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VINSERTI64X2 imm8, xmm, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x38)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VINSERTI64X2 imm8, m128, ymm, ymm{k}{z}
+ if isImm8(v0) && isM128(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x38)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VINSERTI64X2")
+ }
+ return p
+}
+
+// VINSERTI64X4 performs "Insert 256 Bits of Packed Quadword Integer Values".
+//
+// Mnemonic : VINSERTI64X4
+// Supported forms : (2 forms)
+//
+// * VINSERTI64X4 imm8, ymm, zmm, zmm{k}{z} [AVX512F]
+// * VINSERTI64X4 imm8, m256, zmm, zmm{k}{z} [AVX512F]
+//
+func (self *Program) VINSERTI64X4(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VINSERTI64X4", 4, Operands { v0, v1, v2, v3 })
+ // VINSERTI64X4 imm8, ymm, zmm, zmm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x3a)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VINSERTI64X4 imm8, m256, zmm, zmm{k}{z}
+ if isImm8(v0) && isM256(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x3a)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VINSERTI64X4")
+ }
+ return p
+}
+
+// VINSERTPS performs "Insert Packed Single Precision Floating-Point Value".
+//
+// Mnemonic : VINSERTPS
+// Supported forms : (4 forms)
+//
+// * VINSERTPS imm8, xmm, xmm, xmm [AVX]
+// * VINSERTPS imm8, m32, xmm, xmm [AVX]
+// * VINSERTPS imm8, xmm, xmm, xmm [AVX512F]
+// * VINSERTPS imm8, m32, xmm, xmm [AVX512F]
+//
+func (self *Program) VINSERTPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VINSERTPS", 4, Operands { v0, v1, v2, v3 })
+ // VINSERTPS imm8, xmm, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x21)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VINSERTPS imm8, m32, xmm, xmm
+ if isImm8(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x21)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VINSERTPS imm8, xmm, xmm, xmm
+ if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00)
+ m.emit(0x21)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VINSERTPS imm8, m32, xmm, xmm
+ if isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), 0, 0, 0)
+ m.emit(0x21)
+ m.mrsd(lcode(v[3]), addr(v[1]), 4)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VINSERTPS")
+ }
+ return p
+}
+
+// VLDDQU performs "Load Unaligned Integer 128 Bits".
+//
+// Mnemonic : VLDDQU
+// Supported forms : (2 forms)
+//
+// * VLDDQU m128, xmm [AVX]
+// * VLDDQU m256, ymm [AVX]
+//
+func (self *Program) VLDDQU(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VLDDQU", 2, Operands { v0, v1 })
+ // VLDDQU m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0xf0)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VLDDQU m256, ymm
+ if isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(7, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0xf0)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VLDDQU")
+ }
+ return p
+}
+
+// VLDMXCSR performs "Load MXCSR Register".
+//
+// Mnemonic : VLDMXCSR
+// Supported forms : (1 form)
+//
+// * VLDMXCSR m32 [AVX]
+//
+func (self *Program) VLDMXCSR(v0 interface{}) *Instruction {
+ p := self.alloc("VLDMXCSR", 1, Operands { v0 })
+ // VLDMXCSR m32
+ if isM32(v0) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, 0, addr(v[0]), 0)
+ m.emit(0xae)
+ m.mrsd(2, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VLDMXCSR")
+ }
+ return p
+}
+
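+// Editorial note: the literal 2 passed to m.mrsd above is not a register
+// code but the /2 opcode extension of LDMXCSR (0F AE /2), placed in the reg
+// field of the ModRM byte; the instruction itself takes only the m32
+// operand.
+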
+// VMASKMOVDQU performs "Store Selected Bytes of Double Quadword".
+//
+// Mnemonic : VMASKMOVDQU
+// Supported forms : (1 form)
+//
+// * VMASKMOVDQU xmm, xmm [AVX]
+//
+func (self *Program) VMASKMOVDQU(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VMASKMOVDQU", 2, Operands { v0, v1 })
+ // VMASKMOVDQU xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), v[0], 0)
+ m.emit(0xf7)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMASKMOVDQU")
+ }
+ return p
+}
+
+// VMASKMOVPD performs "Conditional Move Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VMASKMOVPD
+// Supported forms : (4 forms)
+//
+// * VMASKMOVPD m128, xmm, xmm [AVX]
+// * VMASKMOVPD m256, ymm, ymm [AVX]
+// * VMASKMOVPD xmm, xmm, m128 [AVX]
+// * VMASKMOVPD ymm, ymm, m256 [AVX]
+//
+func (self *Program) VMASKMOVPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VMASKMOVPD", 3, Operands { v0, v1, v2 })
+ // VMASKMOVPD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x2d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMASKMOVPD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x2d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMASKMOVPD xmm, xmm, m128
+ if isXMM(v0) && isXMM(v1) && isM128(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[0]), addr(v[2]), hlcode(v[1]))
+ m.emit(0x2f)
+ m.mrsd(lcode(v[0]), addr(v[2]), 1)
+ })
+ }
+ // VMASKMOVPD ymm, ymm, m256
+ if isYMM(v0) && isYMM(v1) && isM256(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[0]), addr(v[2]), hlcode(v[1]))
+ m.emit(0x2f)
+ m.mrsd(lcode(v[0]), addr(v[2]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMASKMOVPD")
+ }
+ return p
+}
+
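+// Editorial note: the masked load and store directions of VMASKMOVPD use
+// distinct opcodes in the encoders above, 0x2d for the m128/m256-to-register
+// loads and 0x2f for the register-to-memory stores; in both directions the
+// mask is the middle (vvvv) operand.
+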
+// VMASKMOVPS performs "Conditional Move Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VMASKMOVPS
+// Supported forms : (4 forms)
+//
+// * VMASKMOVPS m128, xmm, xmm [AVX]
+// * VMASKMOVPS m256, ymm, ymm [AVX]
+// * VMASKMOVPS xmm, xmm, m128 [AVX]
+// * VMASKMOVPS ymm, ymm, m256 [AVX]
+//
+func (self *Program) VMASKMOVPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VMASKMOVPS", 3, Operands { v0, v1, v2 })
+ // VMASKMOVPS m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x2c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMASKMOVPS m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x2c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMASKMOVPS xmm, xmm, m128
+ if isXMM(v0) && isXMM(v1) && isM128(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[0]), addr(v[2]), hlcode(v[1]))
+ m.emit(0x2e)
+ m.mrsd(lcode(v[0]), addr(v[2]), 1)
+ })
+ }
+ // VMASKMOVPS ymm, ymm, m256
+ if isYMM(v0) && isYMM(v1) && isM256(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[0]), addr(v[2]), hlcode(v[1]))
+ m.emit(0x2e)
+ m.mrsd(lcode(v[0]), addr(v[2]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMASKMOVPS")
+ }
+ return p
+}
+
+// VMAXPD performs "Return Maximum Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VMAXPD
+// Supported forms : (11 forms)
+//
+// * VMAXPD xmm, xmm, xmm [AVX]
+// * VMAXPD m128, xmm, xmm [AVX]
+// * VMAXPD ymm, ymm, ymm [AVX]
+// * VMAXPD m256, ymm, ymm [AVX]
+// * VMAXPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VMAXPD {sae}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VMAXPD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VMAXPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMAXPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMAXPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VMAXPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VMAXPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VMAXPD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VMAXPD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VMAXPD takes 3 or 4 operands")
+ }
+ // VMAXPD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMAXPD m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMAXPD ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMAXPD m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMAXPD m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x5f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VMAXPD {sae}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMAXPD zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMAXPD m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x5f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VMAXPD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMAXPD m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x5f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VMAXPD ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMAXPD")
+ }
+ return p
+}
+
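+// Editorial note: VMAXPD deliberately carries two encoders even for the
+// plain register shapes, a VEX branch guarded by isXMM/isYMM and an EVEX
+// branch guarded by isEVEXXMM/isEVEXYMM with {k}{z} masking; only the EVEX
+// branch can address the AVX-512 extended registers or a masked
+// destination. A minimal sketch of the unmasked form (register constants
+// assumed from this package):
+//
+//     p.VMAXPD(YMM1, YMM2, YMM3)   // YMM3 = element-wise maximum of YMM2 and YMM1
+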
+// VMAXPS performs "Return Maximum Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VMAXPS
+// Supported forms : (11 forms)
+//
+// * VMAXPS xmm, xmm, xmm [AVX]
+// * VMAXPS m128, xmm, xmm [AVX]
+// * VMAXPS ymm, ymm, ymm [AVX]
+// * VMAXPS m256, ymm, ymm [AVX]
+// * VMAXPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VMAXPS {sae}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VMAXPS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VMAXPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMAXPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMAXPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VMAXPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VMAXPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VMAXPS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VMAXPS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VMAXPS takes 3 or 4 operands")
+ }
+ // VMAXPS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMAXPS m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMAXPS ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMAXPS m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMAXPS m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x5f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VMAXPS {sae}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMAXPS zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMAXPS m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x5f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VMAXPS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMAXPS m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x5f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VMAXPS ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMAXPS")
+ }
+ return p
+}
+
+// VMAXSD performs "Return Maximum Scalar Double-Precision Floating-Point Value".
+//
+// Mnemonic : VMAXSD
+// Supported forms : (5 forms)
+//
+// * VMAXSD xmm, xmm, xmm [AVX]
+// * VMAXSD m64, xmm, xmm [AVX]
+// * VMAXSD m64, xmm, xmm{k}{z} [AVX512F]
+// * VMAXSD {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VMAXSD xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VMAXSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VMAXSD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VMAXSD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VMAXSD takes 3 or 4 operands")
+ }
+ // VMAXSD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMAXSD m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMAXSD m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x5f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VMAXSD {sae}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xff ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMAXSD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xff ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMAXSD")
+ }
+ return p
+}
+
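+// A hedged scalar example (editorial; register constants assumed from this
+// package). The scalar maximum writes only the low 64 bits, taking the
+// upper bits of the destination from the middle (vvvv) operand:
+//
+//     p.VMAXSD(XMM1, XMM2, XMM0)   // XMM0[63:0] = max(XMM2[63:0], XMM1[63:0]); upper bits copied from XMM2
+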
+// VMAXSS performs "Return Maximum Scalar Single-Precision Floating-Point Value".
+//
+// Mnemonic : VMAXSS
+// Supported forms : (5 forms)
+//
+// * VMAXSS xmm, xmm, xmm [AVX]
+// * VMAXSS m32, xmm, xmm [AVX]
+// * VMAXSS m32, xmm, xmm{k}{z} [AVX512F]
+// * VMAXSS {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VMAXSS xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VMAXSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VMAXSS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VMAXSS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VMAXSS takes 3 or 4 operands")
+ }
+ // VMAXSS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMAXSS m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMAXSS m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x5f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VMAXSS {sae}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMAXSS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x5f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMAXSS")
+ }
+ return p
+}
+
+// VMINPD performs "Return Minimum Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VMINPD
+// Supported forms : (11 forms)
+//
+// * VMINPD xmm, xmm, xmm [AVX]
+// * VMINPD m128, xmm, xmm [AVX]
+// * VMINPD ymm, ymm, ymm [AVX]
+// * VMINPD m256, ymm, ymm [AVX]
+// * VMINPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VMINPD {sae}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VMINPD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VMINPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMINPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMINPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VMINPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VMINPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VMINPD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VMINPD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VMINPD takes 3 or 4 operands")
+ }
+ // VMINPD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMINPD m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMINPD ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMINPD m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMINPD m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x5d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VMINPD {sae}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMINPD zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMINPD m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x5d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VMINPD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMINPD m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x5d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VMINPD ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMINPD")
+ }
+ return p
+}
+
+// VMINPS performs "Return Minimum Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VMINPS
+// Supported forms : (11 forms)
+//
+// * VMINPS xmm, xmm, xmm [AVX]
+// * VMINPS m128, xmm, xmm [AVX]
+// * VMINPS ymm, ymm, ymm [AVX]
+// * VMINPS m256, ymm, ymm [AVX]
+// * VMINPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VMINPS {sae}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VMINPS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VMINPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMINPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMINPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VMINPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VMINPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VMINPS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VMINPS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VMINPS takes 3 or 4 operands")
+ }
+ // VMINPS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMINPS m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMINPS ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMINPS m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMINPS m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x5d)
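+            // The trailing 64 passed to mrsd is the EVEX disp8*N compression
+            // scale: the memory tuple size in bytes (16/32/64 for the
+            // xmm/ymm/zmm forms in this file), while the VEX-encoded forms
+            // pass 1, i.e. no disp8 scaling.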
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VMINPS {sae}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMINPS zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMINPS m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x5d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VMINPS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMINPS m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x5d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VMINPS ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMINPS")
+ }
+ return p
+}
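+
+// Illustrative sketch (register names XMM0/XMM1/... and ZMM0/... are assumed
+// from this package's register definitions): the three-operand forms above
+// read source-to-destination, with the last operand as the destination, as
+// the encodings place it in the ModRM reg field:
+//
+//	p.VMINPS(XMM2, XMM1, XMM0) // xmm0 = min(xmm1, xmm2), VEX-encoded
+//	p.VMINPS(ZMM2, ZMM1, ZMM0) // zmm0 = min(zmm1, zmm2), EVEX (AVX-512F)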
+
+// VMINSD performs "Return Minimum Scalar Double-Precision Floating-Point Value".
+//
+// Mnemonic : VMINSD
+// Supported forms : (5 forms)
+//
+// * VMINSD xmm, xmm, xmm [AVX]
+// * VMINSD m64, xmm, xmm [AVX]
+// * VMINSD m64, xmm, xmm{k}{z} [AVX512F]
+// * VMINSD {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VMINSD xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VMINSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VMINSD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VMINSD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VMINSD takes 3 or 4 operands")
+ }
+ // VMINSD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMINSD m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMINSD m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x5d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VMINSD {sae}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xff ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMINSD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xff ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMINSD")
+ }
+ return p
+}
+
+// VMINSS performs "Return Minimum Scalar Single-Precision Floating-Point Value".
+//
+// Mnemonic : VMINSS
+// Supported forms : (5 forms)
+//
+// * VMINSS xmm, xmm, xmm [AVX]
+// * VMINSS m32, xmm, xmm [AVX]
+// * VMINSS m32, xmm, xmm{k}{z} [AVX512F]
+// * VMINSS {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VMINSS xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VMINSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VMINSS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VMINSS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VMINSS takes 3 or 4 operands")
+ }
+ // VMINSS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMINSS m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMINSS m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x5d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VMINSS {sae}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[2]) << 3))
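+            // 0x10 below sets EVEX.b; with register operands this encodes
+            // the {sae} (suppress-all-exceptions) modifier rather than a
+            // memory broadcast.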
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMINSS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x5d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMINSS")
+ }
+ return p
+}
+
+// VMOVAPD performs "Move Aligned Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VMOVAPD
+// Supported forms : (15 forms)
+//
+// * VMOVAPD xmm, xmm [AVX]
+// * VMOVAPD m128, xmm [AVX]
+// * VMOVAPD ymm, ymm [AVX]
+// * VMOVAPD m256, ymm [AVX]
+// * VMOVAPD xmm, m128 [AVX]
+// * VMOVAPD ymm, m256 [AVX]
+// * VMOVAPD zmm, m512{k}{z} [AVX512F]
+// * VMOVAPD zmm, zmm{k}{z} [AVX512F]
+// * VMOVAPD m512, zmm{k}{z} [AVX512F]
+// * VMOVAPD xmm, m128{k}{z} [AVX512F,AVX512VL]
+// * VMOVAPD xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMOVAPD ymm, m256{k}{z} [AVX512F,AVX512VL]
+// * VMOVAPD ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VMOVAPD m128, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMOVAPD m256, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VMOVAPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VMOVAPD", 2, Operands { v0, v1 })
+ // VMOVAPD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), v[0], 0)
+ m.emit(0x28)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[0]), v[1], 0)
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVAPD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x28)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VMOVAPD ymm, ymm
+ if isYMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[1]), v[0], 0)
+ m.emit(0x28)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[0]), v[1], 0)
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVAPD m256, ymm
+ if isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x28)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VMOVAPD xmm, m128
+ if isXMM(v0) && isM128(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0x29)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // VMOVAPD ymm, m256
+ if isYMM(v0) && isM256(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0x29)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // VMOVAPD zmm, m512{k}{z}
+ if isZMM(v0) && isM512kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x29)
+ m.mrsd(lcode(v[0]), addr(v[1]), 64)
+ })
+ }
+ // VMOVAPD zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x28)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVAPD m512, zmm{k}{z}
+ if isM512(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x28)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VMOVAPD xmm, m128{k}{z}
+ if isEVEXXMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x29)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ // VMOVAPD xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x28)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVAPD ymm, m256{k}{z}
+ if isEVEXYMM(v0) && isM256kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x29)
+ m.mrsd(lcode(v[0]), addr(v[1]), 32)
+ })
+ }
+ // VMOVAPD ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x28)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVAPD m128, xmm{k}{z}
+ if isM128(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x28)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VMOVAPD m256, ymm{k}{z}
+ if isM256(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x28)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVAPD")
+ }
+ return p
+}
+
+// VMOVAPS performs "Move Aligned Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VMOVAPS
+// Supported forms : (15 forms)
+//
+// * VMOVAPS xmm, xmm [AVX]
+// * VMOVAPS m128, xmm [AVX]
+// * VMOVAPS ymm, ymm [AVX]
+// * VMOVAPS m256, ymm [AVX]
+// * VMOVAPS xmm, m128 [AVX]
+// * VMOVAPS ymm, m256 [AVX]
+// * VMOVAPS zmm, m512{k}{z} [AVX512F]
+// * VMOVAPS zmm, zmm{k}{z} [AVX512F]
+// * VMOVAPS m512, zmm{k}{z} [AVX512F]
+// * VMOVAPS xmm, m128{k}{z} [AVX512F,AVX512VL]
+// * VMOVAPS xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMOVAPS ymm, m256{k}{z} [AVX512F,AVX512VL]
+// * VMOVAPS ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VMOVAPS m128, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMOVAPS m256, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VMOVAPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VMOVAPS", 2, Operands { v0, v1 })
+ // VMOVAPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[1]), v[0], 0)
+ m.emit(0x28)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
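+        // Alternative store-direction encoding: opcode 0x29 with the operand
+        // roles swapped. Both candidates are recorded via p.add, and one is
+        // chosen when the instruction is actually encoded.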
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[0]), v[1], 0)
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVAPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x28)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VMOVAPS ymm, ymm
+ if isYMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[1]), v[0], 0)
+ m.emit(0x28)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[0]), v[1], 0)
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVAPS m256, ymm
+ if isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x28)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VMOVAPS xmm, m128
+ if isXMM(v0) && isM128(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0x29)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // VMOVAPS ymm, m256
+ if isYMM(v0) && isM256(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0x29)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // VMOVAPS zmm, m512{k}{z}
+ if isZMM(v0) && isM512kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x29)
+ m.mrsd(lcode(v[0]), addr(v[1]), 64)
+ })
+ }
+ // VMOVAPS zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
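+            // Hand-rolled EVEX prefix: the 0x62 escape byte, then the three
+            // payload bytes (inverted R/X/B/R' plus opcode map; W/vvvv/pp;
+            // z/L'L/b/V'/aaa), followed by opcode 0x28 and a register-direct
+            // ModRM byte.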
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x28)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVAPS m512, zmm{k}{z}
+ if isM512(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x28)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VMOVAPS xmm, m128{k}{z}
+ if isEVEXXMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x29)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ // VMOVAPS xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x28)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVAPS ymm, m256{k}{z}
+ if isEVEXYMM(v0) && isM256kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x29)
+ m.mrsd(lcode(v[0]), addr(v[1]), 32)
+ })
+ }
+ // VMOVAPS ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x28)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVAPS m128, xmm{k}{z}
+ if isM128(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x28)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VMOVAPS m256, ymm{k}{z}
+ if isM256(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x28)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVAPS")
+ }
+ return p
+}
+
+// VMOVD performs "Move Doubleword".
+//
+// Mnemonic : VMOVD
+// Supported forms : (8 forms)
+//
+// * VMOVD xmm, r32 [AVX]
+// * VMOVD r32, xmm [AVX]
+// * VMOVD m32, xmm [AVX]
+// * VMOVD xmm, m32 [AVX]
+// * VMOVD xmm, r32 [AVX512F]
+// * VMOVD r32, xmm [AVX512F]
+// * VMOVD m32, xmm [AVX512F]
+// * VMOVD xmm, m32 [AVX512F]
+//
+func (self *Program) VMOVD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VMOVD", 2, Operands { v0, v1 })
+ // VMOVD xmm, r32
+ if isXMM(v0) && isReg32(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[0]), v[1], 0)
+ m.emit(0x7e)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVD r32, xmm
+ if isReg32(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), v[0], 0)
+ m.emit(0x6e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMOVD m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x6e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VMOVD xmm, m32
+ if isXMM(v0) && isM32(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0x7e)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // VMOVD xmm, r32
+ if isEVEXXMM(v0) && isReg32(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7d)
+ m.emit(0x08)
+ m.emit(0x7e)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVD r32, xmm
+ if isReg32(v0) && isEVEXXMM(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit(0x08)
+ m.emit(0x6e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMOVD m32, xmm
+ if isM32(v0) && isEVEXXMM(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x6e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VMOVD xmm, m32
+ if isEVEXXMM(v0) && isM32(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
+ m.emit(0x7e)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVD")
+ }
+ return p
+}
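+
+// Note: the AVX and AVX512F forms of VMOVD share the same surface syntax.
+// The EVEX variants are matched by isEVEXXMM, which (as the predicate names
+// suggest) presumably also covers the extended registers XMM16-XMM31 that
+// have no VEX encoding.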
+
+// VMOVDDUP performs "Move One Double-FP and Duplicate".
+//
+// Mnemonic : VMOVDDUP
+// Supported forms : (10 forms)
+//
+// * VMOVDDUP xmm, xmm [AVX]
+// * VMOVDDUP m64, xmm [AVX]
+// * VMOVDDUP ymm, ymm [AVX]
+// * VMOVDDUP m256, ymm [AVX]
+// * VMOVDDUP zmm, zmm{k}{z} [AVX512F]
+// * VMOVDDUP m512, zmm{k}{z} [AVX512F]
+// * VMOVDDUP xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMOVDDUP ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VMOVDDUP m64, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMOVDDUP m256, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VMOVDDUP(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VMOVDDUP", 2, Operands { v0, v1 })
+ // VMOVDDUP xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[1]), v[0], 0)
+ m.emit(0x12)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMOVDDUP m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x12)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VMOVDDUP ymm, ymm
+ if isYMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(7, hcode(v[1]), v[0], 0)
+ m.emit(0x12)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMOVDDUP m256, ymm
+ if isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(7, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x12)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VMOVDDUP zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xff)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x12)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMOVDDUP m512, zmm{k}{z}
+ if isM512(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x12)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VMOVDDUP xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xff)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x12)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMOVDDUP ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xff)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x12)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMOVDDUP m64, xmm{k}{z}
+ if isM64(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x12)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VMOVDDUP m256, ymm{k}{z}
+ if isM256(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x12)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVDDUP")
+ }
+ return p
+}
+
+// VMOVDQA performs "Move Aligned Double Quadword".
+//
+// Mnemonic : VMOVDQA
+// Supported forms : (6 forms)
+//
+// * VMOVDQA xmm, xmm [AVX]
+// * VMOVDQA m128, xmm [AVX]
+// * VMOVDQA ymm, ymm [AVX]
+// * VMOVDQA m256, ymm [AVX]
+// * VMOVDQA xmm, m128 [AVX]
+// * VMOVDQA ymm, m256 [AVX]
+//
+func (self *Program) VMOVDQA(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VMOVDQA", 2, Operands { v0, v1 })
+ // VMOVDQA xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), v[0], 0)
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[0]), v[1], 0)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVDQA m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x6f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VMOVDQA ymm, ymm
+ if isYMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[1]), v[0], 0)
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[0]), v[1], 0)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVDQA m256, ymm
+ if isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x6f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VMOVDQA xmm, m128
+ if isXMM(v0) && isM128(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0x7f)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // VMOVDQA ymm, m256
+ if isYMM(v0) && isM256(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0x7f)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVDQA")
+ }
+ return p
+}
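+
+// Note on semantics: VMOVDQA, like the VMOVDQA32/VMOVDQA64 variants below,
+// requires memory operands aligned to the full vector width and raises #GP
+// otherwise; the VMOVDQU family further down accepts unaligned addresses.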
+
+// VMOVDQA32 performs "Move Aligned Doubleword Values".
+//
+// Mnemonic : VMOVDQA32
+// Supported forms : (9 forms)
+//
+// * VMOVDQA32 zmm, m512{k}{z} [AVX512F]
+// * VMOVDQA32 zmm, zmm{k}{z} [AVX512F]
+// * VMOVDQA32 m512, zmm{k}{z} [AVX512F]
+// * VMOVDQA32 xmm, m128{k}{z} [AVX512F,AVX512VL]
+// * VMOVDQA32 xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMOVDQA32 ymm, m256{k}{z} [AVX512F,AVX512VL]
+// * VMOVDQA32 ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VMOVDQA32 m128, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMOVDQA32 m256, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VMOVDQA32(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VMOVDQA32", 2, Operands { v0, v1 })
+ // VMOVDQA32 zmm, m512{k}{z}
+ if isZMM(v0) && isM512kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x7f)
+ m.mrsd(lcode(v[0]), addr(v[1]), 64)
+ })
+ }
+ // VMOVDQA32 zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVDQA32 m512, zmm{k}{z}
+ if isM512(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x6f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VMOVDQA32 xmm, m128{k}{z}
+ if isEVEXXMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x7f)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ // VMOVDQA32 xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVDQA32 ymm, m256{k}{z}
+ if isEVEXYMM(v0) && isM256kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x7f)
+ m.mrsd(lcode(v[0]), addr(v[1]), 32)
+ })
+ }
+ // VMOVDQA32 ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVDQA32 m128, xmm{k}{z}
+ if isM128(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x6f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VMOVDQA32 m256, ymm{k}{z}
+ if isM256(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x6f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVDQA32")
+ }
+ return p
+}
+
+// VMOVDQA64 performs "Move Aligned Quadword Values".
+//
+// Mnemonic : VMOVDQA64
+// Supported forms : (9 forms)
+//
+// * VMOVDQA64 zmm, m512{k}{z} [AVX512F]
+// * VMOVDQA64 zmm, zmm{k}{z} [AVX512F]
+// * VMOVDQA64 m512, zmm{k}{z} [AVX512F]
+// * VMOVDQA64 xmm, m128{k}{z} [AVX512F,AVX512VL]
+// * VMOVDQA64 xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMOVDQA64 ymm, m256{k}{z} [AVX512F,AVX512VL]
+// * VMOVDQA64 ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VMOVDQA64 m128, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMOVDQA64 m256, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VMOVDQA64(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VMOVDQA64", 2, Operands { v0, v1 })
+ // VMOVDQA64 zmm, m512{k}{z}
+ if isZMM(v0) && isM512kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x7f)
+ m.mrsd(lcode(v[0]), addr(v[1]), 64)
+ })
+ }
+ // VMOVDQA64 zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVDQA64 m512, zmm{k}{z}
+ if isM512(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x6f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VMOVDQA64 xmm, m128{k}{z}
+ if isEVEXXMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x7f)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ // VMOVDQA64 xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVDQA64 ymm, m256{k}{z}
+ if isEVEXYMM(v0) && isM256kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x7f)
+ m.mrsd(lcode(v[0]), addr(v[1]), 32)
+ })
+ }
+ // VMOVDQA64 ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVDQA64 m128, xmm{k}{z}
+ if isM128(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x6f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VMOVDQA64 m256, ymm{k}{z}
+ if isM256(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x6f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVDQA64")
+ }
+ return p
+}
+
+// VMOVDQU performs "Move Unaligned Double Quadword".
+//
+// Mnemonic : VMOVDQU
+// Supported forms : (6 forms)
+//
+// * VMOVDQU xmm, xmm [AVX]
+// * VMOVDQU m128, xmm [AVX]
+// * VMOVDQU ymm, ymm [AVX]
+// * VMOVDQU m256, ymm [AVX]
+// * VMOVDQU xmm, m128 [AVX]
+// * VMOVDQU ymm, m256 [AVX]
+//
+func (self *Program) VMOVDQU(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VMOVDQU", 2, Operands { v0, v1 })
+ // VMOVDQU xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[1]), v[0], 0)
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[0]), v[1], 0)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVDQU m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x6f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VMOVDQU ymm, ymm
+ if isYMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(6, hcode(v[1]), v[0], 0)
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(6, hcode(v[0]), v[1], 0)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVDQU m256, ymm
+ if isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(6, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x6f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VMOVDQU xmm, m128
+ if isXMM(v0) && isM128(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0x7f)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // VMOVDQU ymm, m256
+ if isYMM(v0) && isM256(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(6, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0x7f)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVDQU")
+ }
+ return p
+}
+
+// VMOVDQU16 performs "Move Unaligned Word Values".
+//
+// Mnemonic : VMOVDQU16
+// Supported forms : (9 forms)
+//
+// * VMOVDQU16 zmm, m512{k}{z} [AVX512BW]
+// * VMOVDQU16 zmm, zmm{k}{z} [AVX512BW]
+// * VMOVDQU16 m512, zmm{k}{z} [AVX512BW]
+// * VMOVDQU16 xmm, m128{k}{z} [AVX512BW,AVX512VL]
+// * VMOVDQU16 xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VMOVDQU16 ymm, m256{k}{z} [AVX512BW,AVX512VL]
+// * VMOVDQU16 ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VMOVDQU16 m128, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VMOVDQU16 m256, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VMOVDQU16(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VMOVDQU16", 2, Operands { v0, v1 })
+ // VMOVDQU16 zmm, m512{k}{z}
+ if isZMM(v0) && isM512kz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x7f)
+ m.mrsd(lcode(v[0]), addr(v[1]), 64)
+ })
+ }
+ // VMOVDQU16 zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xff)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0xff)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVDQU16 m512, zmm{k}{z}
+ if isM512(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x6f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VMOVDQU16 xmm, m128{k}{z}
+ if isEVEXXMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x7f)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ // VMOVDQU16 xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xff)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0xff)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVDQU16 ymm, m256{k}{z}
+ if isEVEXYMM(v0) && isM256kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x7f)
+ m.mrsd(lcode(v[0]), addr(v[1]), 32)
+ })
+ }
+ // VMOVDQU16 ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xff)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0xff)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVDQU16 m128, xmm{k}{z}
+ if isM128(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x6f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VMOVDQU16 m256, ymm{k}{z}
+ if isM256(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x6f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVDQU16")
+ }
+ return p
+}
+
+// VMOVDQU32 performs "Move Unaligned Doubleword Values".
+//
+// Mnemonic : VMOVDQU32
+// Supported forms : (9 forms)
+//
+// * VMOVDQU32 zmm, m512{k}{z} [AVX512F]
+// * VMOVDQU32 zmm, zmm{k}{z} [AVX512F]
+// * VMOVDQU32 m512, zmm{k}{z} [AVX512F]
+// * VMOVDQU32 xmm, m128{k}{z} [AVX512F,AVX512VL]
+// * VMOVDQU32 xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMOVDQU32 ymm, m256{k}{z} [AVX512F,AVX512VL]
+// * VMOVDQU32 ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VMOVDQU32 m128, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMOVDQU32 m256, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VMOVDQU32(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VMOVDQU32", 2, Operands { v0, v1 })
+ // VMOVDQU32 zmm, m512{k}{z}
+ if isZMM(v0) && isM512kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x7f)
+ m.mrsd(lcode(v[0]), addr(v[1]), 64)
+ })
+ }
+ // VMOVDQU32 zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVDQU32 m512, zmm{k}{z}
+ if isM512(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x6f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VMOVDQU32 xmm, m128{k}{z}
+ if isEVEXXMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x7f)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ // VMOVDQU32 xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVDQU32 ymm, m256{k}{z}
+ if isEVEXYMM(v0) && isM256kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x7f)
+ m.mrsd(lcode(v[0]), addr(v[1]), 32)
+ })
+ }
+ // VMOVDQU32 ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVDQU32 m128, xmm{k}{z}
+ if isM128(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x6f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VMOVDQU32 m256, ymm{k}{z}
+ if isM256(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x6f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVDQU32")
+ }
+ return p
+}
+
+// VMOVDQU64 performs "Move Unaligned Quadword Values".
+//
+// Mnemonic : VMOVDQU64
+// Supported forms : (9 forms)
+//
+// * VMOVDQU64 zmm, m512{k}{z} [AVX512F]
+// * VMOVDQU64 zmm, zmm{k}{z} [AVX512F]
+// * VMOVDQU64 m512, zmm{k}{z} [AVX512F]
+// * VMOVDQU64 xmm, m128{k}{z} [AVX512F,AVX512VL]
+// * VMOVDQU64 xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMOVDQU64 ymm, m256{k}{z} [AVX512F,AVX512VL]
+// * VMOVDQU64 ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VMOVDQU64 m128, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMOVDQU64 m256, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VMOVDQU64(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VMOVDQU64", 2, Operands { v0, v1 })
+ // VMOVDQU64 zmm, m512{k}{z}
+ if isZMM(v0) && isM512kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x86, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x7f)
+ m.mrsd(lcode(v[0]), addr(v[1]), 64)
+ })
+ }
+ // VMOVDQU64 zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0xfe)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVDQU64 m512, zmm{k}{z}
+ if isM512(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x86, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x6f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VMOVDQU64 xmm, m128{k}{z}
+ if isEVEXXMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x86, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x7f)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ // VMOVDQU64 xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0xfe)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVDQU64 ymm, m256{k}{z}
+ if isEVEXYMM(v0) && isM256kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x86, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x7f)
+ m.mrsd(lcode(v[0]), addr(v[1]), 32)
+ })
+ }
+ // VMOVDQU64 ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0xfe)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVDQU64 m128, xmm{k}{z}
+ if isM128(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x6f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VMOVDQU64 m256, ymm{k}{z}
+ if isM256(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x86, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x6f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVDQU64")
+ }
+ return p
+}
+
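+// Usage sketch (hand-written, not generator output): operands follow the
+// AT&T-style order used throughout this package, source first and
+// destination last. ZMM0/ZMM1 are register constants defined by this
+// package; a masked {k}{z} destination would need the package's
+// mask-operand helpers, which this sketch deliberately avoids.
+func exampleVMOVDQU64(p *Program) {
+ p.VMOVDQU64(ZMM0, ZMM1) // copy 512 bits of quadwords from ZMM0 into ZMM1
+}
+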
+// VMOVDQU8 performs "Move Unaligned Byte Values".
+//
+// Mnemonic : VMOVDQU8
+// Supported forms : (9 forms)
+//
+// * VMOVDQU8 zmm, m512{k}{z} [AVX512BW]
+// * VMOVDQU8 zmm, zmm{k}{z} [AVX512BW]
+// * VMOVDQU8 m512, zmm{k}{z} [AVX512BW]
+// * VMOVDQU8 xmm, m128{k}{z} [AVX512BW,AVX512VL]
+// * VMOVDQU8 xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VMOVDQU8 ymm, m256{k}{z} [AVX512BW,AVX512VL]
+// * VMOVDQU8 ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VMOVDQU8 m128, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VMOVDQU8 m256, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VMOVDQU8(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VMOVDQU8", 2, Operands { v0, v1 })
+ // VMOVDQU8 zmm, m512{k}{z}
+ if isZMM(v0) && isM512kz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x07, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x7f)
+ m.mrsd(lcode(v[0]), addr(v[1]), 64)
+ })
+ }
+ // VMOVDQU8 zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7f)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7f)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVDQU8 m512, zmm{k}{z}
+ if isM512(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x07, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x6f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VMOVDQU8 xmm, m128{k}{z}
+ if isEVEXXMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x07, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x7f)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ // VMOVDQU8 xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7f)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7f)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVDQU8 ymm, m256{k}{z}
+ if isEVEXYMM(v0) && isM256kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x07, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x7f)
+ m.mrsd(lcode(v[0]), addr(v[1]), 32)
+ })
+ }
+ // VMOVDQU8 ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7f)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x6f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7f)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVDQU8 m128, xmm{k}{z}
+ if isM128(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x07, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x6f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VMOVDQU8 m256, ymm{k}{z}
+ if isM256(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x07, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x6f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVDQU8")
+ }
+ return p
+}
+
+// VMOVHLPS performs "Move Packed Single-Precision Floating-Point Values High to Low".
+//
+// Mnemonic : VMOVHLPS
+// Supported forms : (2 forms)
+//
+// * VMOVHLPS xmm, xmm, xmm [AVX]
+// * VMOVHLPS xmm, xmm, xmm [AVX512F]
+//
+func (self *Program) VMOVHLPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VMOVHLPS", 3, Operands { v0, v1, v2 })
+ // VMOVHLPS xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x12)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMOVHLPS xmm, xmm, xmm
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x00)
+ m.emit(0x12)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVHLPS")
+ }
+ return p
+}
+
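+// Usage sketch (hand-written, not generator output): VMOVHLPS takes three
+// XMM operands in source-first order, so the call below merges the high
+// halves of the first two registers into the destination. VMOVLHPS
+// (further down) is the mirror-image form.
+func exampleVMOVHLPS(p *Program) {
+ p.VMOVHLPS(XMM1, XMM2, XMM0) // XMM0[63:0] = XMM1[127:64]; XMM0[127:64] = XMM2[127:64]
+}
+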
+// VMOVHPD performs "Move High Packed Double-Precision Floating-Point Value".
+//
+// Mnemonic : VMOVHPD
+// Supported forms : (4 forms)
+//
+// * VMOVHPD xmm, m64 [AVX]
+// * VMOVHPD m64, xmm, xmm [AVX]
+// * VMOVHPD xmm, m64 [AVX512F]
+// * VMOVHPD m64, xmm, xmm [AVX512F]
+//
+func (self *Program) VMOVHPD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VMOVHPD", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VMOVHPD", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VMOVHPD takes 2 or 3 operands")
+ }
+ // VMOVHPD xmm, m64
+ if len(vv) == 0 && isXMM(v0) && isM64(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0x17)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // VMOVHPD m64, xmm, xmm
+ if len(vv) == 1 && isM64(v0) && isXMM(v1) && isXMM(vv[0]) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x16)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMOVHPD xmm, m64
+ if len(vv) == 0 && isEVEXXMM(v0) && isM64(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
+ m.emit(0x17)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VMOVHPD m64, xmm, xmm
+ if len(vv) == 1 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
+ m.emit(0x16)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVHPD")
+ }
+ return p
+}
+
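+// Usage sketch (hand-written, not generator output): VMOVHPD is variadic
+// because the store form takes two operands while the load/merge form
+// takes three. Ptr is assumed here to be this package's
+// base+displacement memory-operand constructor; substitute whatever
+// helper the package actually exports if the name differs.
+func exampleVMOVHPD(p *Program) {
+ p.VMOVHPD(XMM0, Ptr(RAX, 0)) // 2 operands: store XMM0[127:64] to [RAX]
+ p.VMOVHPD(Ptr(RAX, 0), XMM1, XMM0) // 3 operands: XMM0[127:64] = [RAX], XMM0[63:0] = XMM1[63:0]
+}
+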
+// VMOVHPS performs "Move High Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VMOVHPS
+// Supported forms : (4 forms)
+//
+// * VMOVHPS xmm, m64 [AVX]
+// * VMOVHPS m64, xmm, xmm [AVX]
+// * VMOVHPS xmm, m64 [AVX512F]
+// * VMOVHPS m64, xmm, xmm [AVX512F]
+//
+func (self *Program) VMOVHPS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VMOVHPS", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VMOVHPS", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VMOVHPS takes 2 or 3 operands")
+ }
+ // VMOVHPS xmm, m64
+ if len(vv) == 0 && isXMM(v0) && isM64(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0x17)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // VMOVHPS m64, xmm, xmm
+ if len(vv) == 1 && isM64(v0) && isXMM(v1) && isXMM(vv[0]) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x16)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMOVHPS xmm, m64
+ if len(vv) == 0 && isEVEXXMM(v0) && isM64(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
+ m.emit(0x17)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VMOVHPS m64, xmm, xmm
+ if len(vv) == 1 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
+ m.emit(0x16)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVHPS")
+ }
+ return p
+}
+
+// VMOVLHPS performs "Move Packed Single-Precision Floating-Point Values Low to High".
+//
+// Mnemonic : VMOVLHPS
+// Supported forms : (2 forms)
+//
+// * VMOVLHPS xmm, xmm, xmm [AVX]
+// * VMOVLHPS xmm, xmm, xmm [AVX512F]
+//
+func (self *Program) VMOVLHPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VMOVLHPS", 3, Operands { v0, v1, v2 })
+ // VMOVLHPS xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x16)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMOVLHPS xmm, xmm, xmm
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x00)
+ m.emit(0x16)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVLHPS")
+ }
+ return p
+}
+
+// VMOVLPD performs "Move Low Packed Double-Precision Floating-Point Value".
+//
+// Mnemonic : VMOVLPD
+// Supported forms : (4 forms)
+//
+// * VMOVLPD xmm, m64 [AVX]
+// * VMOVLPD m64, xmm, xmm [AVX]
+// * VMOVLPD xmm, m64 [AVX512F]
+// * VMOVLPD m64, xmm, xmm [AVX512F]
+//
+func (self *Program) VMOVLPD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VMOVLPD", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VMOVLPD", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VMOVLPD takes 2 or 3 operands")
+ }
+ // VMOVLPD xmm, m64
+ if len(vv) == 0 && isXMM(v0) && isM64(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0x13)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // VMOVLPD m64, xmm, xmm
+ if len(vv) == 1 && isM64(v0) && isXMM(v1) && isXMM(vv[0]) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x12)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMOVLPD xmm, m64
+ if len(vv) == 0 && isEVEXXMM(v0) && isM64(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
+ m.emit(0x13)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VMOVLPD m64, xmm, xmm
+ if len(vv) == 1 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
+ m.emit(0x12)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVLPD")
+ }
+ return p
+}
+
+// VMOVLPS performs "Move Low Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VMOVLPS
+// Supported forms : (4 forms)
+//
+// * VMOVLPS xmm, m64 [AVX]
+// * VMOVLPS m64, xmm, xmm [AVX]
+// * VMOVLPS xmm, m64 [AVX512F]
+// * VMOVLPS m64, xmm, xmm [AVX512F]
+//
+func (self *Program) VMOVLPS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VMOVLPS", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VMOVLPS", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VMOVLPS takes 2 or 3 operands")
+ }
+ // VMOVLPS xmm, m64
+ if len(vv) == 0 && isXMM(v0) && isM64(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0x13)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // VMOVLPS m64, xmm, xmm
+ if len(vv) == 1 && isM64(v0) && isXMM(v1) && isXMM(vv[0]) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x12)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMOVLPS xmm, m64
+ if len(vv) == 0 && isEVEXXMM(v0) && isM64(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
+ m.emit(0x13)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VMOVLPS m64, xmm, xmm
+ if len(vv) == 1 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
+ m.emit(0x12)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVLPS")
+ }
+ return p
+}
+
+// VMOVMSKPD performs "Extract Packed Double-Precision Floating-Point Sign Mask".
+//
+// Mnemonic : VMOVMSKPD
+// Supported forms : (2 forms)
+//
+// * VMOVMSKPD xmm, r32 [AVX]
+// * VMOVMSKPD ymm, r32 [AVX]
+//
+func (self *Program) VMOVMSKPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VMOVMSKPD", 2, Operands { v0, v1 })
+ // VMOVMSKPD xmm, r32
+ if isXMM(v0) && isReg32(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), v[0], 0)
+ m.emit(0x50)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMOVMSKPD ymm, r32
+ if isYMM(v0) && isReg32(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[1]), v[0], 0)
+ m.emit(0x50)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVMSKPD")
+ }
+ return p
+}
+
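+// Usage sketch (hand-written, not generator output): the sign-mask
+// extractions write to a 32-bit GPR, so the destination is a Reg32
+// constant such as EAX.
+func exampleVMOVMSKPD(p *Program) {
+ p.VMOVMSKPD(XMM0, EAX) // EAX[1:0] = sign bits of the two doubles in XMM0
+}
+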
+// VMOVMSKPS performs "Extract Packed Single-Precision Floating-Point Sign Mask".
+//
+// Mnemonic : VMOVMSKPS
+// Supported forms : (2 forms)
+//
+// * VMOVMSKPS xmm, r32 [AVX]
+// * VMOVMSKPS ymm, r32 [AVX]
+//
+func (self *Program) VMOVMSKPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VMOVMSKPS", 2, Operands { v0, v1 })
+ // VMOVMSKPS xmm, r32
+ if isXMM(v0) && isReg32(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[1]), v[0], 0)
+ m.emit(0x50)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMOVMSKPS ymm, r32
+ if isYMM(v0) && isReg32(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[1]), v[0], 0)
+ m.emit(0x50)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVMSKPS")
+ }
+ return p
+}
+
+// VMOVNTDQ performs "Store Double Quadword Using Non-Temporal Hint".
+//
+// Mnemonic : VMOVNTDQ
+// Supported forms : (5 forms)
+//
+// * VMOVNTDQ xmm, m128 [AVX]
+// * VMOVNTDQ ymm, m256 [AVX]
+// * VMOVNTDQ zmm, m512 [AVX512F]
+// * VMOVNTDQ xmm, m128 [AVX512F,AVX512VL]
+// * VMOVNTDQ ymm, m256 [AVX512F,AVX512VL]
+//
+func (self *Program) VMOVNTDQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VMOVNTDQ", 2, Operands { v0, v1 })
+ // VMOVNTDQ xmm, m128
+ if isXMM(v0) && isM128(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0xe7)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // VMOVNTDQ ymm, m256
+ if isYMM(v0) && isM256(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0xe7)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // VMOVNTDQ zmm, m512
+ if isZMM(v0) && isM512(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
+ m.emit(0xe7)
+ m.mrsd(lcode(v[0]), addr(v[1]), 64)
+ })
+ }
+ // VMOVNTDQ xmm, m128
+ if isEVEXXMM(v0) && isM128(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
+ m.emit(0xe7)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ // VMOVNTDQ ymm, m256
+ if isEVEXYMM(v0) && isM256(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
+ m.emit(0xe7)
+ m.mrsd(lcode(v[0]), addr(v[1]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVNTDQ")
+ }
+ return p
+}
+
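+// Usage sketch (hand-written, not generator output): the non-temporal
+// stores only accept memory destinations, which must be naturally aligned
+// at run time. As in the VMOVHPD sketch above, Ptr is an assumed
+// memory-operand constructor.
+func exampleVMOVNTDQ(p *Program) {
+ p.VMOVNTDQ(YMM0, Ptr(RDI, 0)) // stream 256 bits to [RDI], bypassing the cache hierarchy
+}
+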
+// VMOVNTDQA performs "Load Double Quadword Non-Temporal Aligned Hint".
+//
+// Mnemonic : VMOVNTDQA
+// Supported forms : (5 forms)
+//
+// * VMOVNTDQA m128, xmm [AVX]
+// * VMOVNTDQA m256, ymm [AVX2]
+// * VMOVNTDQA m512, zmm [AVX512F]
+// * VMOVNTDQA m128, xmm [AVX512F,AVX512VL]
+// * VMOVNTDQA m256, ymm [AVX512F,AVX512VL]
+//
+func (self *Program) VMOVNTDQA(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VMOVNTDQA", 2, Operands { v0, v1 })
+ // VMOVNTDQA m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x2a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VMOVNTDQA m256, ymm
+ if isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x2a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VMOVNTDQA m512, zmm
+ if isM512(v0) && isZMM(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x2a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VMOVNTDQA m128, xmm
+ if isM128(v0) && isEVEXXMM(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x2a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VMOVNTDQA m256, ymm
+ if isM256(v0) && isEVEXYMM(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x2a)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVNTDQA")
+ }
+ return p
+}
+
+// VMOVNTPD performs "Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint".
+//
+// Mnemonic : VMOVNTPD
+// Supported forms : (5 forms)
+//
+// * VMOVNTPD xmm, m128 [AVX]
+// * VMOVNTPD ymm, m256 [AVX]
+// * VMOVNTPD zmm, m512 [AVX512F]
+// * VMOVNTPD xmm, m128 [AVX512F,AVX512VL]
+// * VMOVNTPD ymm, m256 [AVX512F,AVX512VL]
+//
+func (self *Program) VMOVNTPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VMOVNTPD", 2, Operands { v0, v1 })
+ // VMOVNTPD xmm, m128
+ if isXMM(v0) && isM128(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0x2b)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // VMOVNTPD ymm, m256
+ if isYMM(v0) && isM256(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0x2b)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // VMOVNTPD zmm, m512
+ if isZMM(v0) && isM512(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
+ m.emit(0x2b)
+ m.mrsd(lcode(v[0]), addr(v[1]), 64)
+ })
+ }
+ // VMOVNTPD xmm, m128
+ if isEVEXXMM(v0) && isM128(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
+ m.emit(0x2b)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ // VMOVNTPD ymm, m256
+ if isEVEXYMM(v0) && isM256(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
+ m.emit(0x2b)
+ m.mrsd(lcode(v[0]), addr(v[1]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVNTPD")
+ }
+ return p
+}
+
+// VMOVNTPS performs "Store Packed Single-Precision Floating-Point Values Using Non-Temporal Hint".
+//
+// Mnemonic : VMOVNTPS
+// Supported forms : (5 forms)
+//
+// * VMOVNTPS xmm, m128 [AVX]
+// * VMOVNTPS ymm, m256 [AVX]
+// * VMOVNTPS zmm, m512 [AVX512F]
+// * VMOVNTPS xmm, m128 [AVX512F,AVX512VL]
+// * VMOVNTPS ymm, m256 [AVX512F,AVX512VL]
+//
+func (self *Program) VMOVNTPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VMOVNTPS", 2, Operands { v0, v1 })
+ // VMOVNTPS xmm, m128
+ if isXMM(v0) && isM128(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0x2b)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // VMOVNTPS ymm, m256
+ if isYMM(v0) && isM256(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0x2b)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // VMOVNTPS zmm, m512
+ if isZMM(v0) && isM512(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b10, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
+ m.emit(0x2b)
+ m.mrsd(lcode(v[0]), addr(v[1]), 64)
+ })
+ }
+ // VMOVNTPS xmm, m128
+ if isEVEXXMM(v0) && isM128(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
+ m.emit(0x2b)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ // VMOVNTPS ymm, m256
+ if isEVEXYMM(v0) && isM256(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
+ m.emit(0x2b)
+ m.mrsd(lcode(v[0]), addr(v[1]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVNTPS")
+ }
+ return p
+}
+
+// VMOVQ performs "Move Quadword".
+//
+// Mnemonic : VMOVQ
+// Supported forms : (10 forms)
+//
+// * VMOVQ xmm, r64 [AVX]
+// * VMOVQ r64, xmm [AVX]
+// * VMOVQ xmm, xmm [AVX]
+// * VMOVQ m64, xmm [AVX]
+// * VMOVQ xmm, m64 [AVX]
+// * VMOVQ xmm, r64 [AVX512F]
+// * VMOVQ r64, xmm [AVX512F]
+// * VMOVQ xmm, xmm [AVX512F]
+// * VMOVQ m64, xmm [AVX512F]
+// * VMOVQ xmm, m64 [AVX512F]
+//
+func (self *Program) VMOVQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VMOVQ", 2, Operands { v0, v1 })
+ // VMOVQ xmm, r64
+ if isXMM(v0) && isReg64(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1 ^ (hcode(v[0]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0xf9)
+ m.emit(0x7e)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVQ r64, xmm
+ if isReg64(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe1 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9)
+ m.emit(0x6e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMOVQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[1]), v[0], 0)
+ m.emit(0x7e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[0]), v[1], 0)
+ m.emit(0xd6)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVQ m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x7e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b1, 0x81, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x6e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VMOVQ xmm, m64
+ if isXMM(v0) && isM64(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0xd6)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b1, 0x81, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0x7e)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // VMOVQ xmm, r64
+ if isEVEXXMM(v0) && isReg64(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0xfd)
+ m.emit(0x08)
+ m.emit(0x7e)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVQ r64, xmm
+ if isReg64(v0) && isEVEXXMM(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit(0x08)
+ m.emit(0x6e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMOVQ xmm, xmm
+ if isEVEXXMM(v0) && isEVEXXMM(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit(0x08)
+ m.emit(0x7e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0xfd)
+ m.emit(0x08)
+ m.emit(0xd6)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVQ m64, xmm
+ if isM64(v0) && isEVEXXMM(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x6e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x7e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VMOVQ xmm, m64
+ if isEVEXXMM(v0) && isM64(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
+ m.emit(0x7e)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0)
+ m.emit(0xd6)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVQ")
+ }
+ return p
+}
+
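+// Usage sketch (hand-written, not generator output): VMOVQ moves a
+// quadword in either direction between a GPR and an XMM register, still
+// in source-first order.
+func exampleVMOVQ(p *Program) {
+ p.VMOVQ(RAX, XMM0) // XMM0[63:0] = RAX, upper bits zeroed
+ p.VMOVQ(XMM0, RAX) // RAX = XMM0[63:0]
+}
+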
+// VMOVSD performs "Move Scalar Double-Precision Floating-Point Value".
+//
+// Mnemonic : VMOVSD
+// Supported forms : (6 forms)
+//
+// * VMOVSD m64, xmm [AVX]
+// * VMOVSD xmm, m64 [AVX]
+// * VMOVSD xmm, xmm, xmm [AVX]
+// * VMOVSD xmm, m64{k} [AVX512F]
+// * VMOVSD m64, xmm{k}{z} [AVX512F]
+// * VMOVSD xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VMOVSD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VMOVSD", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VMOVSD", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VMOVSD takes 2 or 3 operands")
+ }
+ // VMOVSD m64, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x10)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VMOVSD xmm, m64
+ if len(vv) == 0 && isXMM(v0) && isM64(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0x11)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // VMOVSD xmm, xmm, xmm
+ if len(vv) == 1 && isXMM(v0) && isXMM(v1) && isXMM(vv[0]) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[0]), v[2], hlcode(v[1]))
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[2]))
+ })
+ }
+ // VMOVSD xmm, m64{k}
+ if len(vv) == 0 && isEVEXXMM(v0) && isM64k(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x11)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VMOVSD m64, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x10)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VMOVSD xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xff ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0xff ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[2]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVSD")
+ }
+ return p
+}
+
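+// Usage sketch (hand-written, not generator output): like VMOVHPD above,
+// VMOVSD dispatches on arity; the register-only merge form needs all
+// three operands.
+func exampleVMOVSD(p *Program) {
+ p.VMOVSD(XMM2, XMM1, XMM0) // XMM0[63:0] = XMM2[63:0], XMM0[127:64] = XMM1[127:64]
+}
+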
+// VMOVSHDUP performs "Move Packed Single-FP High and Duplicate".
+//
+// Mnemonic : VMOVSHDUP
+// Supported forms : (10 forms)
+//
+// * VMOVSHDUP xmm, xmm [AVX]
+// * VMOVSHDUP m128, xmm [AVX]
+// * VMOVSHDUP ymm, ymm [AVX]
+// * VMOVSHDUP m256, ymm [AVX]
+// * VMOVSHDUP zmm, zmm{k}{z} [AVX512F]
+// * VMOVSHDUP m512, zmm{k}{z} [AVX512F]
+// * VMOVSHDUP xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMOVSHDUP ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VMOVSHDUP m128, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMOVSHDUP m256, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VMOVSHDUP(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VMOVSHDUP", 2, Operands { v0, v1 })
+ // VMOVSHDUP xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[1]), v[0], 0)
+ m.emit(0x16)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMOVSHDUP m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x16)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VMOVSHDUP ymm, ymm
+ if isYMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(6, hcode(v[1]), v[0], 0)
+ m.emit(0x16)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMOVSHDUP m256, ymm
+ if isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(6, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x16)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VMOVSHDUP zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x16)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMOVSHDUP m512, zmm{k}{z}
+ if isM512(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x16)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VMOVSHDUP xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x16)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMOVSHDUP ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x16)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMOVSHDUP m128, xmm{k}{z}
+ if isM128(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x16)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VMOVSHDUP m256, ymm{k}{z}
+ if isM256(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x16)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVSHDUP")
+ }
+ return p
+}
+
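+// Usage sketch (hand-written, not generator output): the duplicate moves
+// are plain two-operand transforms; VMOVSLDUP below mirrors this with the
+// even-indexed lanes.
+func exampleVMOVSHDUP(p *Program) {
+ p.VMOVSHDUP(XMM1, XMM0) // XMM0 float lanes = { XMM1[1], XMM1[1], XMM1[3], XMM1[3] }
+}
+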
+// VMOVSLDUP performs "Move Packed Single-FP Low and Duplicate".
+//
+// Mnemonic : VMOVSLDUP
+// Supported forms : (10 forms)
+//
+// * VMOVSLDUP xmm, xmm [AVX]
+// * VMOVSLDUP m128, xmm [AVX]
+// * VMOVSLDUP ymm, ymm [AVX]
+// * VMOVSLDUP m256, ymm [AVX]
+// * VMOVSLDUP zmm, zmm{k}{z} [AVX512F]
+// * VMOVSLDUP m512, zmm{k}{z} [AVX512F]
+// * VMOVSLDUP xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMOVSLDUP ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VMOVSLDUP m128, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMOVSLDUP m256, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VMOVSLDUP(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VMOVSLDUP", 2, Operands { v0, v1 })
+ // VMOVSLDUP xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[1]), v[0], 0)
+ m.emit(0x12)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMOVSLDUP m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x12)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VMOVSLDUP ymm, ymm
+ if isYMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(6, hcode(v[1]), v[0], 0)
+ m.emit(0x12)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMOVSLDUP m256, ymm
+ if isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(6, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x12)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VMOVSLDUP zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x12)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMOVSLDUP m512, zmm{k}{z}
+ if isM512(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x12)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VMOVSLDUP xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x12)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMOVSLDUP ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x12)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMOVSLDUP m128, xmm{k}{z}
+ if isM128(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x12)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VMOVSLDUP m256, ymm{k}{z}
+ if isM256(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x12)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVSLDUP")
+ }
+ return p
+}
+
+// VMOVSS performs "Move Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VMOVSS
+// Supported forms : (6 forms)
+//
+// * VMOVSS m32, xmm [AVX]
+// * VMOVSS xmm, m32 [AVX]
+// * VMOVSS xmm, xmm, xmm [AVX]
+// * VMOVSS xmm, m32{k} [AVX512F]
+// * VMOVSS m32, xmm{k}{z} [AVX512F]
+// * VMOVSS xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VMOVSS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VMOVSS", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VMOVSS", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VMOVSS takes 2 or 3 operands")
+ }
+ // VMOVSS m32, xmm
+ if len(vv) == 0 && isM32(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x10)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VMOVSS xmm, m32
+ if len(vv) == 0 && isXMM(v0) && isM32(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0x11)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // VMOVSS xmm, xmm, xmm
+ if len(vv) == 1 && isXMM(v0) && isXMM(v1) && isXMM(vv[0]) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[0]), v[2], hlcode(v[1]))
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[2]))
+ })
+ }
+ // VMOVSS xmm, m32{k}
+ if len(vv) == 0 && isEVEXXMM(v0) && isM32k(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x11)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ // VMOVSS m32, xmm{k}{z}
+ if len(vv) == 0 && isM32(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x10)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VMOVSS xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[2]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVSS")
+ }
+ return p
+}
+
+// VMOVUPD performs "Move Unaligned Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VMOVUPD
+// Supported forms : (15 forms)
+//
+// * VMOVUPD xmm, xmm [AVX]
+// * VMOVUPD m128, xmm [AVX]
+// * VMOVUPD ymm, ymm [AVX]
+// * VMOVUPD m256, ymm [AVX]
+// * VMOVUPD xmm, m128 [AVX]
+// * VMOVUPD ymm, m256 [AVX]
+// * VMOVUPD zmm, m512{k}{z} [AVX512F]
+// * VMOVUPD zmm, zmm{k}{z} [AVX512F]
+// * VMOVUPD m512, zmm{k}{z} [AVX512F]
+// * VMOVUPD xmm, m128{k}{z} [AVX512F,AVX512VL]
+// * VMOVUPD xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMOVUPD ymm, m256{k}{z} [AVX512F,AVX512VL]
+// * VMOVUPD ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VMOVUPD m128, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMOVUPD m256, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VMOVUPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VMOVUPD", 2, Operands { v0, v1 })
+ // VMOVUPD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), v[0], 0)
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[0]), v[1], 0)
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVUPD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x10)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VMOVUPD ymm, ymm
+ if isYMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[1]), v[0], 0)
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[0]), v[1], 0)
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVUPD m256, ymm
+ if isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x10)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VMOVUPD xmm, m128
+ if isXMM(v0) && isM128(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0x11)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // VMOVUPD ymm, m256
+ if isYMM(v0) && isM256(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0x11)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // VMOVUPD zmm, m512{k}{z}
+ if isZMM(v0) && isM512kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x11)
+ m.mrsd(lcode(v[0]), addr(v[1]), 64)
+ })
+ }
+ // VMOVUPD zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVUPD m512, zmm{k}{z}
+ if isM512(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x10)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VMOVUPD xmm, m128{k}{z}
+ if isEVEXXMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x11)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ // VMOVUPD xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVUPD ymm, m256{k}{z}
+ if isEVEXYMM(v0) && isM256kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x11)
+ m.mrsd(lcode(v[0]), addr(v[1]), 32)
+ })
+ }
+ // VMOVUPD ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVUPD m128, xmm{k}{z}
+ if isM128(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x10)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VMOVUPD m256, ymm{k}{z}
+ if isM256(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x10)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVUPD")
+ }
+ return p
+}
+
+// VMOVUPS performs "Move Unaligned Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VMOVUPS
+// Supported forms : (15 forms)
+//
+// * VMOVUPS xmm, xmm [AVX]
+// * VMOVUPS m128, xmm [AVX]
+// * VMOVUPS ymm, ymm [AVX]
+// * VMOVUPS m256, ymm [AVX]
+// * VMOVUPS xmm, m128 [AVX]
+// * VMOVUPS ymm, m256 [AVX]
+// * VMOVUPS zmm, m512{k}{z} [AVX512F]
+// * VMOVUPS zmm, zmm{k}{z} [AVX512F]
+// * VMOVUPS m512, zmm{k}{z} [AVX512F]
+// * VMOVUPS xmm, m128{k}{z} [AVX512F,AVX512VL]
+// * VMOVUPS xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMOVUPS ymm, m256{k}{z} [AVX512F,AVX512VL]
+// * VMOVUPS ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VMOVUPS m128, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMOVUPS m256, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VMOVUPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VMOVUPS", 2, Operands { v0, v1 })
+ // VMOVUPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[1]), v[0], 0)
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[0]), v[1], 0)
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVUPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x10)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VMOVUPS ymm, ymm
+ if isYMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[1]), v[0], 0)
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[0]), v[1], 0)
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVUPS m256, ymm
+ if isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x10)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VMOVUPS xmm, m128
+ if isXMM(v0) && isM128(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0x11)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // VMOVUPS ymm, m256
+ if isYMM(v0) && isM256(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[0]), addr(v[1]), 0)
+ m.emit(0x11)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ // VMOVUPS zmm, m512{k}{z}
+ if isZMM(v0) && isM512kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x11)
+ m.mrsd(lcode(v[0]), addr(v[1]), 64)
+ })
+ }
+ // VMOVUPS zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVUPS m512, zmm{k}{z}
+ if isM512(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x10)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VMOVUPS xmm, m128{k}{z}
+ if isEVEXXMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x11)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ // VMOVUPS xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVUPS ymm, m256{k}{z}
+ if isEVEXYMM(v0) && isM256kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x11)
+ m.mrsd(lcode(v[0]), addr(v[1]), 32)
+ })
+ }
+ // VMOVUPS ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMOVUPS m128, xmm{k}{z}
+ if isM128(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x10)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VMOVUPS m256, ymm{k}{z}
+ if isM256(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x10)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMOVUPS")
+ }
+ return p
+}
+
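+// Usage sketch (hand-written, not generator output): VMOVUPS takes the
+// source first and the destination last, matching the form comments above.
+// XMM1/XMM2 are assumed to be this package's exported register constants;
+// an m128 memory operand in either position selects the load (0x10) or
+// store (0x11) encodings instead of the register-register form.
+func exampleVMOVUPS(p *Program) {
+    p.VMOVUPS(XMM1, XMM2) // copy 128 bits of packed floats, unaligned-safe
+}
+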
+// VMPSADBW performs "Compute Multiple Packed Sums of Absolute Difference".
+//
+// Mnemonic : VMPSADBW
+// Supported forms : (4 forms)
+//
+// * VMPSADBW imm8, xmm, xmm, xmm [AVX]
+// * VMPSADBW imm8, m128, xmm, xmm [AVX]
+// * VMPSADBW imm8, ymm, ymm, ymm [AVX2]
+// * VMPSADBW imm8, m256, ymm, ymm [AVX2]
+//
+func (self *Program) VMPSADBW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VMPSADBW", 4, Operands { v0, v1, v2, v3 })
+ // VMPSADBW imm8, xmm, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VMPSADBW imm8, m128, xmm, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x42)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VMPSADBW imm8, ymm, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x42)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VMPSADBW imm8, m256, ymm, ymm
+ if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x42)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMPSADBW")
+ }
+ return p
+}
+
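+// Usage sketch (hand-written, not generator output): the immediate comes
+// first, per the "VMPSADBW imm8, xmm, xmm, xmm" form above, and the last
+// operand is the destination. The plain Go int immediate and the XMM
+// constants are assumed to satisfy isImm8/isXMM as elsewhere in the package.
+func exampleVMPSADBW(p *Program) {
+    // imm8 = 0x01 selects the 4-byte block offsets for the SAD windows.
+    p.VMPSADBW(0x01, XMM1, XMM2, XMM3)
+}
+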
+// VMULPD performs "Multiply Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VMULPD
+// Supported forms : (11 forms)
+//
+// * VMULPD xmm, xmm, xmm [AVX]
+// * VMULPD m128, xmm, xmm [AVX]
+// * VMULPD ymm, ymm, ymm [AVX]
+// * VMULPD m256, ymm, ymm [AVX]
+// * VMULPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VMULPD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VMULPD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VMULPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMULPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMULPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VMULPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VMULPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VMULPD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VMULPD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VMULPD takes 3 or 4 operands")
+ }
+ // VMULPD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMULPD m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x59)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMULPD ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMULPD m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x59)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMULPD m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x59)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VMULPD {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMULPD zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMULPD m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x59)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VMULPD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMULPD m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x59)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VMULPD ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMULPD")
+ }
+ return p
+}
+
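+// Usage sketch (hand-written): VMPSADBW aside, note that VMULPD is variadic,
+// so one method covers both the 3-operand forms and the 4-operand {er} form;
+// the extra leading operand of the latter must satisfy isER (a rounding-mode
+// value), so only the plain 3-operand register form is shown here.
+func exampleVMULPD(p *Program) {
+    p.VMULPD(YMM0, YMM1, YMM2) // YMM2 = YMM1 * YMM0, packed doubles
+}
+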
+// VMULPS performs "Multiply Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VMULPS
+// Supported forms : (11 forms)
+//
+// * VMULPS xmm, xmm, xmm [AVX]
+// * VMULPS m128, xmm, xmm [AVX]
+// * VMULPS ymm, ymm, ymm [AVX]
+// * VMULPS m256, ymm, ymm [AVX]
+// * VMULPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VMULPS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VMULPS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VMULPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMULPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VMULPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VMULPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VMULPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VMULPS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VMULPS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VMULPS takes 3 or 4 operands")
+ }
+ // VMULPS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMULPS m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x59)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMULPS ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMULPS m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x59)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMULPS m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x59)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VMULPS {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMULPS zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMULPS m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x59)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VMULPS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMULPS m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x59)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VMULPS ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMULPS")
+ }
+ return p
+}
+
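+// Usage sketch (hand-written): same shape as VMULPD but for packed singles.
+// The m512/m32bcst form takes a memory operand whose broadcast flag feeds
+// bcode(v[0]) in the encoder above; here a bare ZMM register is used and is
+// assumed to also satisfy the zmm{k}{z} operand class when no mask is given.
+func exampleVMULPS(p *Program) {
+    p.VMULPS(ZMM0, ZMM1, ZMM2) // ZMM2 = ZMM1 * ZMM0 (EVEX.512 encoding)
+}
+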
+// VMULSD performs "Multiply Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : VMULSD
+// Supported forms : (5 forms)
+//
+// * VMULSD xmm, xmm, xmm [AVX]
+// * VMULSD m64, xmm, xmm [AVX]
+// * VMULSD m64, xmm, xmm{k}{z} [AVX512F]
+// * VMULSD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VMULSD xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VMULSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VMULSD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VMULSD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VMULSD takes 3 or 4 operands")
+ }
+ // VMULSD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMULSD m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x59)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMULSD m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x59)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VMULSD {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xff ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMULSD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xff ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMULSD")
+ }
+ return p
+}
+
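+// Usage sketch (hand-written): scalar double multiply; only the low lane is
+// computed, and the upper bits of the destination are copied from the middle
+// operand. Register constants are assumed exported by this package.
+func exampleVMULSD(p *Program) {
+    p.VMULSD(XMM0, XMM1, XMM2) // XMM2[63:0] = XMM1[63:0] * XMM0[63:0]
+}
+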
+// VMULSS performs "Multiply Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VMULSS
+// Supported forms : (5 forms)
+//
+// * VMULSS xmm, xmm, xmm [AVX]
+// * VMULSS m32, xmm, xmm [AVX]
+// * VMULSS m32, xmm, xmm{k}{z} [AVX512F]
+// * VMULSS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VMULSS xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VMULSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VMULSS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VMULSS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VMULSS takes 3 or 4 operands")
+ }
+ // VMULSS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VMULSS m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x59)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VMULSS m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x59)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VMULSS {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VMULSS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VMULSS")
+ }
+ return p
+}
+
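+// Usage sketch (hand-written): the single-precision counterpart of VMULSD,
+// operating on the low 32-bit lane only.
+func exampleVMULSS(p *Program) {
+    p.VMULSS(XMM0, XMM1, XMM2) // XMM2[31:0] = XMM1[31:0] * XMM0[31:0]
+}
+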
+// VORPD performs "Bitwise Logical OR of Double-Precision Floating-Point Values".
+//
+// Mnemonic : VORPD
+// Supported forms : (10 forms)
+//
+// * VORPD xmm, xmm, xmm [AVX]
+// * VORPD m128, xmm, xmm [AVX]
+// * VORPD ymm, ymm, ymm [AVX]
+// * VORPD m256, ymm, ymm [AVX]
+// * VORPD m512/m64bcst, zmm, zmm{k}{z} [AVX512DQ]
+// * VORPD zmm, zmm, zmm{k}{z} [AVX512DQ]
+// * VORPD m128/m64bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VORPD xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VORPD m256/m64bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VORPD ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VORPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VORPD", 3, Operands { v0, v1, v2 })
+ // VORPD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x56)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VORPD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x56)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VORPD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x56)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VORPD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x56)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VORPD m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x56)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VORPD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x56)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VORPD m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x56)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VORPD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x56)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VORPD m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x56)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VORPD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x56)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VORPD")
+ }
+ return p
+}
+
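+// Usage sketch (hand-written): a plain bitwise OR over the full register
+// width; the "double-precision" in the title only affects which execution
+// domain the CPU schedules it in, not the result bits.
+func exampleVORPD(p *Program) {
+    p.VORPD(XMM0, XMM1, XMM2) // XMM2 = XMM1 | XMM0, bitwise
+}
+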
+// VORPS performs "Bitwise Logical OR of Single-Precision Floating-Point Values".
+//
+// Mnemonic : VORPS
+// Supported forms : (10 forms)
+//
+// * VORPS xmm, xmm, xmm [AVX]
+// * VORPS m128, xmm, xmm [AVX]
+// * VORPS ymm, ymm, ymm [AVX]
+// * VORPS m256, ymm, ymm [AVX]
+// * VORPS m512/m32bcst, zmm, zmm{k}{z} [AVX512DQ]
+// * VORPS zmm, zmm, zmm{k}{z} [AVX512DQ]
+// * VORPS m128/m32bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VORPS xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VORPS m256/m32bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VORPS ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VORPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VORPS", 3, Operands { v0, v1, v2 })
+ // VORPS xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x56)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VORPS m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x56)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VORPS ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x56)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VORPS m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x56)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VORPS m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x56)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VORPS zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x56)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VORPS m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x56)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VORPS xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x56)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VORPS m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x56)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VORPS ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x56)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VORPS")
+ }
+ return p
+}
+
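+// Usage sketch (hand-written): identical bit-level behavior to VORPD but in
+// the single-precision domain, as the differing VEX prefix bytes above show.
+func exampleVORPS(p *Program) {
+    p.VORPS(YMM0, YMM1, YMM2) // YMM2 = YMM1 | YMM0, bitwise
+}
+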
+// VPABSB performs "Packed Absolute Value of Byte Integers".
+//
+// Mnemonic : VPABSB
+// Supported forms : (10 forms)
+//
+// * VPABSB xmm, xmm [AVX]
+// * VPABSB m128, xmm [AVX]
+// * VPABSB ymm, ymm [AVX2]
+// * VPABSB m256, ymm [AVX2]
+// * VPABSB zmm, zmm{k}{z} [AVX512BW]
+// * VPABSB m512, zmm{k}{z} [AVX512BW]
+// * VPABSB xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPABSB ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPABSB m128, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPABSB m256, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPABSB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPABSB", 2, Operands { v0, v1 })
+ // VPABSB xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0x1c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPABSB m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x1c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPABSB ymm, ymm
+ if isYMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d)
+ m.emit(0x1c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPABSB m256, ymm
+ if isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x1c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPABSB zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x1c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPABSB m512, zmm{k}{z}
+ if isM512(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x1c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VPABSB xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x1c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPABSB ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x1c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPABSB m128, xmm{k}{z}
+ if isM128(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x1c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VPABSB m256, ymm{k}{z}
+ if isM256(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x1c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPABSB")
+ }
+ return p
+}
+
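+// Usage sketch (hand-written): VPABS* are two-operand (source, destination).
+// Note the edge case: |-128| has no signed-byte representation and comes
+// back as 0x80 (128 when read unsigned).
+func exampleVPABSB(p *Program) {
+    p.VPABSB(XMM0, XMM1) // XMM1[i] = |XMM0[i]| for each byte lane
+}
+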
+// VPABSD performs "Packed Absolute Value of Doubleword Integers".
+//
+// Mnemonic : VPABSD
+// Supported forms : (10 forms)
+//
+// * VPABSD xmm, xmm [AVX]
+// * VPABSD m128, xmm [AVX]
+// * VPABSD ymm, ymm [AVX2]
+// * VPABSD m256, ymm [AVX2]
+// * VPABSD m512/m32bcst, zmm{k}{z} [AVX512F]
+// * VPABSD zmm, zmm{k}{z} [AVX512F]
+// * VPABSD m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPABSD m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPABSD xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPABSD ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPABSD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPABSD", 2, Operands { v0, v1 })
+ // VPABSD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0x1e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPABSD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x1e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPABSD ymm, ymm
+ if isYMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d)
+ m.emit(0x1e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPABSD m256, ymm
+ if isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x1e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPABSD m512/m32bcst, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x1e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VPABSD zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x1e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPABSD m128/m32bcst, xmm{k}{z}
+ if isM128M32bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x1e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VPABSD m256/m32bcst, ymm{k}{z}
+ if isM256M32bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x1e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VPABSD xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x1e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPABSD ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x1e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPABSD")
+ }
+ return p
+}
+
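+// Usage sketch (hand-written): doubleword variant of the same two-operand
+// pattern; register constants assumed exported as elsewhere.
+func exampleVPABSD(p *Program) {
+    p.VPABSD(YMM0, YMM1) // YMM1[i] = |YMM0[i]| for each dword lane
+}
+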
+// VPABSQ performs "Packed Absolute Value of Quadword Integers".
+//
+// Mnemonic : VPABSQ
+// Supported forms : (6 forms)
+//
+// * VPABSQ m512/m64bcst, zmm{k}{z} [AVX512F]
+// * VPABSQ zmm, zmm{k}{z} [AVX512F]
+// * VPABSQ m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPABSQ m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPABSQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPABSQ ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPABSQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPABSQ", 2, Operands { v0, v1 })
+ // VPABSQ m512/m64bcst, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x1f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VPABSQ zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x1f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPABSQ m128/m64bcst, xmm{k}{z}
+ if isM128M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x1f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VPABSQ m256/m64bcst, ymm{k}{z}
+ if isM256M64bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x1f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VPABSQ xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x1f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPABSQ ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x1f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPABSQ")
+ }
+ return p
+}
+
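+// Usage sketch (hand-written): unlike the B/W/D variants there are no
+// VEX-encoded forms above, so every VPABSQ call requires AVX-512F; a bare
+// ZMM register is assumed to satisfy the zmm{k}{z} operand class here.
+func exampleVPABSQ(p *Program) {
+    p.VPABSQ(ZMM0, ZMM1) // ZMM1[i] = |ZMM0[i]| for each qword lane
+}
+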
+// VPABSW performs "Packed Absolute Value of Word Integers".
+//
+// Mnemonic : VPABSW
+// Supported forms : (10 forms)
+//
+// * VPABSW xmm, xmm [AVX]
+// * VPABSW m128, xmm [AVX]
+// * VPABSW ymm, ymm [AVX2]
+// * VPABSW m256, ymm [AVX2]
+// * VPABSW zmm, zmm{k}{z} [AVX512BW]
+// * VPABSW m512, zmm{k}{z} [AVX512BW]
+// * VPABSW xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPABSW ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPABSW m128, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPABSW m256, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPABSW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPABSW", 2, Operands { v0, v1 })
+ // VPABSW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0x1d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPABSW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x1d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPABSW ymm, ymm
+ if isYMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d)
+ m.emit(0x1d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPABSW m256, ymm
+ if isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x1d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPABSW zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x1d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPABSW m512, zmm{k}{z}
+ if isM512(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x1d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VPABSW xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x1d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPABSW ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x1d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPABSW m128, xmm{k}{z}
+ if isM128(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x1d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VPABSW m256, ymm{k}{z}
+ if isM256(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x1d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPABSW")
+ }
+ return p
+}
+
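+// Usage sketch (hand-written): word variant; as with VPABSB, the minimum
+// value is its own "absolute value" (|-32768| encodes as 0x8000).
+func exampleVPABSW(p *Program) {
+    p.VPABSW(XMM0, XMM1) // XMM1[i] = |XMM0[i]| for each word lane
+}
+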
+// VPACKSSDW performs "Pack Doublewords into Words with Signed Saturation".
+//
+// Mnemonic : VPACKSSDW
+// Supported forms : (10 forms)
+//
+// * VPACKSSDW xmm, xmm, xmm [AVX]
+// * VPACKSSDW m128, xmm, xmm [AVX]
+// * VPACKSSDW ymm, ymm, ymm [AVX2]
+// * VPACKSSDW m256, ymm, ymm [AVX2]
+// * VPACKSSDW m512/m32bcst, zmm, zmm{k}{z} [AVX512BW]
+// * VPACKSSDW zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPACKSSDW m128/m32bcst, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPACKSSDW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPACKSSDW m256/m32bcst, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPACKSSDW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPACKSSDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPACKSSDW", 3, Operands { v0, v1, v2 })
+ // VPACKSSDW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x6b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPACKSSDW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x6b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPACKSSDW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x6b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPACKSSDW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x6b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPACKSSDW m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x6b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPACKSSDW zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x6b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPACKSSDW m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x6b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPACKSSDW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x6b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPACKSSDW m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x6b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPACKSSDW ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x6b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPACKSSDW")
+ }
+ return p
+}
+
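+// Usage sketch (hand-written): packs signed dwords from both sources into
+// signed-saturated words, clamping anything outside [-32768, 32767]; the
+// last operand receives the packed result.
+func exampleVPACKSSDW(p *Program) {
+    p.VPACKSSDW(XMM0, XMM1, XMM2) // XMM2 = pack_ss(XMM1, XMM0)
+}
+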
+// VPACKSSWB performs "Pack Words into Bytes with Signed Saturation".
+//
+// Mnemonic : VPACKSSWB
+// Supported forms : (10 forms)
+//
+// * VPACKSSWB xmm, xmm, xmm [AVX]
+// * VPACKSSWB m128, xmm, xmm [AVX]
+// * VPACKSSWB ymm, ymm, ymm [AVX2]
+// * VPACKSSWB m256, ymm, ymm [AVX2]
+// * VPACKSSWB zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPACKSSWB m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPACKSSWB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPACKSSWB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPACKSSWB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPACKSSWB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPACKSSWB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPACKSSWB", 3, Operands { v0, v1, v2 })
+ // VPACKSSWB xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x63)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPACKSSWB m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x63)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPACKSSWB ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x63)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPACKSSWB m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x63)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPACKSSWB zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x63)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPACKSSWB m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x63)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPACKSSWB xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x63)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPACKSSWB m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x63)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPACKSSWB ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x63)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPACKSSWB m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x63)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPACKSSWB")
+ }
+ return p
+}
+
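+// Usage sketch (hand-written): the word-to-byte analogue of VPACKSSDW,
+// clamping each signed word to [-128, 127] before packing.
+func exampleVPACKSSWB(p *Program) {
+    p.VPACKSSWB(XMM0, XMM1, XMM2) // XMM2 = pack_ss(XMM1, XMM0), byte lanes
+}
+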
+// VPACKUSDW performs "Pack Doublewords into Words with Unsigned Saturation".
+//
+// Mnemonic : VPACKUSDW
+// Supported forms : (10 forms)
+//
+// * VPACKUSDW xmm, xmm, xmm [AVX]
+// * VPACKUSDW m128, xmm, xmm [AVX]
+// * VPACKUSDW ymm, ymm, ymm [AVX2]
+// * VPACKUSDW m256, ymm, ymm [AVX2]
+// * VPACKUSDW m512/m32bcst, zmm, zmm{k}{z} [AVX512BW]
+// * VPACKUSDW zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPACKUSDW m128/m32bcst, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPACKUSDW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPACKUSDW m256/m32bcst, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPACKUSDW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPACKUSDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPACKUSDW", 3, Operands { v0, v1, v2 })
+ // VPACKUSDW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x2b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPACKUSDW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x2b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPACKUSDW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x2b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPACKUSDW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x2b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPACKUSDW m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x2b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPACKUSDW zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x2b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPACKUSDW m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x2b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPACKUSDW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x2b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPACKUSDW m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x2b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPACKUSDW ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x2b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPACKUSDW")
+ }
+ return p
+}
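+
+// A note on the EVEX encodings in this group: register-register forms are
+// emitted byte-by-byte instead of going through m.evex(). The layout is the
+// fixed 0x62 escape, then P0 (0xf2 here selects the 0F38 opcode map; the XOR
+// folds in the inverted R/X/B/R' register-extension bits), then P1 (0x7d =
+// W0, inverted vvvv in bits 6:3, pp=01 for the 0x66 mandatory prefix), then
+// P2, whose OR-ed tail picks the vector length via EVEX.L'L: 0x40 for
+// 512-bit, 0x20 for 256-bit and 0x00 for 128-bit, with zcode/kcode filling
+// the z and aaa masking fields and 0x08 the (inverted) V' bit. Memory forms
+// assemble the same prefix through m.evex().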
+
+// VPACKUSWB performs "Pack Words into Bytes with Unsigned Saturation".
+//
+// Mnemonic : VPACKUSWB
+// Supported forms : (10 forms)
+//
+// * VPACKUSWB xmm, xmm, xmm [AVX]
+// * VPACKUSWB m128, xmm, xmm [AVX]
+// * VPACKUSWB ymm, ymm, ymm [AVX2]
+// * VPACKUSWB m256, ymm, ymm [AVX2]
+// * VPACKUSWB zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPACKUSWB m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPACKUSWB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPACKUSWB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPACKUSWB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPACKUSWB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPACKUSWB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPACKUSWB", 3, Operands { v0, v1, v2 })
+ // VPACKUSWB xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x67)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPACKUSWB m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x67)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPACKUSWB ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x67)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPACKUSWB m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x67)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPACKUSWB zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x67)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPACKUSWB m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x67)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPACKUSWB xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x67)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPACKUSWB m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x67)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPACKUSWB ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x67)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPACKUSWB m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x67)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPACKUSWB")
+ }
+ return p
+}
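+
+// The AVX/AVX2 forms above use the two-byte VEX prefix (0xC5) via m.vex2.
+// Judging by the call sites in this file, its first argument packs VEX.L and
+// pp: 1 (0b001) selects a 128-bit operation with the 0x66 mandatory prefix,
+// and 5 (0b101) the 256-bit version; hcode supplies the high register bit
+// that becomes the (inverted) VEX.R field, and hlcode the inverted vvvv
+// source register.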
+
+// VPADDB performs "Add Packed Byte Integers".
+//
+// Mnemonic : VPADDB
+// Supported forms : (10 forms)
+//
+// * VPADDB xmm, xmm, xmm [AVX]
+// * VPADDB m128, xmm, xmm [AVX]
+// * VPADDB ymm, ymm, ymm [AVX2]
+// * VPADDB m256, ymm, ymm [AVX2]
+// * VPADDB zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPADDB m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPADDB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPADDB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPADDB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPADDB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPADDB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPADDB", 3, Operands { v0, v1, v2 })
+ // VPADDB xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xfc)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDB m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xfc)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPADDB ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xfc)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDB m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xfc)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPADDB zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xfc)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDB m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xfc)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPADDB xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xfc)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDB m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xfc)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPADDB ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xfc)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDB m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xfc)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPADDB")
+ }
+ return p
+}
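+
+// The last m.mrsd(reg, addr, scale) argument is the EVEX disp8*N compression
+// factor: VEX forms pass 1 (plain byte displacements), while EVEX forms pass
+// the full operand width (16, 32 or 64), so an 8-bit displacement is stored
+// divided by N. For example, [rax + 128] in a zmm form still fits in one
+// displacement byte, encoded as 128/64 = 2.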
+
+// VPADDD performs "Add Packed Doubleword Integers".
+//
+// Mnemonic : VPADDD
+// Supported forms : (10 forms)
+//
+// * VPADDD xmm, xmm, xmm [AVX]
+// * VPADDD m128, xmm, xmm [AVX]
+// * VPADDD ymm, ymm, ymm [AVX2]
+// * VPADDD m256, ymm, ymm [AVX2]
+// * VPADDD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPADDD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPADDD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPADDD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPADDD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPADDD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPADDD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPADDD", 3, Operands { v0, v1, v2 })
+ // VPADDD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xfe)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xfe)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPADDD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xfe)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xfe)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPADDD m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xfe)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPADDD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xfe)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDD m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xfe)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPADDD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xfe)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDD m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xfe)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPADDD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xfe)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPADDD")
+ }
+ return p
+}
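+
+// bcode(v[0]) sets the EVEX.b bit (bit 4 of the last prefix byte) when the
+// m512/m32bcst operand was constructed as a broadcast, replicating a single
+// 32-bit element across the vector instead of loading the full width. An
+// illustrative dispatch sketch (p is a *Program; the register constants are
+// assumed to be this package's exported ones):
+//
+//     p.VPADDD(ZMM1, ZMM2, ZMM3) // matches the "zmm, zmm, zmm{k}{z}" branch
+//
+// Plain registers satisfy the {k}{z}-optional checks; a masked destination
+// is whatever wrapped form makes isZMMkz carry a mask, which kcode/zcode
+// then read back out.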
+
+// VPADDQ performs "Add Packed Quadword Integers".
+//
+// Mnemonic : VPADDQ
+// Supported forms : (10 forms)
+//
+// * VPADDQ xmm, xmm, xmm [AVX]
+// * VPADDQ m128, xmm, xmm [AVX]
+// * VPADDQ ymm, ymm, ymm [AVX2]
+// * VPADDQ m256, ymm, ymm [AVX2]
+// * VPADDQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPADDQ zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPADDQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPADDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPADDQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPADDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPADDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPADDQ", 3, Operands { v0, v1, v2 })
+ // VPADDQ xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xd4)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDQ m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xd4)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPADDQ ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xd4)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDQ m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xd4)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPADDQ m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xd4)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPADDQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xd4)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDQ m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xd4)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPADDQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xd4)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDQ m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xd4)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPADDQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xd4)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPADDQ")
+ }
+ return p
+}
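+
+// VPADDQ differs from VPADDD only in element width, and the encodings show
+// where that lives: the inline register forms emit P1 as 0xfd (bit 7 =
+// EVEX.W = 1, quadword elements) where VPADDD emits 0x7d (W = 0), and the
+// memory forms pass 0x85 rather than 0x05 as the second m.evex() argument
+// for the same reason. Broadcast granularity follows suit: m64bcst instead
+// of m32bcst.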
+
+// VPADDSB performs "Add Packed Signed Byte Integers with Signed Saturation".
+//
+// Mnemonic : VPADDSB
+// Supported forms : (10 forms)
+//
+// * VPADDSB xmm, xmm, xmm [AVX]
+// * VPADDSB m128, xmm, xmm [AVX]
+// * VPADDSB ymm, ymm, ymm [AVX2]
+// * VPADDSB m256, ymm, ymm [AVX2]
+// * VPADDSB zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPADDSB m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPADDSB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPADDSB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPADDSB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPADDSB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPADDSB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPADDSB", 3, Operands { v0, v1, v2 })
+ // VPADDSB xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xec)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDSB m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xec)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPADDSB ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xec)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDSB m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xec)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPADDSB zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xec)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDSB m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xec)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPADDSB xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xec)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDSB m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xec)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPADDSB ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xec)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDSB m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xec)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPADDSB")
+ }
+ return p
+}
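+
+// Saturating adds clamp instead of wrapping: in the signed byte lanes here,
+// 0x7f + 0x01 yields 0x7f (127, the signed maximum) and 0x80 + 0xff yields
+// 0x80 (-128), where the plain VPADDB above would wrap to 0x80 and 0x7f
+// respectively. VPADDSW, VPADDUSB and VPADDUSW below apply the same rule to
+// signed words and to unsigned bytes/words, which clamp into 0x00..0xff
+// (0x0000..0xffff for words).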
+
+// VPADDSW performs "Add Packed Signed Word Integers with Signed Saturation".
+//
+// Mnemonic : VPADDSW
+// Supported forms : (10 forms)
+//
+// * VPADDSW xmm, xmm, xmm [AVX]
+// * VPADDSW m128, xmm, xmm [AVX]
+// * VPADDSW ymm, ymm, ymm [AVX2]
+// * VPADDSW m256, ymm, ymm [AVX2]
+// * VPADDSW zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPADDSW m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPADDSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPADDSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPADDSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPADDSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPADDSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPADDSW", 3, Operands { v0, v1, v2 })
+ // VPADDSW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xed)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDSW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xed)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPADDSW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xed)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDSW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xed)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPADDSW zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xed)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDSW m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xed)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPADDSW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xed)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDSW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xed)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPADDSW ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xed)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDSW m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xed)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPADDSW")
+ }
+ return p
+}
+
+// VPADDUSB performs "Add Packed Unsigned Byte Integers with Unsigned Saturation".
+//
+// Mnemonic : VPADDUSB
+// Supported forms : (10 forms)
+//
+// * VPADDUSB xmm, xmm, xmm [AVX]
+// * VPADDUSB m128, xmm, xmm [AVX]
+// * VPADDUSB ymm, ymm, ymm [AVX2]
+// * VPADDUSB m256, ymm, ymm [AVX2]
+// * VPADDUSB zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPADDUSB m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPADDUSB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPADDUSB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPADDUSB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPADDUSB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPADDUSB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPADDUSB", 3, Operands { v0, v1, v2 })
+ // VPADDUSB xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xdc)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDUSB m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xdc)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPADDUSB ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xdc)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDUSB m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xdc)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPADDUSB zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xdc)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDUSB m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xdc)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPADDUSB xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xdc)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDUSB m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xdc)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPADDUSB ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xdc)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDUSB m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xdc)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPADDUSB")
+ }
+ return p
+}
+
+// VPADDUSW performs "Add Packed Unsigned Word Integers with Unsigned Saturation".
+//
+// Mnemonic : VPADDUSW
+// Supported forms : (10 forms)
+//
+// * VPADDUSW xmm, xmm, xmm [AVX]
+// * VPADDUSW m128, xmm, xmm [AVX]
+// * VPADDUSW ymm, ymm, ymm [AVX2]
+// * VPADDUSW m256, ymm, ymm [AVX2]
+// * VPADDUSW zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPADDUSW m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPADDUSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPADDUSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPADDUSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPADDUSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPADDUSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPADDUSW", 3, Operands { v0, v1, v2 })
+ // VPADDUSW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xdd)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDUSW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xdd)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPADDUSW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xdd)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDUSW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xdd)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPADDUSW zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xdd)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDUSW m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xdd)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPADDUSW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xdd)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDUSW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xdd)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPADDUSW ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xdd)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDUSW m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xdd)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPADDUSW")
+ }
+ return p
+}
+
+// VPADDW performs "Add Packed Word Integers".
+//
+// Mnemonic : VPADDW
+// Supported forms : (10 forms)
+//
+// * VPADDW xmm, xmm, xmm [AVX]
+// * VPADDW m128, xmm, xmm [AVX]
+// * VPADDW ymm, ymm, ymm [AVX2]
+// * VPADDW m256, ymm, ymm [AVX2]
+// * VPADDW zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPADDW m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPADDW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPADDW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPADDW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPADDW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPADDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPADDW", 3, Operands { v0, v1, v2 })
+ // VPADDW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xfd)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xfd)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPADDW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xfd)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xfd)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPADDW zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xfd)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDW m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xfd)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPADDW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xfd)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xfd)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPADDW ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xfd)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPADDW m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xfd)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPADDW")
+ }
+ return p
+}
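+
+// This completes the packed-add family (VPADDB/W/D/Q plus the saturating
+// VPADDS*/VPADDUS* variants). The require() calls follow element width:
+// byte and word operations sit in AVX512BW, while the doubleword and
+// quadword ones above sit in AVX512F, which is also why only the latter
+// offer m32bcst/m64bcst embedded-broadcast forms.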
+
+// VPALIGNR performs "Packed Align Right".
+//
+// Mnemonic : VPALIGNR
+// Supported forms : (10 forms)
+//
+// * VPALIGNR imm8, xmm, xmm, xmm [AVX]
+// * VPALIGNR imm8, m128, xmm, xmm [AVX]
+// * VPALIGNR imm8, ymm, ymm, ymm [AVX2]
+// * VPALIGNR imm8, m256, ymm, ymm [AVX2]
+// * VPALIGNR imm8, zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPALIGNR imm8, m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPALIGNR imm8, xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPALIGNR imm8, m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPALIGNR imm8, ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPALIGNR imm8, m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPALIGNR(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPALIGNR", 4, Operands { v0, v1, v2, v3 })
+ // VPALIGNR imm8, xmm, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPALIGNR imm8, m128, xmm, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x0f)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPALIGNR imm8, ymm, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPALIGNR imm8, m256, ymm, ymm
+ if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x0f)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPALIGNR imm8, zmm, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPALIGNR imm8, m512, zmm, zmm{k}{z}
+ if isImm8(v0) && isM512(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPALIGNR imm8, xmm, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPALIGNR imm8, m128, xmm, xmm{k}{z}
+ if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPALIGNR imm8, ymm, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPALIGNR imm8, m256, ymm, ymm{k}{z}
+ if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPALIGNR")
+ }
+ return p
+}
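+
+// VPALIGNR is the one four-operand method in this group: the imm8 byte-shift
+// count arrives as v0 and is appended after the ModRM and displacement bytes
+// via m.imm1(toImmAny(v[0])). As everywhere in this generated file, operands
+// are passed sources-first, destination-last, so Go call order matches the
+// form comments rather than Intel-syntax assembly.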
+
+// VPAND performs "Packed Bitwise Logical AND".
+//
+// Mnemonic : VPAND
+// Supported forms : (4 forms)
+//
+// * VPAND xmm, xmm, xmm [AVX]
+// * VPAND m128, xmm, xmm [AVX]
+// * VPAND ymm, ymm, ymm [AVX2]
+// * VPAND m256, ymm, ymm [AVX2]
+//
+func (self *Program) VPAND(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPAND", 3, Operands { v0, v1, v2 })
+ // VPAND xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xdb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPAND m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xdb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPAND ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xdb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPAND m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xdb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPAND")
+ }
+ return p
+}
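+
+// VPAND has no EVEX forms: AVX-512 dropped the untyped vector logicals in
+// favour of the element-typed VPANDD/VPANDQ below, which add masking,
+// zeroing and embedded broadcast. Code that needs {k}{z} or zmm operands
+// must therefore pick an element width, even though bitwise AND itself is
+// width-agnostic.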
+
+// VPANDD performs "Bitwise Logical AND of Packed Doubleword Integers".
+//
+// Mnemonic : VPANDD
+// Supported forms : (6 forms)
+//
+// * VPANDD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPANDD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPANDD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPANDD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPANDD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPANDD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPANDD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPANDD", 3, Operands { v0, v1, v2 })
+ // VPANDD m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xdb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPANDD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xdb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPANDD m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xdb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPANDD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xdb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPANDD m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xdb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPANDD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xdb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPANDD")
+ }
+ return p
+}
+
+// VPANDN performs "Packed Bitwise Logical AND NOT".
+//
+// Mnemonic : VPANDN
+// Supported forms : (4 forms)
+//
+// * VPANDN xmm, xmm, xmm [AVX]
+// * VPANDN m128, xmm, xmm [AVX]
+// * VPANDN ymm, ymm, ymm [AVX2]
+// * VPANDN m256, ymm, ymm [AVX2]
+//
+func (self *Program) VPANDN(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPANDN", 3, Operands { v0, v1, v2 })
+ // VPANDN xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xdf)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPANDN m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xdf)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPANDN ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xdf)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPANDN m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xdf)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPANDN")
+ }
+ return p
+}
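+
+// VPANDN complements its first register source before the AND. With the
+// operand order used here, VPANDN(src, mid, dst) computes dst = ^mid & src:
+// the middle operand travels in VEX.vvvv (hlcode(v[1])) and Intel defines
+// the operation as DEST := NOT(SRC1) AND SRC2. Like VPAND, it is VEX-only;
+// VPANDND/VPANDNQ below provide the EVEX-typed variants.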
+
+// VPANDND performs "Bitwise Logical AND NOT of Packed Doubleword Integers".
+//
+// Mnemonic : VPANDND
+// Supported forms : (6 forms)
+//
+// * VPANDND m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPANDND zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPANDND m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPANDND xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPANDND m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPANDND ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPANDND(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPANDND", 3, Operands { v0, v1, v2 })
+ // VPANDND m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xdf)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPANDND zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xdf)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPANDND m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xdf)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPANDND xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xdf)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPANDND m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xdf)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPANDND ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xdf)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPANDND")
+ }
+ return p
+}
+
+// VPANDNQ performs "Bitwise Logical AND NOT of Packed Quadword Integers".
+//
+// Mnemonic : VPANDNQ
+// Supported forms : (6 forms)
+//
+// * VPANDNQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPANDNQ zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPANDNQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPANDNQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPANDNQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPANDNQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPANDNQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPANDNQ", 3, Operands { v0, v1, v2 })
+ // VPANDNQ m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xdf)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPANDNQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xdf)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPANDNQ m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xdf)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPANDNQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xdf)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPANDNQ m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xdf)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPANDNQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xdf)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPANDNQ")
+ }
+ return p
+}
+
+// VPANDQ performs "Bitwise Logical AND of Packed Quadword Integers".
+//
+// Mnemonic : VPANDQ
+// Supported forms : (6 forms)
+//
+// * VPANDQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPANDQ zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPANDQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPANDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPANDQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPANDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPANDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPANDQ", 3, Operands { v0, v1, v2 })
+ // VPANDQ m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xdb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPANDQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xdb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPANDQ m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xdb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPANDQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xdb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPANDQ m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xdb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPANDQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xdb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPANDQ")
+ }
+ return p
+}
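+
+// exampleVPANDQ is an editorial sketch, not generated code: it illustrates the
+// AT&T-style operand order used throughout this package (sources first,
+// destination last), assuming the ZMM register constants defined by this
+// package. In Intel syntax these calls read VPANDQ zmm2, zmm1, zmm0 and
+// VPANDNQ zmm2, zmm1, zmm0.
+func exampleVPANDQ(p *Program) {
+ p.VPANDQ(ZMM0, ZMM1, ZMM2)  // ZMM2 = ZMM1 & ZMM0
+ p.VPANDNQ(ZMM0, ZMM1, ZMM2) // ZMM2 = ^ZMM1 & ZMM0
+}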
+
+// VPAVGB performs "Average Packed Byte Integers".
+//
+// Mnemonic : VPAVGB
+// Supported forms : (10 forms)
+//
+// * VPAVGB xmm, xmm, xmm [AVX]
+// * VPAVGB m128, xmm, xmm [AVX]
+// * VPAVGB ymm, ymm, ymm [AVX2]
+// * VPAVGB m256, ymm, ymm [AVX2]
+// * VPAVGB zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPAVGB m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPAVGB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPAVGB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPAVGB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPAVGB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPAVGB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPAVGB", 3, Operands { v0, v1, v2 })
+ // VPAVGB xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xe0)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPAVGB m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xe0)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPAVGB ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xe0)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPAVGB m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xe0)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPAVGB zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xe0)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPAVGB m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe0)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPAVGB xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xe0)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPAVGB m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe0)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPAVGB ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xe0)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPAVGB m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe0)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPAVGB")
+ }
+ return p
+}
+
+// VPAVGW performs "Average Packed Word Integers".
+//
+// Mnemonic : VPAVGW
+// Supported forms : (10 forms)
+//
+// * VPAVGW xmm, xmm, xmm [AVX]
+// * VPAVGW m128, xmm, xmm [AVX]
+// * VPAVGW ymm, ymm, ymm [AVX2]
+// * VPAVGW m256, ymm, ymm [AVX2]
+// * VPAVGW zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPAVGW m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPAVGW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPAVGW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPAVGW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPAVGW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPAVGW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPAVGW", 3, Operands { v0, v1, v2 })
+ // VPAVGW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xe3)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPAVGW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xe3)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPAVGW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xe3)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPAVGW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xe3)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPAVGW zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xe3)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPAVGW m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe3)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPAVGW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xe3)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPAVGW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe3)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPAVGW ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xe3)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPAVGW m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe3)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPAVGW")
+ }
+ return p
+}
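+
+// exampleVPAVG is an editorial sketch, not generated code: it shows the plain
+// AVX/AVX2 forms of the rounded unsigned averages above, assuming the XMM/YMM
+// register constants defined by this package.
+func exampleVPAVG(p *Program) {
+ p.VPAVGB(XMM0, XMM1, XMM2) // XMM2.byte[i] = (XMM1.byte[i] + XMM0.byte[i] + 1) >> 1
+ p.VPAVGW(YMM0, YMM1, YMM2) // same per 16-bit word, on 256-bit registers (AVX2)
+}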
+
+// VPBLENDD performs "Blend Packed Doublewords".
+//
+// Mnemonic : VPBLENDD
+// Supported forms : (4 forms)
+//
+// * VPBLENDD imm8, xmm, xmm, xmm [AVX2]
+// * VPBLENDD imm8, m128, xmm, xmm [AVX2]
+// * VPBLENDD imm8, ymm, ymm, ymm [AVX2]
+// * VPBLENDD imm8, m256, ymm, ymm [AVX2]
+//
+func (self *Program) VPBLENDD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPBLENDD", 4, Operands { v0, v1, v2, v3 })
+ // VPBLENDD imm8, xmm, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x02)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPBLENDD imm8, m128, xmm, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x02)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPBLENDD imm8, ymm, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x02)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPBLENDD imm8, m256, ymm, ymm
+ if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x02)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPBLENDD")
+ }
+ return p
+}
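+
+// exampleVPBLENDD is an editorial sketch, not generated code: the immediate
+// selector is the first argument in this package's operand order. With the
+// reversed operands, a set bit i selects dword i from the operand right after
+// the immediate; here 0b0101 takes dwords 0 and 2 from XMM0 and dwords 1 and
+// 3 from XMM1, writing the result to XMM2.
+func exampleVPBLENDD(p *Program) {
+ p.VPBLENDD(0b0101, XMM0, XMM1, XMM2)
+}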
+
+// VPBLENDMB performs "Blend Byte Vectors Using an OpMask Control".
+//
+// Mnemonic : VPBLENDMB
+// Supported forms : (6 forms)
+//
+// * VPBLENDMB zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPBLENDMB m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPBLENDMB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPBLENDMB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPBLENDMB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPBLENDMB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPBLENDMB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPBLENDMB", 3, Operands { v0, v1, v2 })
+ // VPBLENDMB zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x66)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBLENDMB m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x66)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPBLENDMB xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x66)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBLENDMB m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x66)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPBLENDMB ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x66)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBLENDMB m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x66)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPBLENDMB")
+ }
+ return p
+}
+
+// VPBLENDMD performs "Blend Doubleword Vectors Using an OpMask Control".
+//
+// Mnemonic : VPBLENDMD
+// Supported forms : (6 forms)
+//
+// * VPBLENDMD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPBLENDMD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPBLENDMD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPBLENDMD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPBLENDMD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPBLENDMD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPBLENDMD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPBLENDMD", 3, Operands { v0, v1, v2 })
+ // VPBLENDMD m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x64)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPBLENDMD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x64)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBLENDMD m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x64)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPBLENDMD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x64)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBLENDMD m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x64)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPBLENDMD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x64)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPBLENDMD")
+ }
+ return p
+}
+
+// VPBLENDMQ performs "Blend Quadword Vectors Using an OpMask Control".
+//
+// Mnemonic : VPBLENDMQ
+// Supported forms : (6 forms)
+//
+// * VPBLENDMQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPBLENDMQ zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPBLENDMQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPBLENDMQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPBLENDMQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPBLENDMQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPBLENDMQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPBLENDMQ", 3, Operands { v0, v1, v2 })
+ // VPBLENDMQ m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x64)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPBLENDMQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x64)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBLENDMQ m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x64)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPBLENDMQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x64)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBLENDMQ m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x64)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPBLENDMQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x64)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPBLENDMQ")
+ }
+ return p
+}
+
+// VPBLENDMW performs "Blend Word Vectors Using an OpMask Control".
+//
+// Mnemonic : VPBLENDMW
+// Supported forms : (6 forms)
+//
+// * VPBLENDMW zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPBLENDMW m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPBLENDMW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPBLENDMW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPBLENDMW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPBLENDMW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPBLENDMW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPBLENDMW", 3, Operands { v0, v1, v2 })
+ // VPBLENDMW zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x66)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBLENDMW m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x66)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPBLENDMW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x66)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBLENDMW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x66)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPBLENDMW ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x66)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBLENDMW m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x66)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPBLENDMW")
+ }
+ return p
+}
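+
+// exampleVPBLENDMD is an editorial sketch, not generated code, covering the
+// VPBLENDM* family above. The {k}{z} notation marks the opmask as optional,
+// so a plain-register call is assumed to match the isZMMkz predicate; the
+// blend is only interesting with an opmask attached to the destination, and
+// that masked-register syntax is not shown here.
+func exampleVPBLENDMD(p *Program) {
+ p.VPBLENDMD(ZMM0, ZMM1, ZMM2) // per element: ZMM2[i] = k[i] ? ZMM0[i] : ZMM1[i]
+}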
+
+// VPBLENDVB performs "Variable Blend Packed Bytes".
+//
+// Mnemonic : VPBLENDVB
+// Supported forms : (4 forms)
+//
+// * VPBLENDVB xmm, xmm, xmm, xmm [AVX]
+// * VPBLENDVB xmm, m128, xmm, xmm [AVX]
+// * VPBLENDVB ymm, ymm, ymm, ymm [AVX2]
+// * VPBLENDVB ymm, m256, ymm, ymm [AVX2]
+//
+func (self *Program) VPBLENDVB(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPBLENDVB", 4, Operands { v0, v1, v2, v3 })
+ // VPBLENDVB xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x4c)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VPBLENDVB xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x4c)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VPBLENDVB ymm, ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x4c)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VPBLENDVB ymm, m256, ymm, ymm
+ if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x4c)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPBLENDVB")
+ }
+ return p
+}
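+
+// exampleVPBLENDVB is an editorial sketch, not generated code: the selector
+// register is the first argument in this package's operand order, and the
+// high bit of each selector byte picks between the two sources.
+func exampleVPBLENDVB(p *Program) {
+ p.VPBLENDVB(XMM3, XMM0, XMM1, XMM2) // XMM2.byte[i] = msb(XMM3.byte[i]) ? XMM0.byte[i] : XMM1.byte[i]
+}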
+
+// VPBLENDW performs "Blend Packed Words".
+//
+// Mnemonic : VPBLENDW
+// Supported forms : (4 forms)
+//
+// * VPBLENDW imm8, xmm, xmm, xmm [AVX]
+// * VPBLENDW imm8, m128, xmm, xmm [AVX]
+// * VPBLENDW imm8, ymm, ymm, ymm [AVX2]
+// * VPBLENDW imm8, m256, ymm, ymm [AVX2]
+//
+func (self *Program) VPBLENDW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPBLENDW", 4, Operands { v0, v1, v2, v3 })
+ // VPBLENDW imm8, xmm, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x0e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPBLENDW imm8, m128, xmm, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x0e)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPBLENDW imm8, ymm, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x0e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPBLENDW imm8, m256, ymm, ymm
+ if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x0e)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPBLENDW")
+ }
+ return p
+}
+
+// VPBROADCASTB performs "Broadcast Byte Integer".
+//
+// Mnemonic : VPBROADCASTB
+// Supported forms : (13 forms)
+//
+// * VPBROADCASTB xmm, xmm [AVX2]
+// * VPBROADCASTB m8, xmm [AVX2]
+// * VPBROADCASTB xmm, ymm [AVX2]
+// * VPBROADCASTB m8, ymm [AVX2]
+// * VPBROADCASTB r32, zmm{k}{z} [AVX512BW]
+// * VPBROADCASTB xmm, zmm{k}{z} [AVX512BW]
+// * VPBROADCASTB m8, zmm{k}{z} [AVX512BW]
+// * VPBROADCASTB r32, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPBROADCASTB r32, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPBROADCASTB xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPBROADCASTB xmm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPBROADCASTB m8, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPBROADCASTB m8, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPBROADCASTB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPBROADCASTB", 2, Operands { v0, v1 })
+ // VPBROADCASTB xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTB m8, xmm
+ if isM8(v0) && isXMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x78)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPBROADCASTB xmm, ymm
+ if isXMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTB m8, ymm
+ if isM8(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x78)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPBROADCASTB r32, zmm{k}{z}
+ if isReg32(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTB xmm, zmm{k}{z}
+ if isEVEXXMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTB m8, zmm{k}{z}
+ if isM8(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x78)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPBROADCASTB r32, xmm{k}{z}
+ if isReg32(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTB r32, ymm{k}{z}
+ if isReg32(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x7a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTB xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTB xmm, ymm{k}{z}
+ if isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x78)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTB m8, xmm{k}{z}
+ if isM8(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x78)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPBROADCASTB m8, ymm{k}{z}
+ if isM8(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x78)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPBROADCASTB")
+ }
+ return p
+}
+
+// VPBROADCASTD performs "Broadcast Doubleword Integer".
+//
+// Mnemonic : VPBROADCASTD
+// Supported forms : (13 forms)
+//
+// * VPBROADCASTD xmm, xmm [AVX2]
+// * VPBROADCASTD m32, xmm [AVX2]
+// * VPBROADCASTD xmm, ymm [AVX2]
+// * VPBROADCASTD m32, ymm [AVX2]
+// * VPBROADCASTD r32, zmm{k}{z} [AVX512F]
+// * VPBROADCASTD xmm, zmm{k}{z} [AVX512F]
+// * VPBROADCASTD m32, zmm{k}{z} [AVX512F]
+// * VPBROADCASTD r32, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPBROADCASTD r32, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPBROADCASTD xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPBROADCASTD xmm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPBROADCASTD m32, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPBROADCASTD m32, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPBROADCASTD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPBROADCASTD", 2, Operands { v0, v1 })
+ // VPBROADCASTD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTD m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x58)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPBROADCASTD xmm, ymm
+ if isXMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d)
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTD m32, ymm
+ if isM32(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x58)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPBROADCASTD r32, zmm{k}{z}
+ if isReg32(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x7c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTD xmm, zmm{k}{z}
+ if isEVEXXMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTD m32, zmm{k}{z}
+ if isM32(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x58)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VPBROADCASTD r32, xmm{k}{z}
+ if isReg32(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x7c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTD r32, ymm{k}{z}
+ if isReg32(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x7c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTD xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTD xmm, ymm{k}{z}
+ if isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x58)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTD m32, xmm{k}{z}
+ if isM32(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x58)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VPBROADCASTD m32, ymm{k}{z}
+ if isM32(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x58)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPBROADCASTD")
+ }
+ return p
+}
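+
+// exampleVPBROADCASTD is an editorial sketch, not generated code: it shows
+// both the AVX2 xmm-source form and the AVX-512F GPR-source form, assuming
+// the XMM/YMM/ZMM and EAX register constants defined by this package.
+func exampleVPBROADCASTD(p *Program) {
+ p.VPBROADCASTD(XMM0, YMM1) // replicate XMM0's low dword into all 8 dwords of YMM1
+ p.VPBROADCASTD(EAX, ZMM1)  // AVX-512F: replicate EAX into all 16 dwords of ZMM1
+}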
+
+// VPBROADCASTMB2Q performs "Broadcast Low Byte of Mask Register to Packed Quadword Values".
+//
+// Mnemonic : VPBROADCASTMB2Q
+// Supported forms : (3 forms)
+//
+// * VPBROADCASTMB2Q k, xmm [AVX512CD,AVX512VL]
+// * VPBROADCASTMB2Q k, ymm [AVX512CD,AVX512VL]
+// * VPBROADCASTMB2Q k, zmm [AVX512CD]
+//
+func (self *Program) VPBROADCASTMB2Q(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPBROADCASTMB2Q", 2, Operands { v0, v1 })
+ // VPBROADCASTMB2Q k, xmm
+ if isK(v0) && isEVEXXMM(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit(0x08)
+ m.emit(0x2a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTMB2Q k, ymm
+ if isK(v0) && isEVEXYMM(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit(0x28)
+ m.emit(0x2a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTMB2Q k, zmm
+ if isK(v0) && isZMM(v1) {
+ self.require(ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit(0x48)
+ m.emit(0x2a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPBROADCASTMB2Q")
+ }
+ return p
+}
+
+// VPBROADCASTMW2D performs "Broadcast Low Word of Mask Register to Packed Doubleword Values".
+//
+// Mnemonic : VPBROADCASTMW2D
+// Supported forms : (3 forms)
+//
+// * VPBROADCASTMW2D k, xmm [AVX512CD,AVX512VL]
+// * VPBROADCASTMW2D k, ymm [AVX512CD,AVX512VL]
+// * VPBROADCASTMW2D k, zmm [AVX512CD]
+//
+func (self *Program) VPBROADCASTMW2D(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPBROADCASTMW2D", 2, Operands { v0, v1 })
+ // VPBROADCASTMW2D k, xmm
+ if isK(v0) && isEVEXXMM(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit(0x08)
+ m.emit(0x3a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTMW2D k, ymm
+ if isK(v0) && isEVEXYMM(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit(0x28)
+ m.emit(0x3a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTMW2D k, zmm
+ if isK(v0) && isZMM(v1) {
+ self.require(ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit(0x48)
+ m.emit(0x3a)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPBROADCASTMW2D")
+ }
+ return p
+}
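+
+// exampleVPBROADCASTMW2D is an editorial sketch, not generated code: it
+// assumes the opmask register constants (K0..K7) defined by this package.
+func exampleVPBROADCASTMW2D(p *Program) {
+ p.VPBROADCASTMW2D(K1, ZMM0) // replicate K1's low word, zero-extended, into every dword of ZMM0
+}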
+
+// VPBROADCASTQ performs "Broadcast Quadword Integer".
+//
+// Mnemonic : VPBROADCASTQ
+// Supported forms : (13 forms)
+//
+// * VPBROADCASTQ xmm, xmm [AVX2]
+// * VPBROADCASTQ m64, xmm [AVX2]
+// * VPBROADCASTQ xmm, ymm [AVX2]
+// * VPBROADCASTQ m64, ymm [AVX2]
+// * VPBROADCASTQ r64, zmm{k}{z} [AVX512F]
+// * VPBROADCASTQ xmm, zmm{k}{z} [AVX512F]
+// * VPBROADCASTQ m64, zmm{k}{z} [AVX512F]
+// * VPBROADCASTQ r64, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPBROADCASTQ r64, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPBROADCASTQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPBROADCASTQ xmm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPBROADCASTQ m64, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPBROADCASTQ m64, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPBROADCASTQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPBROADCASTQ", 2, Operands { v0, v1 })
+ // VPBROADCASTQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTQ m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x59)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPBROADCASTQ xmm, ymm
+ if isXMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d)
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTQ m64, ymm
+ if isM64(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x59)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPBROADCASTQ r64, zmm{k}{z}
+ if isReg64(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x7c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTQ xmm, zmm{k}{z}
+ if isEVEXXMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTQ m64, zmm{k}{z}
+ if isM64(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x59)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VPBROADCASTQ r64, xmm{k}{z}
+ if isReg64(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x7c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTQ r64, ymm{k}{z}
+ if isReg64(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x7c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTQ xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTQ xmm, ymm{k}{z}
+ if isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x59)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTQ m64, xmm{k}{z}
+ if isM64(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x59)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VPBROADCASTQ m64, ymm{k}{z}
+ if isM64(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x59)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPBROADCASTQ")
+ }
+ return p
+}
+
+// VPBROADCASTW performs "Broadcast Word Integer".
+//
+// Mnemonic : VPBROADCASTW
+// Supported forms : (13 forms)
+//
+// * VPBROADCASTW xmm, xmm [AVX2]
+// * VPBROADCASTW m16, xmm [AVX2]
+// * VPBROADCASTW xmm, ymm [AVX2]
+// * VPBROADCASTW m16, ymm [AVX2]
+// * VPBROADCASTW r32, zmm{k}{z} [AVX512BW]
+// * VPBROADCASTW xmm, zmm{k}{z} [AVX512BW]
+// * VPBROADCASTW m16, zmm{k}{z} [AVX512BW]
+// * VPBROADCASTW r32, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPBROADCASTW r32, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPBROADCASTW xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPBROADCASTW xmm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPBROADCASTW m16, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPBROADCASTW m16, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPBROADCASTW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPBROADCASTW", 2, Operands { v0, v1 })
+ // VPBROADCASTW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTW m16, xmm
+ if isM16(v0) && isXMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x79)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPBROADCASTW xmm, ymm
+ if isXMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTW m16, ymm
+ if isM16(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x79)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPBROADCASTW r32, zmm{k}{z}
+ if isReg32(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x7b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTW xmm, zmm{k}{z}
+ if isEVEXXMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTW m16, zmm{k}{z}
+ if isM16(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x79)
+ m.mrsd(lcode(v[1]), addr(v[0]), 2)
+ })
+ }
+ // VPBROADCASTW r32, xmm{k}{z}
+ if isReg32(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x7b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTW r32, ymm{k}{z}
+ if isReg32(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x7b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTW xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTW xmm, ymm{k}{z}
+ if isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x79)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPBROADCASTW m16, xmm{k}{z}
+ if isM16(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x79)
+ m.mrsd(lcode(v[1]), addr(v[0]), 2)
+ })
+ }
+ // VPBROADCASTW m16, ymm{k}{z}
+ if isM16(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x79)
+ m.mrsd(lcode(v[1]), addr(v[0]), 2)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPBROADCASTW")
+ }
+ return p
+}
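+
+// Illustrative usage (hand-written sketch, not generator output; assumes a
+// *Program p and this package's exported register constants):
+//
+//     p.VPBROADCASTW(XMM1, YMM2)   // replicate word 0 of XMM1 into every word of YMM2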
+
+// VPCLMULQDQ performs "Carry-Less Quadword Multiplication".
+//
+// Mnemonic : VPCLMULQDQ
+// Supported forms : (2 forms)
+//
+// * VPCLMULQDQ imm8, xmm, xmm, xmm [AVX,PCLMULQDQ]
+// * VPCLMULQDQ imm8, m128, xmm, xmm [AVX,PCLMULQDQ]
+//
+func (self *Program) VPCLMULQDQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPCLMULQDQ", 4, Operands { v0, v1, v2, v3 })
+ // VPCLMULQDQ imm8, xmm, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX | ISA_PCLMULQDQ)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x44)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCLMULQDQ imm8, m128, xmm, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX | ISA_PCLMULQDQ)
+ p.domain = DomainCrypto
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x44)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCLMULQDQ")
+ }
+ return p
+}
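+
+// Illustrative usage (sketch): the immediate comes first, and imm8 bits 0 and
+// 4 select which quadword of each source enters the carry-less multiply:
+//
+//     p.VPCLMULQDQ(0x00, XMM1, XMM2, XMM3)   // XMM3 = CLMUL of the low qwords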
+
+// VPCMOV performs "Packed Conditional Move".
+//
+// Mnemonic : VPCMOV
+// Supported forms : (6 forms)
+//
+// * VPCMOV xmm, xmm, xmm, xmm [XOP]
+// * VPCMOV m128, xmm, xmm, xmm [XOP]
+// * VPCMOV xmm, m128, xmm, xmm [XOP]
+// * VPCMOV ymm, ymm, ymm, ymm [XOP]
+// * VPCMOV m256, ymm, ymm, ymm [XOP]
+// * VPCMOV ymm, m256, ymm, ymm [XOP]
+//
+func (self *Program) VPCMOV(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPCMOV", 4, Operands { v0, v1, v2, v3 })
+ // VPCMOV xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[2]) << 3))
+ m.emit(0xa2)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[2]) << 3))
+ m.emit(0xa2)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VPCMOV m128, xmm, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x80, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0xa2)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VPCMOV xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0xa2)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VPCMOV ymm, ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7c ^ (hlcode(v[2]) << 3))
+ m.emit(0xa2)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfc ^ (hlcode(v[2]) << 3))
+ m.emit(0xa2)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VPCMOV m256, ymm, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x84, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0xa2)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VPCMOV ymm, m256, ymm, ymm
+ if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x04, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0xa2)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCMOV")
+ }
+ return p
+}
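+
+// Illustrative usage (sketch; operand roles inferred from the XOP encoding
+// above, so treat the exact semantics as an assumption):
+//
+//     p.VPCMOV(XMM1, XMM2, XMM3, XMM4)   // XMM4 = (XMM3 & XMM1) | (XMM2 &^ XMM1)
+//
+// XMM1 acts as the per-bit selector; note the instruction is AMD-specific.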
+
+// VPCMPB performs "Compare Packed Signed Byte Values".
+//
+// Mnemonic : VPCMPB
+// Supported forms : (6 forms)
+//
+// * VPCMPB imm8, zmm, zmm, k{k} [AVX512BW]
+// * VPCMPB imm8, m512, zmm, k{k} [AVX512BW]
+// * VPCMPB imm8, xmm, xmm, k{k} [AVX512BW,AVX512VL]
+// * VPCMPB imm8, m128, xmm, k{k} [AVX512BW,AVX512VL]
+// * VPCMPB imm8, ymm, ymm, k{k} [AVX512BW,AVX512VL]
+// * VPCMPB imm8, m256, ymm, k{k} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPCMPB(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPCMPB", 4, Operands { v0, v1, v2, v3 })
+ // VPCMPB imm8, zmm, zmm, k{k}
+ if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x3f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPB imm8, m512, zmm, k{k}
+ if isImm8(v0) && isM512(v1) && isZMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
+ m.emit(0x3f)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPB imm8, xmm, xmm, k{k}
+ if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+ m.emit(0x3f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPB imm8, m128, xmm, k{k}
+ if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
+ m.emit(0x3f)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPB imm8, ymm, ymm, k{k}
+ if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x3f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPB imm8, m256, ymm, k{k}
+ if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
+ m.emit(0x3f)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCMPB")
+ }
+ return p
+}
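+
+// Illustrative usage (sketch, assuming the exported opmask constants K0-K7):
+// the leading imm8 is the predicate (0=EQ, 1=LT, 2=LE, 4=NE, 5=NLT, 6=NLE)
+// and the result is one mask bit per byte lane:
+//
+//     p.VPCMPB(0, ZMM1, ZMM2, K1)   // K1 bit i set where the byte lanes are equal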
+
+// VPCMPD performs "Compare Packed Signed Doubleword Values".
+//
+// Mnemonic : VPCMPD
+// Supported forms : (6 forms)
+//
+// * VPCMPD imm8, m512/m32bcst, zmm, k{k} [AVX512F]
+// * VPCMPD imm8, zmm, zmm, k{k} [AVX512F]
+// * VPCMPD imm8, m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL]
+// * VPCMPD imm8, xmm, xmm, k{k} [AVX512F,AVX512VL]
+// * VPCMPD imm8, m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL]
+// * VPCMPD imm8, ymm, ymm, k{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VPCMPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPCMPD", 4, Operands { v0, v1, v2, v3 })
+ // VPCMPD imm8, m512/m32bcst, zmm, k{k}
+ if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
+ m.emit(0x1f)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPD imm8, zmm, zmm, k{k}
+ if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x1f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPD imm8, m128/m32bcst, xmm, k{k}
+ if isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
+ m.emit(0x1f)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPD imm8, xmm, xmm, k{k}
+ if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+ m.emit(0x1f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPD imm8, m256/m32bcst, ymm, k{k}
+ if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
+ m.emit(0x1f)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPD imm8, ymm, ymm, k{k}
+ if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x1f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCMPD")
+ }
+ return p
+}
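+
+// Illustrative usage (sketch; lane direction inferred from the EVEX encoding,
+// so verify before relying on it): same predicate scheme as VPCMPB, one mask
+// bit per doubleword:
+//
+//     p.VPCMPD(1, ZMM1, ZMM2, K2)   // K2 marks lanes where ZMM2 < ZMM1 (signed)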
+
+// VPCMPEQB performs "Compare Packed Byte Data for Equality".
+//
+// Mnemonic : VPCMPEQB
+// Supported forms : (10 forms)
+//
+// * VPCMPEQB xmm, xmm, xmm [AVX]
+// * VPCMPEQB m128, xmm, xmm [AVX]
+// * VPCMPEQB ymm, ymm, ymm [AVX2]
+// * VPCMPEQB m256, ymm, ymm [AVX2]
+// * VPCMPEQB zmm, zmm, k{k} [AVX512BW]
+// * VPCMPEQB m512, zmm, k{k} [AVX512BW]
+// * VPCMPEQB xmm, xmm, k{k} [AVX512BW,AVX512VL]
+// * VPCMPEQB m128, xmm, k{k} [AVX512BW,AVX512VL]
+// * VPCMPEQB ymm, ymm, k{k} [AVX512BW,AVX512VL]
+// * VPCMPEQB m256, ymm, k{k} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPCMPEQB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPCMPEQB", 3, Operands { v0, v1, v2 })
+ // VPCMPEQB xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x74)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPEQB m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x74)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPCMPEQB ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x74)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPEQB m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x74)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPCMPEQB zmm, zmm, k{k}
+ if isZMM(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x74)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPEQB m512, zmm, k{k}
+ if isM512(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
+ m.emit(0x74)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPCMPEQB xmm, xmm, k{k}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x74)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPEQB m128, xmm, k{k}
+ if isM128(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
+ m.emit(0x74)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPCMPEQB ymm, ymm, k{k}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x74)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPEQB m256, ymm, k{k}
+ if isM256(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
+ m.emit(0x74)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCMPEQB")
+ }
+ return p
+}
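+
+// Illustrative usage (sketch): the plain AVX form writes 0xFF/0x00 bytes into
+// a vector destination instead of an opmask:
+//
+//     p.VPCMPEQB(XMM1, XMM2, XMM3)   // XMM3 byte i = 0xFF where XMM2 == XMM1, else 0x00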
+
+// VPCMPEQD performs "Compare Packed Doubleword Data for Equality".
+//
+// Mnemonic : VPCMPEQD
+// Supported forms : (10 forms)
+//
+// * VPCMPEQD xmm, xmm, xmm [AVX]
+// * VPCMPEQD m128, xmm, xmm [AVX]
+// * VPCMPEQD ymm, ymm, ymm [AVX2]
+// * VPCMPEQD m256, ymm, ymm [AVX2]
+// * VPCMPEQD m512/m32bcst, zmm, k{k} [AVX512F]
+// * VPCMPEQD zmm, zmm, k{k} [AVX512F]
+// * VPCMPEQD m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL]
+// * VPCMPEQD xmm, xmm, k{k} [AVX512F,AVX512VL]
+// * VPCMPEQD m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL]
+// * VPCMPEQD ymm, ymm, k{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VPCMPEQD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPCMPEQD", 3, Operands { v0, v1, v2 })
+ // VPCMPEQD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x76)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPEQD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x76)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPCMPEQD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x76)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPEQD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x76)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPCMPEQD m512/m32bcst, zmm, k{k}
+ if isM512M32bcst(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
+ m.emit(0x76)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPCMPEQD zmm, zmm, k{k}
+ if isZMM(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x76)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPEQD m128/m32bcst, xmm, k{k}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
+ m.emit(0x76)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPCMPEQD xmm, xmm, k{k}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x76)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPEQD m256/m32bcst, ymm, k{k}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
+ m.emit(0x76)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPCMPEQD ymm, ymm, k{k}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x76)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCMPEQD")
+ }
+ return p
+}
+
+// VPCMPEQQ performs "Compare Packed Quadword Data for Equality".
+//
+// Mnemonic : VPCMPEQQ
+// Supported forms : (10 forms)
+//
+// * VPCMPEQQ xmm, xmm, xmm [AVX]
+// * VPCMPEQQ m128, xmm, xmm [AVX]
+// * VPCMPEQQ ymm, ymm, ymm [AVX2]
+// * VPCMPEQQ m256, ymm, ymm [AVX2]
+// * VPCMPEQQ m512/m64bcst, zmm, k{k} [AVX512F]
+// * VPCMPEQQ zmm, zmm, k{k} [AVX512F]
+// * VPCMPEQQ m128/m64bcst, xmm, k{k} [AVX512F,AVX512VL]
+// * VPCMPEQQ xmm, xmm, k{k} [AVX512F,AVX512VL]
+// * VPCMPEQQ m256/m64bcst, ymm, k{k} [AVX512F,AVX512VL]
+// * VPCMPEQQ ymm, ymm, k{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VPCMPEQQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPCMPEQQ", 3, Operands { v0, v1, v2 })
+ // VPCMPEQQ xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPEQQ m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x29)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPCMPEQQ ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPEQQ m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x29)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPCMPEQQ m512/m64bcst, zmm, k{k}
+ if isM512M64bcst(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
+ m.emit(0x29)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPCMPEQQ zmm, zmm, k{k}
+ if isZMM(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPEQQ m128/m64bcst, xmm, k{k}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
+ m.emit(0x29)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPCMPEQQ xmm, xmm, k{k}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPEQQ m256/m64bcst, ymm, k{k}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
+ m.emit(0x29)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPCMPEQQ ymm, ymm, k{k}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCMPEQQ")
+ }
+ return p
+}
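+
+// Illustrative usage (sketch): same shape as VPCMPEQB but per 64-bit lane;
+// the EVEX forms above additionally accept an m64bcst broadcast source:
+//
+//     p.VPCMPEQQ(YMM1, YMM2, YMM3)   // each equal qword lane becomes all-ones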
+
+// VPCMPEQW performs "Compare Packed Word Data for Equality".
+//
+// Mnemonic : VPCMPEQW
+// Supported forms : (10 forms)
+//
+// * VPCMPEQW xmm, xmm, xmm [AVX]
+// * VPCMPEQW m128, xmm, xmm [AVX]
+// * VPCMPEQW ymm, ymm, ymm [AVX2]
+// * VPCMPEQW m256, ymm, ymm [AVX2]
+// * VPCMPEQW zmm, zmm, k{k} [AVX512BW]
+// * VPCMPEQW m512, zmm, k{k} [AVX512BW]
+// * VPCMPEQW xmm, xmm, k{k} [AVX512BW,AVX512VL]
+// * VPCMPEQW m128, xmm, k{k} [AVX512BW,AVX512VL]
+// * VPCMPEQW ymm, ymm, k{k} [AVX512BW,AVX512VL]
+// * VPCMPEQW m256, ymm, k{k} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPCMPEQW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPCMPEQW", 3, Operands { v0, v1, v2 })
+ // VPCMPEQW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x75)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPEQW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x75)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPCMPEQW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x75)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPEQW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x75)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPCMPEQW zmm, zmm, k{k}
+ if isZMM(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x75)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPEQW m512, zmm, k{k}
+ if isM512(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
+ m.emit(0x75)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPCMPEQW xmm, xmm, k{k}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x75)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPEQW m128, xmm, k{k}
+ if isM128(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
+ m.emit(0x75)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPCMPEQW ymm, ymm, k{k}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x75)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPEQW m256, ymm, k{k}
+ if isM256(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
+ m.emit(0x75)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCMPEQW")
+ }
+ return p
+}
+
+// VPCMPESTRI performs "Packed Compare Explicit Length Strings, Return Index".
+//
+// Mnemonic : VPCMPESTRI
+// Supported forms : (2 forms)
+//
+// * VPCMPESTRI imm8, xmm, xmm [AVX]
+// * VPCMPESTRI imm8, m128, xmm [AVX]
+//
+func (self *Program) VPCMPESTRI(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPCMPESTRI", 3, Operands { v0, v1, v2 })
+ // VPCMPESTRI imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79)
+ m.emit(0x61)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPESTRI imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0x61)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCMPESTRI")
+ }
+ return p
+}
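+
+// Illustrative usage (sketch): as with SSE4.2 PCMPESTRI, string lengths are
+// taken implicitly from EAX and EDX and the match index is returned in ECX;
+// imm8 selects element type and aggregation:
+//
+//     p.VPCMPESTRI(0x0c, XMM1, XMM2)   // 0x0c = unsigned bytes, equal-ordered (substring)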
+
+// VPCMPESTRM performs "Packed Compare Explicit Length Strings, Return Mask".
+//
+// Mnemonic : VPCMPESTRM
+// Supported forms : (2 forms)
+//
+// * VPCMPESTRM imm8, xmm, xmm [AVX]
+// * VPCMPESTRM imm8, m128, xmm [AVX]
+//
+func (self *Program) VPCMPESTRM(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPCMPESTRM", 3, Operands { v0, v1, v2 })
+ // VPCMPESTRM imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79)
+ m.emit(0x60)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPESTRM imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0x60)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCMPESTRM")
+ }
+ return p
+}
+
+// VPCMPGTB performs "Compare Packed Signed Byte Integers for Greater Than".
+//
+// Mnemonic : VPCMPGTB
+// Supported forms : (10 forms)
+//
+// * VPCMPGTB xmm, xmm, xmm [AVX]
+// * VPCMPGTB m128, xmm, xmm [AVX]
+// * VPCMPGTB ymm, ymm, ymm [AVX2]
+// * VPCMPGTB m256, ymm, ymm [AVX2]
+// * VPCMPGTB zmm, zmm, k{k} [AVX512BW]
+// * VPCMPGTB m512, zmm, k{k} [AVX512BW]
+// * VPCMPGTB xmm, xmm, k{k} [AVX512BW,AVX512VL]
+// * VPCMPGTB m128, xmm, k{k} [AVX512BW,AVX512VL]
+// * VPCMPGTB ymm, ymm, k{k} [AVX512BW,AVX512VL]
+// * VPCMPGTB m256, ymm, k{k} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPCMPGTB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPCMPGTB", 3, Operands { v0, v1, v2 })
+ // VPCMPGTB xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x64)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPGTB m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x64)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPCMPGTB ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x64)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPGTB m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x64)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPCMPGTB zmm, zmm, k{k}
+ if isZMM(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x64)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPGTB m512, zmm, k{k}
+ if isM512(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
+ m.emit(0x64)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPCMPGTB xmm, xmm, k{k}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x64)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPGTB m128, xmm, k{k}
+ if isM128(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
+ m.emit(0x64)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPCMPGTB ymm, ymm, k{k}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x64)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPGTB m256, ymm, k{k}
+ if isM256(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
+ m.emit(0x64)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCMPGTB")
+ }
+ return p
+}
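+
+// Illustrative usage (sketch): signed greater-than with the same operand
+// shapes as VPCMPEQB:
+//
+//     p.VPCMPGTB(XMM1, XMM2, XMM3)   // XMM3 byte i = 0xFF where XMM2 > XMM1 (signed)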
+
+// VPCMPGTD performs "Compare Packed Signed Doubleword Integers for Greater Than".
+//
+// Mnemonic : VPCMPGTD
+// Supported forms : (10 forms)
+//
+// * VPCMPGTD xmm, xmm, xmm [AVX]
+// * VPCMPGTD m128, xmm, xmm [AVX]
+// * VPCMPGTD ymm, ymm, ymm [AVX2]
+// * VPCMPGTD m256, ymm, ymm [AVX2]
+// * VPCMPGTD m512/m32bcst, zmm, k{k} [AVX512F]
+// * VPCMPGTD zmm, zmm, k{k} [AVX512F]
+// * VPCMPGTD m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL]
+// * VPCMPGTD xmm, xmm, k{k} [AVX512F,AVX512VL]
+// * VPCMPGTD m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL]
+// * VPCMPGTD ymm, ymm, k{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VPCMPGTD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPCMPGTD", 3, Operands { v0, v1, v2 })
+ // VPCMPGTD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x66)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPGTD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x66)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPCMPGTD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x66)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPGTD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x66)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPCMPGTD m512/m32bcst, zmm, k{k}
+ if isM512M32bcst(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
+ m.emit(0x66)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPCMPGTD zmm, zmm, k{k}
+ if isZMM(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x66)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPGTD m128/m32bcst, xmm, k{k}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
+ m.emit(0x66)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPCMPGTD xmm, xmm, k{k}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x66)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPGTD m256/m32bcst, ymm, k{k}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
+ m.emit(0x66)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPCMPGTD ymm, ymm, k{k}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x66)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCMPGTD")
+ }
+ return p
+}
+
+// VPCMPGTQ performs "Compare Packed Data for Greater Than".
+//
+// Mnemonic : VPCMPGTQ
+// Supported forms : (10 forms)
+//
+// * VPCMPGTQ xmm, xmm, xmm [AVX]
+// * VPCMPGTQ m128, xmm, xmm [AVX]
+// * VPCMPGTQ ymm, ymm, ymm [AVX2]
+// * VPCMPGTQ m256, ymm, ymm [AVX2]
+// * VPCMPGTQ m512/m64bcst, zmm, k{k} [AVX512F]
+// * VPCMPGTQ zmm, zmm, k{k} [AVX512F]
+// * VPCMPGTQ m128/m64bcst, xmm, k{k} [AVX512F,AVX512VL]
+// * VPCMPGTQ xmm, xmm, k{k} [AVX512F,AVX512VL]
+// * VPCMPGTQ m256/m64bcst, ymm, k{k} [AVX512F,AVX512VL]
+// * VPCMPGTQ ymm, ymm, k{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VPCMPGTQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPCMPGTQ", 3, Operands { v0, v1, v2 })
+ // VPCMPGTQ xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x37)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPGTQ m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x37)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPCMPGTQ ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x37)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPGTQ m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x37)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPCMPGTQ m512/m64bcst, zmm, k{k}
+ if isM512M64bcst(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
+ m.emit(0x37)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPCMPGTQ zmm, zmm, k{k}
+ if isZMM(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x37)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPGTQ m128/m64bcst, xmm, k{k}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
+ m.emit(0x37)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPCMPGTQ xmm, xmm, k{k}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x37)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPGTQ m256/m64bcst, ymm, k{k}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
+ m.emit(0x37)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPCMPGTQ ymm, ymm, k{k}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x37)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCMPGTQ")
+ }
+ return p
+}
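+
+// Illustrative usage (sketch): the quadword variant; note it uses the
+// three-byte 0F38 escape (opcode 0x37) rather than the 0F map used by the
+// byte/word/dword compares:
+//
+//     p.VPCMPGTQ(XMM1, XMM2, XMM3)   // qword lanes: all-ones where XMM2 > XMM1 (signed)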
+
+// VPCMPGTW performs "Compare Packed Signed Word Integers for Greater Than".
+//
+// Mnemonic : VPCMPGTW
+// Supported forms : (10 forms)
+//
+// * VPCMPGTW xmm, xmm, xmm [AVX]
+// * VPCMPGTW m128, xmm, xmm [AVX]
+// * VPCMPGTW ymm, ymm, ymm [AVX2]
+// * VPCMPGTW m256, ymm, ymm [AVX2]
+// * VPCMPGTW zmm, zmm, k{k} [AVX512BW]
+// * VPCMPGTW m512, zmm, k{k} [AVX512BW]
+// * VPCMPGTW xmm, xmm, k{k} [AVX512BW,AVX512VL]
+// * VPCMPGTW m128, xmm, k{k} [AVX512BW,AVX512VL]
+// * VPCMPGTW ymm, ymm, k{k} [AVX512BW,AVX512VL]
+// * VPCMPGTW m256, ymm, k{k} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPCMPGTW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPCMPGTW", 3, Operands { v0, v1, v2 })
+ // VPCMPGTW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x65)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPGTW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x65)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPCMPGTW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x65)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPGTW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x65)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPCMPGTW zmm, zmm, k{k}
+ if isZMM(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x65)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPGTW m512, zmm, k{k}
+ if isM512(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
+ m.emit(0x65)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPCMPGTW xmm, xmm, k{k}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x65)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPGTW m128, xmm, k{k}
+ if isM128(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
+ m.emit(0x65)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPCMPGTW ymm, ymm, k{k}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x65)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCMPGTW m256, ymm, k{k}
+ if isM256(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
+ m.emit(0x65)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCMPGTW")
+ }
+ return p
+}
+
+// VPCMPISTRI performs "Packed Compare Implicit Length Strings, Return Index".
+//
+// Mnemonic : VPCMPISTRI
+// Supported forms : (2 forms)
+//
+// * VPCMPISTRI imm8, xmm, xmm [AVX]
+// * VPCMPISTRI imm8, m128, xmm [AVX]
+//
+func (self *Program) VPCMPISTRI(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPCMPISTRI", 3, Operands { v0, v1, v2 })
+ // VPCMPISTRI imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79)
+ m.emit(0x63)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPISTRI imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0x63)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCMPISTRI")
+ }
+ return p
+}
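+
+// Illustrative usage (sketch): the implicit-length variant stops at a NUL
+// terminator instead of reading EAX/EDX, still returning the index in ECX:
+//
+//     p.VPCMPISTRI(0x0c, XMM1, XMM2)   // substring search over NUL-terminated data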
+
+// VPCMPISTRM performs "Packed Compare Implicit Length Strings, Return Mask".
+//
+// Mnemonic : VPCMPISTRM
+// Supported forms : (2 forms)
+//
+// * VPCMPISTRM imm8, xmm, xmm [AVX]
+// * VPCMPISTRM imm8, m128, xmm [AVX]
+//
+func (self *Program) VPCMPISTRM(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPCMPISTRM", 3, Operands { v0, v1, v2 })
+ // VPCMPISTRM imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79)
+ m.emit(0x62)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPISTRM imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0x62)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCMPISTRM")
+ }
+ return p
+}
+
+// VPCMPQ performs "Compare Packed Signed Quadword Values".
+//
+// Mnemonic : VPCMPQ
+// Supported forms : (6 forms)
+//
+// * VPCMPQ imm8, m512/m64bcst, zmm, k{k} [AVX512F]
+// * VPCMPQ imm8, zmm, zmm, k{k} [AVX512F]
+// * VPCMPQ imm8, m128/m64bcst, xmm, k{k} [AVX512F,AVX512VL]
+// * VPCMPQ imm8, xmm, xmm, k{k} [AVX512F,AVX512VL]
+// * VPCMPQ imm8, m256/m64bcst, ymm, k{k} [AVX512F,AVX512VL]
+// * VPCMPQ imm8, ymm, ymm, k{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VPCMPQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPCMPQ", 4, Operands { v0, v1, v2, v3 })
+ // VPCMPQ imm8, m512/m64bcst, zmm, k{k}
+ if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
+ m.emit(0x1f)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPQ imm8, zmm, zmm, k{k}
+ if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x1f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPQ imm8, m128/m64bcst, xmm, k{k}
+ if isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
+ m.emit(0x1f)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPQ imm8, xmm, xmm, k{k}
+ if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+ m.emit(0x1f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPQ imm8, m256/m64bcst, ymm, k{k}
+ if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
+ m.emit(0x1f)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPQ imm8, ymm, ymm, k{k}
+ if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x1f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCMPQ")
+ }
+ return p
+}
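+
+// Illustrative usage (sketch): quadword predicate compare into an opmask,
+// reusing the imm8 predicate encoding shown for VPCMPB:
+//
+//     p.VPCMPQ(4, ZMM1, ZMM2, K1)   // predicate 4 = NE; one mask bit per qword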
+
+// VPCMPUB performs "Compare Packed Unsigned Byte Values".
+//
+// Mnemonic : VPCMPUB
+// Supported forms : (6 forms)
+//
+// * VPCMPUB imm8, zmm, zmm, k{k} [AVX512BW]
+// * VPCMPUB imm8, m512, zmm, k{k} [AVX512BW]
+// * VPCMPUB imm8, xmm, xmm, k{k} [AVX512BW,AVX512VL]
+// * VPCMPUB imm8, m128, xmm, k{k} [AVX512BW,AVX512VL]
+// * VPCMPUB imm8, ymm, ymm, k{k} [AVX512BW,AVX512VL]
+// * VPCMPUB imm8, m256, ymm, k{k} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPCMPUB(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPCMPUB", 4, Operands { v0, v1, v2, v3 })
+ // VPCMPUB imm8, zmm, zmm, k{k}
+ if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x3e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPUB imm8, m512, zmm, k{k}
+ if isImm8(v0) && isM512(v1) && isZMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
+ m.emit(0x3e)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPUB imm8, xmm, xmm, k{k}
+ if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+ m.emit(0x3e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPUB imm8, m128, xmm, k{k}
+ if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
+ m.emit(0x3e)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPUB imm8, ymm, ymm, k{k}
+ if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x3e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPUB imm8, m256, ymm, k{k}
+ if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
+ m.emit(0x3e)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCMPUB")
+ }
+ return p
+}
+
+// VPCMPUD performs "Compare Packed Unsigned Doubleword Values".
+//
+// Mnemonic : VPCMPUD
+// Supported forms : (6 forms)
+//
+// * VPCMPUD imm8, m512/m32bcst, zmm, k{k} [AVX512F]
+// * VPCMPUD imm8, zmm, zmm, k{k} [AVX512F]
+// * VPCMPUD imm8, m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL]
+// * VPCMPUD imm8, xmm, xmm, k{k} [AVX512F,AVX512VL]
+// * VPCMPUD imm8, m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL]
+// * VPCMPUD imm8, ymm, ymm, k{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VPCMPUD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPCMPUD", 4, Operands { v0, v1, v2, v3 })
+ // VPCMPUD imm8, m512/m32bcst, zmm, k{k}
+ if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
+ m.emit(0x1e)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPUD imm8, zmm, zmm, k{k}
+ if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x1e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPUD imm8, m128/m32bcst, xmm, k{k}
+ if isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
+ m.emit(0x1e)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPUD imm8, xmm, xmm, k{k}
+ if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+ m.emit(0x1e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPUD imm8, m256/m32bcst, ymm, k{k}
+ if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
+ m.emit(0x1e)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPUD imm8, ymm, ymm, k{k}
+ if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x1e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCMPUD")
+ }
+ return p
+}
+
+// VPCMPUQ performs "Compare Packed Unsigned Quadword Values".
+//
+// Mnemonic : VPCMPUQ
+// Supported forms : (6 forms)
+//
+// * VPCMPUQ imm8, m512/m64bcst, zmm, k{k} [AVX512F]
+// * VPCMPUQ imm8, zmm, zmm, k{k} [AVX512F]
+// * VPCMPUQ imm8, m128/m64bcst, xmm, k{k} [AVX512F,AVX512VL]
+// * VPCMPUQ imm8, xmm, xmm, k{k} [AVX512F,AVX512VL]
+// * VPCMPUQ imm8, m256/m64bcst, ymm, k{k} [AVX512F,AVX512VL]
+// * VPCMPUQ imm8, ymm, ymm, k{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VPCMPUQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPCMPUQ", 4, Operands { v0, v1, v2, v3 })
+ // VPCMPUQ imm8, m512/m64bcst, zmm, k{k}
+ if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
+ m.emit(0x1e)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPUQ imm8, zmm, zmm, k{k}
+ if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x1e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPUQ imm8, m128/m64bcst, xmm, k{k}
+ if isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
+ m.emit(0x1e)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPUQ imm8, xmm, xmm, k{k}
+ if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+ m.emit(0x1e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPUQ imm8, m256/m64bcst, ymm, k{k}
+ if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1]))
+ m.emit(0x1e)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPUQ imm8, ymm, ymm, k{k}
+ if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x1e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCMPUQ")
+ }
+ return p
+}
+
+// VPCMPUW performs "Compare Packed Unsigned Word Values".
+//
+// Mnemonic : VPCMPUW
+// Supported forms : (6 forms)
+//
+// * VPCMPUW imm8, zmm, zmm, k{k} [AVX512BW]
+// * VPCMPUW imm8, m512, zmm, k{k} [AVX512BW]
+// * VPCMPUW imm8, xmm, xmm, k{k} [AVX512BW,AVX512VL]
+// * VPCMPUW imm8, m128, xmm, k{k} [AVX512BW,AVX512VL]
+// * VPCMPUW imm8, ymm, ymm, k{k} [AVX512BW,AVX512VL]
+// * VPCMPUW imm8, m256, ymm, k{k} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPCMPUW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPCMPUW", 4, Operands { v0, v1, v2, v3 })
+ // VPCMPUW imm8, zmm, zmm, k{k}
+ if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x3e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPUW imm8, m512, zmm, k{k}
+ if isImm8(v0) && isM512(v1) && isZMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
+ m.emit(0x3e)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPUW imm8, xmm, xmm, k{k}
+ if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+ m.emit(0x3e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPUW imm8, m128, xmm, k{k}
+ if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
+ m.emit(0x3e)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPUW imm8, ymm, ymm, k{k}
+ if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x3e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPUW imm8, m256, ymm, k{k}
+ if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
+ m.emit(0x3e)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCMPUW")
+ }
+ return p
+}
+
+// VPCMPW performs "Compare Packed Signed Word Values".
+//
+// Mnemonic : VPCMPW
+// Supported forms : (6 forms)
+//
+// * VPCMPW imm8, zmm, zmm, k{k} [AVX512BW]
+// * VPCMPW imm8, m512, zmm, k{k} [AVX512BW]
+// * VPCMPW imm8, xmm, xmm, k{k} [AVX512BW,AVX512VL]
+// * VPCMPW imm8, m128, xmm, k{k} [AVX512BW,AVX512VL]
+// * VPCMPW imm8, ymm, ymm, k{k} [AVX512BW,AVX512VL]
+// * VPCMPW imm8, m256, ymm, k{k} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPCMPW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPCMPW", 4, Operands { v0, v1, v2, v3 })
+ // VPCMPW imm8, zmm, zmm, k{k}
+ if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x3f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPW imm8, m512, zmm, k{k}
+ if isImm8(v0) && isM512(v1) && isZMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
+ m.emit(0x3f)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPW imm8, xmm, xmm, k{k}
+ if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+ m.emit(0x3f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPW imm8, m128, xmm, k{k}
+ if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
+ m.emit(0x3f)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPW imm8, ymm, ymm, k{k}
+ if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x3f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCMPW imm8, m256, ymm, k{k}
+ if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) && isKk(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0)
+ m.emit(0x3f)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCMPW")
+ }
+ return p
+}
+
+// VPCOMB performs "Compare Packed Signed Byte Integers".
+//
+// Mnemonic : VPCOMB
+// Supported forms : (2 forms)
+//
+// * VPCOMB imm8, xmm, xmm, xmm [XOP]
+// * VPCOMB imm8, m128, xmm, xmm [XOP]
+//
+func (self *Program) VPCOMB(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPCOMB", 4, Operands { v0, v1, v2, v3 })
+ // VPCOMB imm8, xmm, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[2]) << 3))
+ m.emit(0xcc)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCOMB imm8, m128, xmm, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0xcc)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCOMB")
+ }
+ return p
+}
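+
+// Usage note: the XOP VPCOM* forms use AMD's predicate encoding in imm8
+// (0=LT, 1=LE, 2=GT, 3=GE, 4=EQ, 5=NEQ, 6=FALSE, 7=TRUE) and write the result
+// as all-ones/all-zeros elements into a destination vector register rather
+// than a mask. A minimal sketch, assuming a *Program p and this package's XMM
+// register constants:
+//
+//     p.VPCOMB(4, XMM1, XMM2, XMM0)   // XMM0 bytes := (XMM2 == XMM1) ? 0xff : 0x00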
+
+// VPCOMD performs "Compare Packed Signed Doubleword Integers".
+//
+// Mnemonic : VPCOMD
+// Supported forms : (2 forms)
+//
+// * VPCOMD imm8, xmm, xmm, xmm [XOP]
+// * VPCOMD imm8, m128, xmm, xmm [XOP]
+//
+func (self *Program) VPCOMD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPCOMD", 4, Operands { v0, v1, v2, v3 })
+ // VPCOMD imm8, xmm, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[2]) << 3))
+ m.emit(0xce)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCOMD imm8, m128, xmm, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0xce)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCOMD")
+ }
+ return p
+}
+
+// VPCOMPRESSD performs "Store Sparse Packed Doubleword Integer Values into Dense Memory/Register".
+//
+// Mnemonic : VPCOMPRESSD
+// Supported forms : (6 forms)
+//
+// * VPCOMPRESSD zmm, zmm{k}{z} [AVX512F]
+// * VPCOMPRESSD zmm, m512{k}{z} [AVX512F]
+// * VPCOMPRESSD xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPCOMPRESSD xmm, m128{k}{z} [AVX512F,AVX512VL]
+// * VPCOMPRESSD ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPCOMPRESSD ymm, m256{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPCOMPRESSD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPCOMPRESSD", 2, Operands { v0, v1 })
+ // VPCOMPRESSD zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x8b)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPCOMPRESSD zmm, m512{k}{z}
+ if isZMM(v0) && isM512kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x8b)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ // VPCOMPRESSD xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x8b)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPCOMPRESSD xmm, m128{k}{z}
+ if isEVEXXMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x8b)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ // VPCOMPRESSD ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x8b)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPCOMPRESSD ymm, m256{k}{z}
+ if isEVEXYMM(v0) && isM256kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x8b)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCOMPRESSD")
+ }
+ return p
+}
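+
+// Semantics note: the compress forms pack the elements selected by the write
+// mask contiguously into the low end of the destination (or into successive
+// memory locations); with {z} the remaining tail is zeroed. Without a mask the
+// instruction degenerates to a plain copy, so the masked forms are the useful
+// ones. A minimal unmasked sketch, assuming a *Program p:
+//
+//     p.VPCOMPRESSD(ZMM1, ZMM0)   // no mask: ZMM0 := ZMM1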
+
+// VPCOMPRESSQ performs "Store Sparse Packed Quadword Integer Values into Dense Memory/Register".
+//
+// Mnemonic : VPCOMPRESSQ
+// Supported forms : (6 forms)
+//
+// * VPCOMPRESSQ zmm, zmm{k}{z} [AVX512F]
+// * VPCOMPRESSQ zmm, m512{k}{z} [AVX512F]
+// * VPCOMPRESSQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPCOMPRESSQ xmm, m128{k}{z} [AVX512F,AVX512VL]
+// * VPCOMPRESSQ ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPCOMPRESSQ ymm, m256{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPCOMPRESSQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPCOMPRESSQ", 2, Operands { v0, v1 })
+ // VPCOMPRESSQ zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x8b)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPCOMPRESSQ zmm, m512{k}{z}
+ if isZMM(v0) && isM512kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x8b)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VPCOMPRESSQ xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x8b)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPCOMPRESSQ xmm, m128{k}{z}
+ if isEVEXXMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x8b)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VPCOMPRESSQ ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x8b)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPCOMPRESSQ ymm, m256{k}{z}
+ if isEVEXYMM(v0) && isM256kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x8b)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCOMPRESSQ")
+ }
+ return p
+}
+
+// VPCOMQ performs "Compare Packed Signed Quadword Integers".
+//
+// Mnemonic : VPCOMQ
+// Supported forms : (2 forms)
+//
+// * VPCOMQ imm8, xmm, xmm, xmm [XOP]
+// * VPCOMQ imm8, m128, xmm, xmm [XOP]
+//
+func (self *Program) VPCOMQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPCOMQ", 4, Operands { v0, v1, v2, v3 })
+ // VPCOMQ imm8, xmm, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[2]) << 3))
+ m.emit(0xcf)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCOMQ imm8, m128, xmm, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0xcf)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCOMQ")
+ }
+ return p
+}
+
+// VPCOMUB performs "Compare Packed Unsigned Byte Integers".
+//
+// Mnemonic : VPCOMUB
+// Supported forms : (2 forms)
+//
+// * VPCOMUB imm8, xmm, xmm, xmm [XOP]
+// * VPCOMUB imm8, m128, xmm, xmm [XOP]
+//
+func (self *Program) VPCOMUB(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPCOMUB", 4, Operands { v0, v1, v2, v3 })
+ // VPCOMUB imm8, xmm, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[2]) << 3))
+ m.emit(0xec)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCOMUB imm8, m128, xmm, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0xec)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCOMUB")
+ }
+ return p
+}
+
+// VPCOMUD performs "Compare Packed Unsigned Doubleword Integers".
+//
+// Mnemonic : VPCOMUD
+// Supported forms : (2 forms)
+//
+// * VPCOMUD imm8, xmm, xmm, xmm [XOP]
+// * VPCOMUD imm8, m128, xmm, xmm [XOP]
+//
+func (self *Program) VPCOMUD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPCOMUD", 4, Operands { v0, v1, v2, v3 })
+ // VPCOMUD imm8, xmm, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[2]) << 3))
+ m.emit(0xee)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCOMUD imm8, m128, xmm, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0xee)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCOMUD")
+ }
+ return p
+}
+
+// VPCOMUQ performs "Compare Packed Unsigned Quadword Integers".
+//
+// Mnemonic : VPCOMUQ
+// Supported forms : (2 forms)
+//
+// * VPCOMUQ imm8, xmm, xmm, xmm [XOP]
+// * VPCOMUQ imm8, m128, xmm, xmm [XOP]
+//
+func (self *Program) VPCOMUQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPCOMUQ", 4, Operands { v0, v1, v2, v3 })
+ // VPCOMUQ imm8, xmm, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[2]) << 3))
+ m.emit(0xef)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCOMUQ imm8, m128, xmm, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0xef)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCOMUQ")
+ }
+ return p
+}
+
+// VPCOMUW performs "Compare Packed Unsigned Word Integers".
+//
+// Mnemonic : VPCOMUW
+// Supported forms : (2 forms)
+//
+// * VPCOMUW imm8, xmm, xmm, xmm [XOP]
+// * VPCOMUW imm8, m128, xmm, xmm [XOP]
+//
+func (self *Program) VPCOMUW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPCOMUW", 4, Operands { v0, v1, v2, v3 })
+ // VPCOMUW imm8, xmm, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[2]) << 3))
+ m.emit(0xed)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCOMUW imm8, m128, xmm, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0xed)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCOMUW")
+ }
+ return p
+}
+
+// VPCOMW performs "Compare Packed Signed Word Integers".
+//
+// Mnemonic : VPCOMW
+// Supported forms : (2 forms)
+//
+// * VPCOMW imm8, xmm, xmm, xmm [XOP]
+// * VPCOMW imm8, m128, xmm, xmm [XOP]
+//
+func (self *Program) VPCOMW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPCOMW", 4, Operands { v0, v1, v2, v3 })
+ // VPCOMW imm8, xmm, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[2]) << 3))
+ m.emit(0xcd)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPCOMW imm8, m128, xmm, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0xcd)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCOMW")
+ }
+ return p
+}
+
+// VPCONFLICTD performs "Detect Conflicts Within a Vector of Packed Doubleword Values into Dense Memory/Register".
+//
+// Mnemonic : VPCONFLICTD
+// Supported forms : (6 forms)
+//
+// * VPCONFLICTD m128/m32bcst, xmm{k}{z} [AVX512CD,AVX512VL]
+// * VPCONFLICTD m256/m32bcst, ymm{k}{z} [AVX512CD,AVX512VL]
+// * VPCONFLICTD m512/m32bcst, zmm{k}{z} [AVX512CD]
+// * VPCONFLICTD xmm, xmm{k}{z} [AVX512CD,AVX512VL]
+// * VPCONFLICTD ymm, ymm{k}{z} [AVX512CD,AVX512VL]
+// * VPCONFLICTD zmm, zmm{k}{z} [AVX512CD]
+//
+func (self *Program) VPCONFLICTD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPCONFLICTD", 2, Operands { v0, v1 })
+ // VPCONFLICTD m128/m32bcst, xmm{k}{z}
+ if isM128M32bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0xc4)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VPCONFLICTD m256/m32bcst, ymm{k}{z}
+ if isM256M32bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0xc4)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VPCONFLICTD m512/m32bcst, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0xc4)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VPCONFLICTD xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0xc4)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCONFLICTD ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0xc4)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCONFLICTD zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0xc4)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCONFLICTD")
+ }
+ return p
+}
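+
+// Semantics note: for each element, the conflict-detection forms compare it
+// against every lower-indexed element of the source and set one result bit per
+// match, so a zero output element means "no earlier duplicate". This is the
+// usual building block for guarding vectorized scatter updates. A sketch,
+// assuming a *Program p:
+//
+//     p.VPCONFLICTD(ZMM0, ZMM1)   // ZMM1[i] := bitmap of j < i with ZMM0[j] == ZMM0[i]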
+
+// VPCONFLICTQ performs "Detect Conflicts Within a Vector of Packed Quadword Values into Dense Memory/Register".
+//
+// Mnemonic : VPCONFLICTQ
+// Supported forms : (6 forms)
+//
+// * VPCONFLICTQ m128/m64bcst, xmm{k}{z} [AVX512CD,AVX512VL]
+// * VPCONFLICTQ m256/m64bcst, ymm{k}{z} [AVX512CD,AVX512VL]
+// * VPCONFLICTQ m512/m64bcst, zmm{k}{z} [AVX512CD]
+// * VPCONFLICTQ xmm, xmm{k}{z} [AVX512CD,AVX512VL]
+// * VPCONFLICTQ ymm, ymm{k}{z} [AVX512CD,AVX512VL]
+// * VPCONFLICTQ zmm, zmm{k}{z} [AVX512CD]
+//
+func (self *Program) VPCONFLICTQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPCONFLICTQ", 2, Operands { v0, v1 })
+ // VPCONFLICTQ m128/m64bcst, xmm{k}{z}
+ if isM128M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0xc4)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VPCONFLICTQ m256/m64bcst, ymm{k}{z}
+ if isM256M64bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0xc4)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VPCONFLICTQ m512/m64bcst, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0xc4)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VPCONFLICTQ xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0xc4)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCONFLICTQ ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0xc4)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPCONFLICTQ zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0xc4)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPCONFLICTQ")
+ }
+ return p
+}
+
+// VPERM2F128 performs "Permute Floating-Point Values".
+//
+// Mnemonic : VPERM2F128
+// Supported forms : (2 forms)
+//
+// * VPERM2F128 imm8, ymm, ymm, ymm [AVX]
+// * VPERM2F128 imm8, m256, ymm, ymm [AVX]
+//
+func (self *Program) VPERM2F128(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPERM2F128", 4, Operands { v0, v1, v2, v3 })
+ // VPERM2F128 imm8, ymm, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x06)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERM2F128 imm8, m256, ymm, ymm
+ if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x06)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPERM2F128")
+ }
+ return p
+}
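+
+// Control-byte note: for VPERM2F128 (and VPERM2I128 below), imm8 bits [1:0]
+// pick the 128-bit lane written to the low half of the destination and bits
+// [5:4] the lane for the high half, numbering the four source lanes 0-3
+// (first source low/high, then second source low/high — and recall the first
+// source is the second-to-last argument here); bits 3 and 7 zero the
+// corresponding half instead. A sketch, assuming a *Program p and this
+// package's YMM register constants:
+//
+//     p.VPERM2F128(0x20, YMM1, YMM0, YMM2)   // YMM2 := {low: low(YMM0), high: low(YMM1)}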
+
+// VPERM2I128 performs "Permute 128-Bit Integer Values".
+//
+// Mnemonic : VPERM2I128
+// Supported forms : (2 forms)
+//
+// * VPERM2I128 imm8, ymm, ymm, ymm [AVX2]
+// * VPERM2I128 imm8, m256, ymm, ymm [AVX2]
+//
+func (self *Program) VPERM2I128(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPERM2I128", 4, Operands { v0, v1, v2, v3 })
+ // VPERM2I128 imm8, ymm, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit(0x46)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERM2I128 imm8, m256, ymm, ymm
+ if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x46)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPERM2I128")
+ }
+ return p
+}
+
+// VPERMB performs "Permute Byte Integers".
+//
+// Mnemonic : VPERMB
+// Supported forms : (6 forms)
+//
+// * VPERMB xmm, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL]
+// * VPERMB m128, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL]
+// * VPERMB ymm, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL]
+// * VPERMB m256, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL]
+// * VPERMB zmm, zmm, zmm{k}{z} [AVX512VBMI]
+// * VPERMB m512, zmm, zmm{k}{z} [AVX512VBMI]
+//
+func (self *Program) VPERMB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPERMB", 3, Operands { v0, v1, v2 })
+ // VPERMB xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512VBMI)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x8d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMB m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512VBMI)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x8d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPERMB ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512VBMI)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x8d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMB m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512VBMI)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x8d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPERMB zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512VBMI)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x8d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMB m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512VBMI)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x8d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPERMB")
+ }
+ return p
+}
+
+// VPERMD performs "Permute Doubleword Integers".
+//
+// Mnemonic : VPERMD
+// Supported forms : (6 forms)
+//
+// * VPERMD ymm, ymm, ymm [AVX2]
+// * VPERMD m256, ymm, ymm [AVX2]
+// * VPERMD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPERMD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPERMD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPERMD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPERMD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPERMD", 3, Operands { v0, v1, v2 })
+ // VPERMD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x36)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x36)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPERMD m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x36)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPERMD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x36)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMD m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x36)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPERMD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x36)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPERMD")
+ }
+ return p
+}
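+
+// Semantics note: VPERMD is a full lane-crossing doubleword shuffle: each
+// destination dword is the source dword selected by the low bits of the
+// corresponding index element, with the data operand first and the index
+// vector second in this API. A sketch, assuming a *Program p:
+//
+//     p.VPERMD(YMM0, YMM1, YMM2)   // YMM2[i] := YMM0[YMM1[i] & 7]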
+
+// VPERMI2B performs "Full Permute of Bytes From Two Tables Overwriting the Index".
+//
+// Mnemonic : VPERMI2B
+// Supported forms : (6 forms)
+//
+// * VPERMI2B xmm, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL]
+// * VPERMI2B m128, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL]
+// * VPERMI2B ymm, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL]
+// * VPERMI2B m256, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL]
+// * VPERMI2B zmm, zmm, zmm{k}{z} [AVX512VBMI]
+// * VPERMI2B m512, zmm, zmm{k}{z} [AVX512VBMI]
+//
+func (self *Program) VPERMI2B(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPERMI2B", 3, Operands { v0, v1, v2 })
+ // VPERMI2B xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512VBMI)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x75)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMI2B m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512VBMI)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x75)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPERMI2B ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512VBMI)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x75)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMI2B m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512VBMI)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x75)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPERMI2B zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512VBMI)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x75)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMI2B m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512VBMI)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x75)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPERMI2B")
+ }
+ return p
+}
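+
+// Semantics note: the VPERMI2* forms treat the destination register as the
+// index vector and overwrite it with the result; each index selects from the
+// concatenation of the two table operands, with the extra high index bit
+// choosing the table. A sketch, assuming a *Program p:
+//
+//     p.VPERMI2B(XMM0, XMM1, XMM2)   // idx := XMM2[i] & 0x1f; XMM2[i] := idx < 16 ? XMM1[idx] : XMM0[idx-16]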
+
+// VPERMI2D performs "Full Permute of Doublewords From Two Tables Overwriting the Index".
+//
+// Mnemonic : VPERMI2D
+// Supported forms : (6 forms)
+//
+// * VPERMI2D m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPERMI2D zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPERMI2D m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPERMI2D xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPERMI2D m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPERMI2D ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPERMI2D(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPERMI2D", 3, Operands { v0, v1, v2 })
+ // VPERMI2D m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x76)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPERMI2D zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x76)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMI2D m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x76)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPERMI2D xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x76)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMI2D m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x76)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPERMI2D ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x76)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPERMI2D")
+ }
+ return p
+}
+
+// VPERMI2PD performs "Full Permute of Double-Precision Floating-Point Values From Two Tables Overwriting the Index".
+//
+// Mnemonic : VPERMI2PD
+// Supported forms : (6 forms)
+//
+// * VPERMI2PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPERMI2PD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPERMI2PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPERMI2PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPERMI2PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPERMI2PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPERMI2PD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPERMI2PD", 3, Operands { v0, v1, v2 })
+ // VPERMI2PD m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x77)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPERMI2PD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x77)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMI2PD m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x77)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPERMI2PD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x77)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMI2PD m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x77)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPERMI2PD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x77)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPERMI2PD")
+ }
+ return p
+}
+
+// VPERMI2PS performs "Full Permute of Single-Precision Floating-Point Values From Two Tables Overwriting the Index".
+//
+// Mnemonic : VPERMI2PS
+// Supported forms : (6 forms)
+//
+// * VPERMI2PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPERMI2PS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPERMI2PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPERMI2PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPERMI2PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPERMI2PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPERMI2PS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPERMI2PS", 3, Operands { v0, v1, v2 })
+ // VPERMI2PS m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x77)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPERMI2PS zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x77)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMI2PS m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x77)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPERMI2PS xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x77)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMI2PS m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x77)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPERMI2PS ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x77)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPERMI2PS")
+ }
+ return p
+}
+
+// VPERMI2Q performs "Full Permute of Quadwords From Two Tables Overwriting the Index".
+//
+// Mnemonic : VPERMI2Q
+// Supported forms : (6 forms)
+//
+// * VPERMI2Q m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPERMI2Q zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPERMI2Q m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPERMI2Q xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPERMI2Q m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPERMI2Q ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPERMI2Q(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPERMI2Q", 3, Operands { v0, v1, v2 })
+ // VPERMI2Q m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x76)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPERMI2Q zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x76)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMI2Q m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x76)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPERMI2Q xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x76)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMI2Q m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x76)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPERMI2Q ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x76)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPERMI2Q")
+ }
+ return p
+}
+
+// VPERMI2W performs "Full Permute of Words From Two Tables Overwriting the Index".
+//
+// Mnemonic : VPERMI2W
+// Supported forms : (6 forms)
+//
+// * VPERMI2W zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPERMI2W m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPERMI2W xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPERMI2W m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPERMI2W ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPERMI2W m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPERMI2W(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPERMI2W", 3, Operands { v0, v1, v2 })
+ // VPERMI2W zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x75)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMI2W m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x75)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPERMI2W xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x75)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMI2W m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x75)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPERMI2W ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x75)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMI2W m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x75)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPERMI2W")
+ }
+ return p
+}
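+
+// Usage sketch (editorial note): VPERMI2W requires AVX512BW rather than
+// AVX512F, and its memory forms take plain m128/m256/m512 operands with no
+// embedded broadcast (the evex() calls above pass a literal 0 for the
+// broadcast bit). Assuming the exported ZMM register constants:
+//
+//     p.VPERMI2W(ZMM3, ZMM2, ZMM1)   // word-granular two-table permute, indices in ZMM1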
+
+// VPERMIL2PD performs "Permute Two-Source Double-Precision Floating-Point Vectors".
+//
+// Mnemonic : VPERMIL2PD
+// Supported forms : (6 forms)
+//
+// * VPERMIL2PD imm4, xmm, xmm, xmm, xmm [XOP]
+// * VPERMIL2PD imm4, m128, xmm, xmm, xmm [XOP]
+// * VPERMIL2PD imm4, xmm, m128, xmm, xmm [XOP]
+// * VPERMIL2PD imm4, ymm, ymm, ymm, ymm [XOP]
+// * VPERMIL2PD imm4, m256, ymm, ymm, ymm [XOP]
+// * VPERMIL2PD imm4, ymm, m256, ymm, ymm [XOP]
+//
+func (self *Program) VPERMIL2PD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, v4 interface{}) *Instruction {
+ p := self.alloc("VPERMIL2PD", 5, Operands { v0, v1, v2, v3, v4 })
+ // VPERMIL2PD imm4, xmm, xmm, xmm, xmm
+ if isImm4(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) && isXMM(v4) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[2]) << 5))
+ m.emit(0x79 ^ (hlcode(v[3]) << 3))
+ m.emit(0x49)
+ m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+ m.emit((hlcode(v[1]) << 4) | imml(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[3]) << 3))
+ m.emit(0x49)
+ m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[1]))
+ m.emit((hlcode(v[2]) << 4) | imml(v[0]))
+ })
+ }
+ // VPERMIL2PD imm4, m128, xmm, xmm, xmm
+ if isImm4(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) && isXMM(v4) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x81, hcode(v[4]), addr(v[1]), hlcode(v[3]))
+ m.emit(0x49)
+ m.mrsd(lcode(v[4]), addr(v[1]), 1)
+ m.emit((hlcode(v[2]) << 4) | imml(v[0]))
+ })
+ }
+ // VPERMIL2PD imm4, xmm, m128, xmm, xmm
+ if isImm4(v0) && isXMM(v1) && isM128(v2) && isXMM(v3) && isXMM(v4) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[4]), addr(v[2]), hlcode(v[3]))
+ m.emit(0x49)
+ m.mrsd(lcode(v[4]), addr(v[2]), 1)
+ m.emit((hlcode(v[1]) << 4) | imml(v[0]))
+ })
+ }
+ // VPERMIL2PD imm4, ymm, ymm, ymm, ymm
+ if isImm4(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) && isYMM(v4) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[2]) << 5))
+ m.emit(0x7d ^ (hlcode(v[3]) << 3))
+ m.emit(0x49)
+ m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+ m.emit((hlcode(v[1]) << 4) | imml(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0xfd ^ (hlcode(v[3]) << 3))
+ m.emit(0x49)
+ m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[1]))
+ m.emit((hlcode(v[2]) << 4) | imml(v[0]))
+ })
+ }
+ // VPERMIL2PD imm4, m256, ymm, ymm, ymm
+ if isImm4(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) && isYMM(v4) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x85, hcode(v[4]), addr(v[1]), hlcode(v[3]))
+ m.emit(0x49)
+ m.mrsd(lcode(v[4]), addr(v[1]), 1)
+ m.emit((hlcode(v[2]) << 4) | imml(v[0]))
+ })
+ }
+ // VPERMIL2PD imm4, ymm, m256, ymm, ymm
+ if isImm4(v0) && isYMM(v1) && isM256(v2) && isYMM(v3) && isYMM(v4) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[4]), addr(v[2]), hlcode(v[3]))
+ m.emit(0x49)
+ m.mrsd(lcode(v[4]), addr(v[2]), 1)
+ m.emit((hlcode(v[1]) << 4) | imml(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPERMIL2PD")
+ }
+ return p
+}
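+
+// Usage sketch (editorial note): the VPERMIL2PD forms are AMD XOP
+// encodings, so they only assemble for CPUs reporting XOP support. The
+// first operand is a 4-bit immediate selector and the remaining operands
+// follow the source-to-destination order of the form list above; assuming
+// the exported XMM constants and a small Go integer for imm4:
+//
+//     p.VPERMIL2PD(0, XMM3, XMM2, XMM1, XMM0)   // two-source lane permute into XMM0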
+
+// VPERMIL2PS performs "Permute Two-Source Single-Precision Floating-Point Vectors".
+//
+// Mnemonic : VPERMIL2PS
+// Supported forms : (6 forms)
+//
+// * VPERMIL2PS imm4, xmm, xmm, xmm, xmm [XOP]
+// * VPERMIL2PS imm4, m128, xmm, xmm, xmm [XOP]
+// * VPERMIL2PS imm4, xmm, m128, xmm, xmm [XOP]
+// * VPERMIL2PS imm4, ymm, ymm, ymm, ymm [XOP]
+// * VPERMIL2PS imm4, m256, ymm, ymm, ymm [XOP]
+// * VPERMIL2PS imm4, ymm, m256, ymm, ymm [XOP]
+//
+func (self *Program) VPERMIL2PS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, v4 interface{}) *Instruction {
+ p := self.alloc("VPERMIL2PS", 5, Operands { v0, v1, v2, v3, v4 })
+ // VPERMIL2PS imm4, xmm, xmm, xmm, xmm
+ if isImm4(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) && isXMM(v4) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[2]) << 5))
+ m.emit(0x79 ^ (hlcode(v[3]) << 3))
+ m.emit(0x48)
+ m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+ m.emit((hlcode(v[1]) << 4) | imml(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[3]) << 3))
+ m.emit(0x48)
+ m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[1]))
+ m.emit((hlcode(v[2]) << 4) | imml(v[0]))
+ })
+ }
+ // VPERMIL2PS imm4, m128, xmm, xmm, xmm
+ if isImm4(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) && isXMM(v4) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x81, hcode(v[4]), addr(v[1]), hlcode(v[3]))
+ m.emit(0x48)
+ m.mrsd(lcode(v[4]), addr(v[1]), 1)
+ m.emit((hlcode(v[2]) << 4) | imml(v[0]))
+ })
+ }
+ // VPERMIL2PS imm4, xmm, m128, xmm, xmm
+ if isImm4(v0) && isXMM(v1) && isM128(v2) && isXMM(v3) && isXMM(v4) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[4]), addr(v[2]), hlcode(v[3]))
+ m.emit(0x48)
+ m.mrsd(lcode(v[4]), addr(v[2]), 1)
+ m.emit((hlcode(v[1]) << 4) | imml(v[0]))
+ })
+ }
+ // VPERMIL2PS imm4, ymm, ymm, ymm, ymm
+ if isImm4(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) && isYMM(v4) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[2]) << 5))
+ m.emit(0x7d ^ (hlcode(v[3]) << 3))
+ m.emit(0x48)
+ m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+ m.emit((hlcode(v[1]) << 4) | imml(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0xfd ^ (hlcode(v[3]) << 3))
+ m.emit(0x48)
+ m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[1]))
+ m.emit((hlcode(v[2]) << 4) | imml(v[0]))
+ })
+ }
+ // VPERMIL2PS imm4, m256, ymm, ymm, ymm
+ if isImm4(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) && isYMM(v4) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x85, hcode(v[4]), addr(v[1]), hlcode(v[3]))
+ m.emit(0x48)
+ m.mrsd(lcode(v[4]), addr(v[1]), 1)
+ m.emit((hlcode(v[2]) << 4) | imml(v[0]))
+ })
+ }
+ // VPERMIL2PS imm4, ymm, m256, ymm, ymm
+ if isImm4(v0) && isYMM(v1) && isM256(v2) && isYMM(v3) && isYMM(v4) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[4]), addr(v[2]), hlcode(v[3]))
+ m.emit(0x48)
+ m.mrsd(lcode(v[4]), addr(v[2]), 1)
+ m.emit((hlcode(v[1]) << 4) | imml(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPERMIL2PS")
+ }
+ return p
+}
+
+// VPERMILPD performs "Permute Double-Precision Floating-Point Values".
+//
+// Mnemonic : VPERMILPD
+// Supported forms : (20 forms)
+//
+// * VPERMILPD imm8, xmm, xmm [AVX]
+// * VPERMILPD xmm, xmm, xmm [AVX]
+// * VPERMILPD m128, xmm, xmm [AVX]
+// * VPERMILPD imm8, m128, xmm [AVX]
+// * VPERMILPD imm8, ymm, ymm [AVX]
+// * VPERMILPD ymm, ymm, ymm [AVX]
+// * VPERMILPD m256, ymm, ymm [AVX]
+// * VPERMILPD imm8, m256, ymm [AVX]
+// * VPERMILPD imm8, m512/m64bcst, zmm{k}{z} [AVX512F]
+// * VPERMILPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPERMILPD imm8, zmm, zmm{k}{z} [AVX512F]
+// * VPERMILPD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPERMILPD imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPERMILPD imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPERMILPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPERMILPD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPERMILPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPERMILPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPERMILPD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPERMILPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPERMILPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPERMILPD", 3, Operands { v0, v1, v2 })
+ // VPERMILPD imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79)
+ m.emit(0x05)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMILPD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x0d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMILPD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x0d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPERMILPD imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0x05)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMILPD imm8, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d)
+ m.emit(0x05)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMILPD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x0d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMILPD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x0d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPERMILPD imm8, m256, ymm
+ if isImm8(v0) && isM256(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0x05)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMILPD imm8, m512/m64bcst, zmm{k}{z}
+ if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x05)
+ m.mrsd(lcode(v[2]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMILPD m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x0d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPERMILPD imm8, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+ m.emit(0x05)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMILPD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x0d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMILPD imm8, m128/m64bcst, xmm{k}{z}
+ if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x05)
+ m.mrsd(lcode(v[2]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMILPD imm8, m256/m64bcst, ymm{k}{z}
+ if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x05)
+ m.mrsd(lcode(v[2]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMILPD m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x0d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPERMILPD imm8, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
+ m.emit(0x05)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMILPD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x0d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMILPD m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x0d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPERMILPD imm8, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
+ m.emit(0x05)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMILPD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x0d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPERMILPD")
+ }
+ return p
+}
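+
+// Usage sketch (editorial note): VPERMILPD permutes within each 128-bit
+// lane and comes in two flavors, an imm8-controlled form and a
+// vector-controlled form. Assuming the exported XMM constants:
+//
+//     p.VPERMILPD(0x01, XMM1, XMM0)   // imm8 control: swap the two doubles of XMM1 into XMM0
+//     p.VPERMILPD(XMM2, XMM1, XMM0)   // vector control: bit 1 of each qword in XMM2 selects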
+
+// VPERMILPS performs "Permute Single-Precision Floating-Point Values".
+//
+// Mnemonic : VPERMILPS
+// Supported forms : (20 forms)
+//
+// * VPERMILPS imm8, xmm, xmm [AVX]
+// * VPERMILPS xmm, xmm, xmm [AVX]
+// * VPERMILPS m128, xmm, xmm [AVX]
+// * VPERMILPS imm8, m128, xmm [AVX]
+// * VPERMILPS imm8, ymm, ymm [AVX]
+// * VPERMILPS ymm, ymm, ymm [AVX]
+// * VPERMILPS m256, ymm, ymm [AVX]
+// * VPERMILPS imm8, m256, ymm [AVX]
+// * VPERMILPS imm8, m512/m32bcst, zmm{k}{z} [AVX512F]
+// * VPERMILPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPERMILPS imm8, zmm, zmm{k}{z} [AVX512F]
+// * VPERMILPS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPERMILPS imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPERMILPS imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPERMILPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPERMILPS imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPERMILPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPERMILPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPERMILPS imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPERMILPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPERMILPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPERMILPS", 3, Operands { v0, v1, v2 })
+ // VPERMILPS imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79)
+ m.emit(0x04)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMILPS xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x0c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMILPS m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x0c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPERMILPS imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0x04)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMILPS imm8, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d)
+ m.emit(0x04)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMILPS ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x0c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMILPS m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x0c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPERMILPS imm8, m256, ymm
+ if isImm8(v0) && isM256(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0x04)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMILPS imm8, m512/m32bcst, zmm{k}{z}
+ if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x04)
+ m.mrsd(lcode(v[2]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMILPS m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x0c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPERMILPS imm8, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+ m.emit(0x04)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMILPS zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x0c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMILPS imm8, m128/m32bcst, xmm{k}{z}
+ if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x04)
+ m.mrsd(lcode(v[2]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMILPS imm8, m256/m32bcst, ymm{k}{z}
+ if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x04)
+ m.mrsd(lcode(v[2]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMILPS m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x0c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPERMILPS imm8, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
+ m.emit(0x04)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMILPS xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x0c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMILPS m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x0c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPERMILPS imm8, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
+ m.emit(0x04)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMILPS ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x0c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPERMILPS")
+ }
+ return p
+}
+
+// VPERMPD performs "Permute Double-Precision Floating-Point Elements".
+//
+// Mnemonic : VPERMPD
+// Supported forms : (10 forms)
+//
+// * VPERMPD imm8, ymm, ymm [AVX2]
+// * VPERMPD imm8, m256, ymm [AVX2]
+// * VPERMPD imm8, m512/m64bcst, zmm{k}{z} [AVX512F]
+// * VPERMPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPERMPD imm8, zmm, zmm{k}{z} [AVX512F]
+// * VPERMPD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPERMPD imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPERMPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPERMPD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPERMPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPERMPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPERMPD", 3, Operands { v0, v1, v2 })
+ // VPERMPD imm8, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0xfd)
+ m.emit(0x01)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMPD imm8, m256, ymm
+ if isImm8(v0) && isM256(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x85, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0x01)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMPD imm8, m512/m64bcst, zmm{k}{z}
+ if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x01)
+ m.mrsd(lcode(v[2]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMPD m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x16)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPERMPD imm8, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+ m.emit(0x01)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMPD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x16)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMPD imm8, m256/m64bcst, ymm{k}{z}
+ if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x01)
+ m.mrsd(lcode(v[2]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMPD m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x16)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPERMPD imm8, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
+ m.emit(0x01)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMPD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x16)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPERMPD")
+ }
+ return p
+}
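+
+// Usage sketch (editorial note): the imm8 forms of VPERMPD select across
+// the whole 256-bit register, two imm8 bits per destination quadword.
+// Assuming the exported YMM constants, 0x1b (0b00011011) reverses all four
+// elements:
+//
+//     p.VPERMPD(0x1b, YMM1, YMM0)   // YMM0 = YMM1 with its four quadwords reversed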
+
+// VPERMPS performs "Permute Single-Precision Floating-Point Elements".
+//
+// Mnemonic : VPERMPS
+// Supported forms : (6 forms)
+//
+// * VPERMPS ymm, ymm, ymm [AVX2]
+// * VPERMPS m256, ymm, ymm [AVX2]
+// * VPERMPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPERMPS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPERMPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPERMPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPERMPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPERMPS", 3, Operands { v0, v1, v2 })
+ // VPERMPS ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x16)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMPS m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x16)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPERMPS m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x16)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPERMPS zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x16)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMPS m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x16)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPERMPS ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x16)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPERMPS")
+ }
+ return p
+}
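+
+// Usage sketch (editorial note): VPERMPS has no immediate form; the index
+// vector is always the middle operand. Assuming the exported YMM
+// constants, the AVX2 register form could be:
+//
+//     p.VPERMPS(YMM1, YMM2, YMM0)   // YMM0[i] = YMM1[YMM2[i] & 7] for each of the 8 floats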
+
+// VPERMQ performs "Permute Quadword Integers".
+//
+// Mnemonic : VPERMQ
+// Supported forms : (10 forms)
+//
+// * VPERMQ imm8, ymm, ymm [AVX2]
+// * VPERMQ imm8, m256, ymm [AVX2]
+// * VPERMQ imm8, m512/m64bcst, zmm{k}{z} [AVX512F]
+// * VPERMQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPERMQ imm8, zmm, zmm{k}{z} [AVX512F]
+// * VPERMQ zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPERMQ imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPERMQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPERMQ imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPERMQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPERMQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPERMQ", 3, Operands { v0, v1, v2 })
+ // VPERMQ imm8, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0xfd)
+ m.emit(0x00)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMQ imm8, m256, ymm
+ if isImm8(v0) && isM256(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x85, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0x00)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMQ imm8, m512/m64bcst, zmm{k}{z}
+ if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x00)
+ m.mrsd(lcode(v[2]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMQ m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x36)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPERMQ imm8, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+ m.emit(0x00)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x36)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMQ imm8, m256/m64bcst, ymm{k}{z}
+ if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x00)
+ m.mrsd(lcode(v[2]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMQ m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x36)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPERMQ imm8, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
+ m.emit(0x00)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPERMQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x36)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPERMQ")
+ }
+ return p
+}
+
+// VPERMT2B performs "Full Permute of Bytes From Two Tables Overwriting a Table".
+//
+// Mnemonic : VPERMT2B
+// Supported forms : (6 forms)
+//
+// * VPERMT2B xmm, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL]
+// * VPERMT2B m128, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL]
+// * VPERMT2B ymm, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL]
+// * VPERMT2B m256, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL]
+// * VPERMT2B zmm, zmm, zmm{k}{z} [AVX512VBMI]
+// * VPERMT2B m512, zmm, zmm{k}{z} [AVX512VBMI]
+//
+func (self *Program) VPERMT2B(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPERMT2B", 3, Operands { v0, v1, v2 })
+ // VPERMT2B xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512VBMI)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x7d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMT2B m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512VBMI)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x7d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPERMT2B ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512VBMI)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x7d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMT2B m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512VBMI)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x7d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPERMT2B zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512VBMI)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x7d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMT2B m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512VBMI)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x7d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPERMT2B")
+ }
+ return p
+}
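+
+// Usage sketch (editorial note): VPERMT2* is the table-overwriting twin of
+// VPERMI2*: here the middle operand carries the indices, and the last
+// operand doubles as the first source table and the destination. Assuming
+// the exported ZMM constants:
+//
+//     p.VPERMT2B(ZMM2, ZMM1, ZMM0)   // ZMM0: table in, result out; ZMM1 holds the byte indices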
+
+// VPERMT2D performs "Full Permute of Doublewords From Two Tables Overwriting a Table".
+//
+// Mnemonic : VPERMT2D
+// Supported forms : (6 forms)
+//
+// * VPERMT2D m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPERMT2D zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPERMT2D m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPERMT2D xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPERMT2D m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPERMT2D ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPERMT2D(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPERMT2D", 3, Operands { v0, v1, v2 })
+ // VPERMT2D m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x7e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPERMT2D zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x7e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMT2D m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x7e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPERMT2D xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x7e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMT2D m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x7e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPERMT2D ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x7e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPERMT2D")
+ }
+ return p
+}
+
+// VPERMT2PD performs "Full Permute of Double-Precision Floating-Point Values From Two Tables Overwriting a Table".
+//
+// Mnemonic : VPERMT2PD
+// Supported forms : (6 forms)
+//
+// * VPERMT2PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPERMT2PD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPERMT2PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPERMT2PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPERMT2PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPERMT2PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPERMT2PD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPERMT2PD", 3, Operands { v0, v1, v2 })
+ // VPERMT2PD m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x7f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPERMT2PD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMT2PD m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x7f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPERMT2PD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMT2PD m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x7f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPERMT2PD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPERMT2PD")
+ }
+ return p
+}
+
+// VPERMT2PS performs "Full Permute of Single-Precision Floating-Point Values From Two Tables Overwriting a Table".
+//
+// Mnemonic : VPERMT2PS
+// Supported forms : (6 forms)
+//
+// * VPERMT2PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPERMT2PS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPERMT2PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPERMT2PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPERMT2PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPERMT2PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPERMT2PS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPERMT2PS", 3, Operands { v0, v1, v2 })
+ // VPERMT2PS m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x7f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPERMT2PS zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMT2PS m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x7f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPERMT2PS xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMT2PS m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x7f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPERMT2PS ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x7f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPERMT2PS")
+ }
+ return p
+}
+
+// VPERMT2Q performs "Full Permute of Quadwords From Two Tables Overwriting a Table".
+//
+// Mnemonic : VPERMT2Q
+// Supported forms : (6 forms)
+//
+// * VPERMT2Q m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPERMT2Q zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPERMT2Q m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPERMT2Q xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPERMT2Q m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPERMT2Q ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPERMT2Q(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPERMT2Q", 3, Operands { v0, v1, v2 })
+ // VPERMT2Q m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x7e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPERMT2Q zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x7e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMT2Q m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x7e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPERMT2Q xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x7e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMT2Q m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x7e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPERMT2Q ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x7e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPERMT2Q")
+ }
+ return p
+}
+
+// VPERMT2W performs "Full Permute of Words From Two Tables Overwriting a Table".
+//
+// Mnemonic : VPERMT2W
+// Supported forms : (6 forms)
+//
+// * VPERMT2W zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPERMT2W m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPERMT2W xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPERMT2W m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPERMT2W ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPERMT2W m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPERMT2W(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPERMT2W", 3, Operands { v0, v1, v2 })
+ // VPERMT2W zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x7d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMT2W m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x7d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPERMT2W xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x7d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMT2W m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x7d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPERMT2W ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x7d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMT2W m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x7d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPERMT2W")
+ }
+ return p
+}
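+
+// Note (editorial): the 128/256-bit forms above pass
+// ISA_AVX512VL | ISA_AVX512BW to self.require, so the ISA constants appear
+// to form a bit set and both extensions are demanded at once, matching the
+// [AVX512BW,AVX512VL] tags in the form lists.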
+
+// VPERMW performs "Permute Word Integers".
+//
+// Mnemonic : VPERMW
+// Supported forms : (6 forms)
+//
+// * VPERMW zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPERMW m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPERMW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPERMW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPERMW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPERMW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPERMW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPERMW", 3, Operands { v0, v1, v2 })
+ // VPERMW zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x8d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMW m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x8d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPERMW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x8d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x8d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPERMW ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x8d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPERMW m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x8d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPERMW")
+ }
+ return p
+}
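+
+// Note (editorial): throughout these methods the register-to-register forms
+// hand-roll the four EVEX prefix bytes with m.emit (0x62 followed by bytes
+// assembled from hcode/ecode/hlcode/kcode/zcode), while the memory forms
+// delegate to m.evex and m.mrsd. The trailing m.mrsd argument (64, 32 or 16
+// above) is the disp8 scaling factor used for EVEX compressed displacement.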
+
+// VPEXPANDD performs "Load Sparse Packed Doubleword Integer Values from Dense Memory/Register".
+//
+// Mnemonic : VPEXPANDD
+// Supported forms : (6 forms)
+//
+// * VPEXPANDD zmm, zmm{k}{z} [AVX512F]
+// * VPEXPANDD m512, zmm{k}{z} [AVX512F]
+// * VPEXPANDD xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPEXPANDD ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPEXPANDD m128, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPEXPANDD m256, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPEXPANDD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPEXPANDD", 2, Operands { v0, v1 })
+ // VPEXPANDD zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x89)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPEXPANDD m512, zmm{k}{z}
+ if isM512(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x89)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VPEXPANDD xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x89)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPEXPANDD ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x89)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPEXPANDD m128, xmm{k}{z}
+ if isM128(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x89)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VPEXPANDD m256, ymm{k}{z}
+ if isM256(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x89)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPEXPANDD")
+ }
+ return p
+}
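+
+// Note (editorial): unlike the permute loads above, which scale disp8 by the
+// full vector width, every VPEXPANDD memory form calls m.mrsd with 4 (one
+// doubleword), and the VPEXPANDQ forms below use 8: expand instructions
+// compress displacements at element granularity.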
+
+// VPEXPANDQ performs "Load Sparse Packed Quadword Integer Values from Dense Memory/Register".
+//
+// Mnemonic : VPEXPANDQ
+// Supported forms : (6 forms)
+//
+// * VPEXPANDQ zmm, zmm{k}{z} [AVX512F]
+// * VPEXPANDQ m512, zmm{k}{z} [AVX512F]
+// * VPEXPANDQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPEXPANDQ ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPEXPANDQ m128, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPEXPANDQ m256, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPEXPANDQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPEXPANDQ", 2, Operands { v0, v1 })
+ // VPEXPANDQ zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x89)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPEXPANDQ m512, zmm{k}{z}
+ if isM512(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x89)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VPEXPANDQ xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x89)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPEXPANDQ ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x89)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPEXPANDQ m128, xmm{k}{z}
+ if isM128(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x89)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VPEXPANDQ m256, ymm{k}{z}
+ if isM256(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x89)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPEXPANDQ")
+ }
+ return p
+}
+
+// VPEXTRB performs "Extract Byte".
+//
+// Mnemonic : VPEXTRB
+// Supported forms : (4 forms)
+//
+// * VPEXTRB imm8, xmm, r32 [AVX]
+// * VPEXTRB imm8, xmm, m8 [AVX]
+// * VPEXTRB imm8, xmm, r32 [AVX512BW]
+// * VPEXTRB imm8, xmm, m8 [AVX512BW]
+//
+func (self *Program) VPEXTRB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPEXTRB", 3, Operands { v0, v1, v2 })
+ // VPEXTRB imm8, xmm, r32
+ if isImm8(v0) && isXMM(v1) && isReg32(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5))
+ m.emit(0x79)
+ m.emit(0x14)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPEXTRB imm8, xmm, m8
+ if isImm8(v0) && isXMM(v1) && isM8(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[1]), addr(v[2]), 0)
+ m.emit(0x14)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPEXTRB imm8, xmm, r32
+ if isImm8(v0) && isEVEXXMM(v1) && isReg32(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit(0x08)
+ m.emit(0x14)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPEXTRB imm8, xmm, m8
+ if isImm8(v0) && isEVEXXMM(v1) && isM8(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[1]), addr(v[2]), 0, 0, 0, 0)
+ m.emit(0x14)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPEXTRB")
+ }
+ return p
+}
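+
+// Editor's sketch (illustrative): operands are written immediate-first, so
+// extracting byte 3 of XMM1 into a 32-bit register would look like
+//
+//     p.VPEXTRB(3, XMM1, ECX)
+//
+// assuming the XMM1/ECX constants defined elsewhere in this package and that
+// a plain Go integer satisfies the isImm8 check.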
+
+// VPEXTRD performs "Extract Doubleword".
+//
+// Mnemonic : VPEXTRD
+// Supported forms : (4 forms)
+//
+// * VPEXTRD imm8, xmm, r32 [AVX]
+// * VPEXTRD imm8, xmm, m32 [AVX]
+// * VPEXTRD imm8, xmm, r32 [AVX512DQ]
+// * VPEXTRD imm8, xmm, m32 [AVX512DQ]
+//
+func (self *Program) VPEXTRD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPEXTRD", 3, Operands { v0, v1, v2 })
+ // VPEXTRD imm8, xmm, r32
+ if isImm8(v0) && isXMM(v1) && isReg32(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5))
+ m.emit(0x79)
+ m.emit(0x16)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPEXTRD imm8, xmm, m32
+ if isImm8(v0) && isXMM(v1) && isM32(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[1]), addr(v[2]), 0)
+ m.emit(0x16)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPEXTRD imm8, xmm, r32
+ if isImm8(v0) && isEVEXXMM(v1) && isReg32(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit(0x08)
+ m.emit(0x16)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPEXTRD imm8, xmm, m32
+ if isImm8(v0) && isEVEXXMM(v1) && isM32(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[1]), addr(v[2]), 0, 0, 0, 0)
+ m.emit(0x16)
+ m.mrsd(lcode(v[1]), addr(v[2]), 4)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPEXTRD")
+ }
+ return p
+}
+
+// VPEXTRQ performs "Extract Quadword".
+//
+// Mnemonic : VPEXTRQ
+// Supported forms : (4 forms)
+//
+// * VPEXTRQ imm8, xmm, r64 [AVX]
+// * VPEXTRQ imm8, xmm, m64 [AVX]
+// * VPEXTRQ imm8, xmm, r64 [AVX512DQ]
+// * VPEXTRQ imm8, xmm, m64 [AVX512DQ]
+//
+func (self *Program) VPEXTRQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPEXTRQ", 3, Operands { v0, v1, v2 })
+ // VPEXTRQ imm8, xmm, r64
+ if isImm8(v0) && isXMM(v1) && isReg64(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5))
+ m.emit(0xf9)
+ m.emit(0x16)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPEXTRQ imm8, xmm, m64
+ if isImm8(v0) && isXMM(v1) && isM64(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x81, hcode(v[1]), addr(v[2]), 0)
+ m.emit(0x16)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPEXTRQ imm8, xmm, r64
+ if isImm8(v0) && isEVEXXMM(v1) && isReg64(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit(0x08)
+ m.emit(0x16)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPEXTRQ imm8, xmm, m64
+ if isImm8(v0) && isEVEXXMM(v1) && isM64(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b00, ehcode(v[1]), addr(v[2]), 0, 0, 0, 0)
+ m.emit(0x16)
+ m.mrsd(lcode(v[1]), addr(v[2]), 8)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPEXTRQ")
+ }
+ return p
+}
+
+// VPEXTRW performs "Extract Word".
+//
+// Mnemonic : VPEXTRW
+// Supported forms : (4 forms)
+//
+// * VPEXTRW imm8, xmm, r32 [AVX]
+// * VPEXTRW imm8, xmm, m16 [AVX]
+// * VPEXTRW imm8, xmm, r32 [AVX512BW]
+// * VPEXTRW imm8, xmm, m16 [AVX512BW]
+//
+func (self *Program) VPEXTRW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPEXTRW", 3, Operands { v0, v1, v2 })
+ // VPEXTRW imm8, xmm, r32
+ if isImm8(v0) && isXMM(v1) && isReg32(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[1], 0)
+ m.emit(0xc5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5))
+ m.emit(0x79)
+ m.emit(0x15)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPEXTRW imm8, xmm, m16
+ if isImm8(v0) && isXMM(v1) && isM16(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[1]), addr(v[2]), 0)
+ m.emit(0x15)
+ m.mrsd(lcode(v[1]), addr(v[2]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPEXTRW imm8, xmm, r32
+ if isImm8(v0) && isEVEXXMM(v1) && isReg32(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit(0x08)
+ m.emit(0x15)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit(0x08)
+ m.emit(0xc5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPEXTRW imm8, xmm, m16
+ if isImm8(v0) && isEVEXXMM(v1) && isM16(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[1]), addr(v[2]), 0, 0, 0, 0)
+ m.emit(0x15)
+ m.mrsd(lcode(v[1]), addr(v[2]), 2)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPEXTRW")
+ }
+ return p
+}
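+
+// Note (editorial): the r32 forms of VPEXTRW register two candidate
+// encodings apiece (the short 0F C5-style opcode and the 0F 3A 15 route) by
+// calling p.add twice, presumably leaving the encoder free to pick among the
+// registered alternatives when the instruction is finally assembled.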
+
+// VPGATHERDD performs "Gather Packed Doubleword Values Using Signed Doubleword Indices".
+//
+// Mnemonic : VPGATHERDD
+// Supported forms : (5 forms)
+//
+// * VPGATHERDD xmm, vm32x, xmm [AVX2]
+// * VPGATHERDD ymm, vm32y, ymm [AVX2]
+// * VPGATHERDD vm32z, zmm{k} [AVX512F]
+// * VPGATHERDD vm32x, xmm{k} [AVX512F,AVX512VL]
+// * VPGATHERDD vm32y, ymm{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VPGATHERDD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VPGATHERDD", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VPGATHERDD", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VPGATHERDD takes 2 or 3 operands")
+ }
+ // VPGATHERDD xmm, vm32x, xmm
+ if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x90)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ // VPGATHERDD ymm, vm32y, ymm
+ if len(vv) == 1 && isYMM(v0) && isVMY(v1) && isYMM(vv[0]) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x90)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ // VPGATHERDD vm32z, zmm{k}
+ if len(vv) == 0 && isVMZ(v0) && isZMMk(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x90)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VPGATHERDD vm32x, xmm{k}
+ if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x90)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VPGATHERDD vm32y, ymm{k}
+ if len(vv) == 0 && isEVEXVMY(v0) && isYMMk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x90)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPGATHERDD")
+ }
+ return p
+}
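+
+// Note (editorial): the gather methods take a trailing variadic parameter so
+// one Go method can cover both operand shapes: the AVX2 forms are
+// three-operand (mask vector, vm32 index operand, destination), while the
+// EVEX forms are two-operand with the mask carried on the destination as
+// {k}. Any other operand count panics before encoding is attempted.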
+
+// VPGATHERDQ performs "Gather Packed Quadword Values Using Signed Doubleword Indices".
+//
+// Mnemonic : VPGATHERDQ
+// Supported forms : (5 forms)
+//
+// * VPGATHERDQ xmm, vm32x, xmm [AVX2]
+// * VPGATHERDQ ymm, vm32x, ymm [AVX2]
+// * VPGATHERDQ vm32y, zmm{k} [AVX512F]
+// * VPGATHERDQ vm32x, xmm{k} [AVX512F,AVX512VL]
+// * VPGATHERDQ vm32x, ymm{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VPGATHERDQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VPGATHERDQ", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VPGATHERDQ", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VPGATHERDQ takes 2 or 3 operands")
+ }
+ // VPGATHERDQ xmm, vm32x, xmm
+ if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x90)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ // VPGATHERDQ ymm, vm32x, ymm
+ if len(vv) == 1 && isYMM(v0) && isVMX(v1) && isYMM(vv[0]) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x90)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ // VPGATHERDQ vm32y, zmm{k}
+ if len(vv) == 0 && isEVEXVMY(v0) && isZMMk(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x90)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VPGATHERDQ vm32x, xmm{k}
+ if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x90)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VPGATHERDQ vm32x, ymm{k}
+ if len(vv) == 0 && isEVEXVMX(v0) && isYMMk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x90)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPGATHERDQ")
+ }
+ return p
+}
+
+// VPGATHERQD performs "Gather Packed Doubleword Values Using Signed Quadword Indices".
+//
+// Mnemonic : VPGATHERQD
+// Supported forms : (5 forms)
+//
+// * VPGATHERQD xmm, vm64x, xmm [AVX2]
+// * VPGATHERQD xmm, vm64y, xmm [AVX2]
+// * VPGATHERQD vm64z, ymm{k} [AVX512F]
+// * VPGATHERQD vm64x, xmm{k} [AVX512F,AVX512VL]
+// * VPGATHERQD vm64y, xmm{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VPGATHERQD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VPGATHERQD", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VPGATHERQD", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VPGATHERQD takes 2 or 3 operands")
+ }
+ // VPGATHERQD xmm, vm64x, xmm
+ if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x91)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ // VPGATHERQD xmm, vm64y, xmm
+ if len(vv) == 1 && isXMM(v0) && isVMY(v1) && isXMM(vv[0]) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x91)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ // VPGATHERQD vm64z, ymm{k}
+ if len(vv) == 0 && isVMZ(v0) && isYMMk(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x91)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VPGATHERQD vm64x, xmm{k}
+ if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x91)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VPGATHERQD vm64y, xmm{k}
+ if len(vv) == 0 && isEVEXVMY(v0) && isXMMk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x91)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPGATHERQD")
+ }
+ return p
+}
+
+// VPGATHERQQ performs "Gather Packed Quadword Values Using Signed Quadword Indices".
+//
+// Mnemonic : VPGATHERQQ
+// Supported forms : (5 forms)
+//
+// * VPGATHERQQ xmm, vm64x, xmm [AVX2]
+// * VPGATHERQQ ymm, vm64y, ymm [AVX2]
+// * VPGATHERQQ vm64z, zmm{k} [AVX512F]
+// * VPGATHERQQ vm64x, xmm{k} [AVX512F,AVX512VL]
+// * VPGATHERQQ vm64y, ymm{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VPGATHERQQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VPGATHERQQ", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VPGATHERQQ", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VPGATHERQQ takes 2 or 3 operands")
+ }
+ // VPGATHERQQ xmm, vm64x, xmm
+ if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x91)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ // VPGATHERQQ ymm, vm64y, ymm
+ if len(vv) == 1 && isYMM(v0) && isVMY(v1) && isYMM(vv[0]) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x91)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ // VPGATHERQQ vm64z, zmm{k}
+ if len(vv) == 0 && isVMZ(v0) && isZMMk(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x91)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VPGATHERQQ vm64x, xmm{k}
+ if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x91)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VPGATHERQQ vm64y, ymm{k}
+ if len(vv) == 0 && isEVEXVMY(v0) && isYMMk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0)
+ m.emit(0x91)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPGATHERQQ")
+ }
+ return p
+}
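+
+// Note (editorial, from the ISA rather than this file): for the VEX-encoded
+// gathers the destination, index and mask registers must all be distinct,
+// and for the EVEX gathers the destination may not overlap the index
+// register; violating this raises #UD at run time, and nothing in these
+// encoders checks for it.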
+
+// VPHADDBD performs "Packed Horizontal Add Signed Byte to Signed Doubleword".
+//
+// Mnemonic : VPHADDBD
+// Supported forms : (2 forms)
+//
+// * VPHADDBD xmm, xmm [XOP]
+// * VPHADDBD m128, xmm [XOP]
+//
+func (self *Program) VPHADDBD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPHADDBD", 2, Operands { v0, v1 })
+ // VPHADDBD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x78)
+ m.emit(0xc2)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHADDBD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0xc2)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPHADDBD")
+ }
+ return p
+}
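+
+// Note (editorial): the XOP forms here and below encode with AMD's 0x8f
+// escape byte and map 0b1001 rather than the 0xc4 VEX escape, and are tagged
+// DomainAMDSpecific: require(ISA_XOP) limits them to the AMD processors
+// (Bulldozer through Excavator) that implemented XOP.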
+
+// VPHADDBQ performs "Packed Horizontal Add Signed Byte to Signed Quadword".
+//
+// Mnemonic : VPHADDBQ
+// Supported forms : (2 forms)
+//
+// * VPHADDBQ xmm, xmm [XOP]
+// * VPHADDBQ m128, xmm [XOP]
+//
+func (self *Program) VPHADDBQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPHADDBQ", 2, Operands { v0, v1 })
+ // VPHADDBQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x78)
+ m.emit(0xc3)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHADDBQ m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0xc3)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPHADDBQ")
+ }
+ return p
+}
+
+// VPHADDBW performs "Packed Horizontal Add Signed Byte to Signed Word".
+//
+// Mnemonic : VPHADDBW
+// Supported forms : (2 forms)
+//
+// * VPHADDBW xmm, xmm [XOP]
+// * VPHADDBW m128, xmm [XOP]
+//
+func (self *Program) VPHADDBW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPHADDBW", 2, Operands { v0, v1 })
+ // VPHADDBW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x78)
+ m.emit(0xc1)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHADDBW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0xc1)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPHADDBW")
+ }
+ return p
+}
+
+// VPHADDD performs "Packed Horizontal Add Doubleword Integer".
+//
+// Mnemonic : VPHADDD
+// Supported forms : (4 forms)
+//
+// * VPHADDD xmm, xmm, xmm [AVX]
+// * VPHADDD m128, xmm, xmm [AVX]
+// * VPHADDD ymm, ymm, ymm [AVX2]
+// * VPHADDD m256, ymm, ymm [AVX2]
+//
+func (self *Program) VPHADDD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPHADDD", 3, Operands { v0, v1, v2 })
+ // VPHADDD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x02)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHADDD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x02)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPHADDD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x02)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHADDD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x02)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPHADDD")
+ }
+ return p
+}
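+
+// Editor's sketch (illustrative): as a three-operand AVX form written
+// source-first,
+//
+//     p.VPHADDD(XMM2, XMM1, XMM0)
+//
+// would store the pairwise sums of adjacent doublewords of XMM1 and XMM2
+// into XMM0, assuming the register constants defined elsewhere in this
+// package.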
+
+// VPHADDDQ performs "Packed Horizontal Add Signed Doubleword to Signed Quadword".
+//
+// Mnemonic : VPHADDDQ
+// Supported forms : (2 forms)
+//
+// * VPHADDDQ xmm, xmm [XOP]
+// * VPHADDDQ m128, xmm [XOP]
+//
+func (self *Program) VPHADDDQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPHADDDQ", 2, Operands { v0, v1 })
+ // VPHADDDQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x78)
+ m.emit(0xcb)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHADDDQ m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0xcb)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPHADDDQ")
+ }
+ return p
+}
+
+// VPHADDSW performs "Packed Horizontal Add Signed Word Integers with Signed Saturation".
+//
+// Mnemonic : VPHADDSW
+// Supported forms : (4 forms)
+//
+// * VPHADDSW xmm, xmm, xmm [AVX]
+// * VPHADDSW m128, xmm, xmm [AVX]
+// * VPHADDSW ymm, ymm, ymm [AVX2]
+// * VPHADDSW m256, ymm, ymm [AVX2]
+//
+func (self *Program) VPHADDSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPHADDSW", 3, Operands { v0, v1, v2 })
+ // VPHADDSW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x03)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHADDSW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x03)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPHADDSW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x03)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHADDSW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x03)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPHADDSW")
+ }
+ return p
+}
+
+// VPHADDUBD performs "Packed Horizontal Add Unsigned Byte to Doubleword".
+//
+// Mnemonic : VPHADDUBD
+// Supported forms : (2 forms)
+//
+// * VPHADDUBD xmm, xmm [XOP]
+// * VPHADDUBD m128, xmm [XOP]
+//
+func (self *Program) VPHADDUBD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPHADDUBD", 2, Operands { v0, v1 })
+ // VPHADDUBD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x78)
+ m.emit(0xd2)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHADDUBD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0xd2)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPHADDUBD")
+ }
+ return p
+}
+
+// VPHADDUBQ performs "Packed Horizontal Add Unsigned Byte to Quadword".
+//
+// Mnemonic : VPHADDUBQ
+// Supported forms : (2 forms)
+//
+// * VPHADDUBQ xmm, xmm [XOP]
+// * VPHADDUBQ m128, xmm [XOP]
+//
+func (self *Program) VPHADDUBQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPHADDUBQ", 2, Operands { v0, v1 })
+ // VPHADDUBQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x78)
+ m.emit(0xd3)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHADDUBQ m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0xd3)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPHADDUBQ")
+ }
+ return p
+}
+
+// VPHADDUBW performs "Packed Horizontal Add Unsigned Byte to Word".
+//
+// Mnemonic : VPHADDUBW
+// Supported forms : (2 forms)
+//
+// * VPHADDUBW xmm, xmm [XOP]
+// * VPHADDUBW m128, xmm [XOP]
+//
+func (self *Program) VPHADDUBW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPHADDUBW", 2, Operands { v0, v1 })
+ // VPHADDUBW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x78)
+ m.emit(0xd1)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHADDUBW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0xd1)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPHADDUBW")
+ }
+ return p
+}
+
+// VPHADDUDQ performs "Packed Horizontal Add Unsigned Doubleword to Quadword".
+//
+// Mnemonic : VPHADDUDQ
+// Supported forms : (2 forms)
+//
+// * VPHADDUDQ xmm, xmm [XOP]
+// * VPHADDUDQ m128, xmm [XOP]
+//
+func (self *Program) VPHADDUDQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPHADDUDQ", 2, Operands { v0, v1 })
+ // VPHADDUDQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x78)
+ m.emit(0xdb)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHADDUDQ m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0xdb)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPHADDUDQ")
+ }
+ return p
+}
+
+// VPHADDUWD performs "Packed Horizontal Add Unsigned Word to Doubleword".
+//
+// Mnemonic : VPHADDUWD
+// Supported forms : (2 forms)
+//
+// * VPHADDUWD xmm, xmm [XOP]
+// * VPHADDUWD m128, xmm [XOP]
+//
+func (self *Program) VPHADDUWD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPHADDUWD", 2, Operands { v0, v1 })
+ // VPHADDUWD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x78)
+ m.emit(0xd6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHADDUWD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0xd6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPHADDUWD")
+ }
+ return p
+}
+
+// VPHADDUWQ performs "Packed Horizontal Add Unsigned Word to Quadword".
+//
+// Mnemonic : VPHADDUWQ
+// Supported forms : (2 forms)
+//
+// * VPHADDUWQ xmm, xmm [XOP]
+// * VPHADDUWQ m128, xmm [XOP]
+//
+func (self *Program) VPHADDUWQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPHADDUWQ", 2, Operands { v0, v1 })
+ // VPHADDUWQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x78)
+ m.emit(0xd7)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHADDUWQ m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0xd7)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPHADDUWQ")
+ }
+ return p
+}
+
+// VPHADDW performs "Packed Horizontal Add Word Integers".
+//
+// Mnemonic : VPHADDW
+// Supported forms : (4 forms)
+//
+// * VPHADDW xmm, xmm, xmm [AVX]
+// * VPHADDW m128, xmm, xmm [AVX]
+// * VPHADDW ymm, ymm, ymm [AVX2]
+// * VPHADDW m256, ymm, ymm [AVX2]
+//
+func (self *Program) VPHADDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPHADDW", 3, Operands { v0, v1, v2 })
+ // VPHADDW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x01)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHADDW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x01)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPHADDW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x01)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHADDW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x01)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPHADDW")
+ }
+ return p
+}
+
+// VPHADDWD performs "Packed Horizontal Add Signed Word to Signed Doubleword".
+//
+// Mnemonic : VPHADDWD
+// Supported forms : (2 forms)
+//
+// * VPHADDWD xmm, xmm [XOP]
+// * VPHADDWD m128, xmm [XOP]
+//
+func (self *Program) VPHADDWD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPHADDWD", 2, Operands { v0, v1 })
+ // VPHADDWD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x78)
+ m.emit(0xc6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHADDWD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0xc6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPHADDWD")
+ }
+ return p
+}
+
+// VPHADDWQ performs "Packed Horizontal Add Signed Word to Signed Quadword".
+//
+// Mnemonic : VPHADDWQ
+// Supported forms : (2 forms)
+//
+// * VPHADDWQ xmm, xmm [XOP]
+// * VPHADDWQ m128, xmm [XOP]
+//
+func (self *Program) VPHADDWQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPHADDWQ", 2, Operands { v0, v1 })
+ // VPHADDWQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x78)
+ m.emit(0xc7)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHADDWQ m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0xc7)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPHADDWQ")
+ }
+ return p
+}
+
+// VPHMINPOSUW performs "Packed Horizontal Minimum of Unsigned Word Integers".
+//
+// Mnemonic : VPHMINPOSUW
+// Supported forms : (2 forms)
+//
+// * VPHMINPOSUW xmm, xmm [AVX]
+// * VPHMINPOSUW m128, xmm [AVX]
+//
+func (self *Program) VPHMINPOSUW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPHMINPOSUW", 2, Operands { v0, v1 })
+ // VPHMINPOSUW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0x41)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHMINPOSUW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x41)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPHMINPOSUW")
+ }
+ return p
+}
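+
+// Editor's sketch (illustrative): VPHMINPOSUW has only 128-bit, unmasked
+// forms; a call such as
+//
+//     p.VPHMINPOSUW(XMM1, XMM0)
+//
+// would place the minimum unsigned word of XMM1 in the low word of XMM0,
+// its index in bits 18:16, and zero the remaining bits (register constants
+// assumed from this package).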
+
+// VPHSUBBW performs "Packed Horizontal Subtract Signed Byte to Signed Word".
+//
+// Mnemonic : VPHSUBBW
+// Supported forms : (2 forms)
+//
+// * VPHSUBBW xmm, xmm [XOP]
+// * VPHSUBBW m128, xmm [XOP]
+//
+func (self *Program) VPHSUBBW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPHSUBBW", 2, Operands { v0, v1 })
+ // VPHSUBBW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x78)
+ m.emit(0xe1)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHSUBBW m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0xe1)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPHSUBBW")
+ }
+ return p
+}
+
+// VPHSUBD performs "Packed Horizontal Subtract Doubleword Integers".
+//
+// Mnemonic : VPHSUBD
+// Supported forms : (4 forms)
+//
+// * VPHSUBD xmm, xmm, xmm [AVX]
+// * VPHSUBD m128, xmm, xmm [AVX]
+// * VPHSUBD ymm, ymm, ymm [AVX2]
+// * VPHSUBD m256, ymm, ymm [AVX2]
+//
+func (self *Program) VPHSUBD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPHSUBD", 3, Operands { v0, v1, v2 })
+ // VPHSUBD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x06)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHSUBD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x06)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPHSUBD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x06)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHSUBD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x06)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPHSUBD")
+ }
+ return p
+}
+
+// VPHSUBDQ performs "Packed Horizontal Subtract Signed Doubleword to Signed Quadword".
+//
+// Mnemonic : VPHSUBDQ
+// Supported forms : (2 forms)
+//
+// * VPHSUBDQ xmm, xmm [XOP]
+// * VPHSUBDQ m128, xmm [XOP]
+//
+func (self *Program) VPHSUBDQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPHSUBDQ", 2, Operands { v0, v1 })
+ // VPHSUBDQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x78)
+ m.emit(0xe3)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHSUBDQ m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0xe3)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPHSUBDQ")
+ }
+ return p
+}
+
+// VPHSUBSW performs "Packed Horizontal Subtract Signed Word Integers with Signed Saturation".
+//
+// Mnemonic : VPHSUBSW
+// Supported forms : (4 forms)
+//
+// * VPHSUBSW xmm, xmm, xmm [AVX]
+// * VPHSUBSW m128, xmm, xmm [AVX]
+// * VPHSUBSW ymm, ymm, ymm [AVX2]
+// * VPHSUBSW m256, ymm, ymm [AVX2]
+//
+func (self *Program) VPHSUBSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPHSUBSW", 3, Operands { v0, v1, v2 })
+ // VPHSUBSW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x07)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHSUBSW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x07)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPHSUBSW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x07)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHSUBSW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x07)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPHSUBSW")
+ }
+ return p
+}
+
+// VPHSUBW performs "Packed Horizontal Subtract Word Integers".
+//
+// Mnemonic : VPHSUBW
+// Supported forms : (4 forms)
+//
+// * VPHSUBW xmm, xmm, xmm [AVX]
+// * VPHSUBW m128, xmm, xmm [AVX]
+// * VPHSUBW ymm, ymm, ymm [AVX2]
+// * VPHSUBW m256, ymm, ymm [AVX2]
+//
+func (self *Program) VPHSUBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPHSUBW", 3, Operands { v0, v1, v2 })
+ // VPHSUBW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x05)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHSUBW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x05)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPHSUBW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x05)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHSUBW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x05)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPHSUBW")
+ }
+ return p
+}
+
+// VPHSUBWD performs "Packed Horizontal Subtract Signed Word to Signed Doubleword".
+//
+// Mnemonic : VPHSUBWD
+// Supported forms : (2 forms)
+//
+// * VPHSUBWD xmm, xmm [XOP]
+// * VPHSUBWD m128, xmm [XOP]
+//
+func (self *Program) VPHSUBWD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPHSUBWD", 2, Operands { v0, v1 })
+ // VPHSUBWD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x78)
+ m.emit(0xe2)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPHSUBWD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0xe2)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPHSUBWD")
+ }
+ return p
+}
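+
+// The three XOP horizontal-subtract widenings above (VPHSUBBW, VPHSUBDQ,
+// VPHSUBWD) share one shape: escape byte 0x8f with opcode map 0b1001,
+// then opcode 0xe1 (byte→word), 0xe2 (word→dword) or 0xe3 (dword→qword).
+// In their register forms the final
+// m.emit(0xc0 | lcode(v[1])<<3 | lcode(v[0])) is the ModRM byte:
+// mod=11 (register-direct), reg=destination, rm=source.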
+
+// VPINSRB performs "Insert Byte".
+//
+// Mnemonic : VPINSRB
+// Supported forms : (4 forms)
+//
+// * VPINSRB imm8, r32, xmm, xmm [AVX]
+// * VPINSRB imm8, m8, xmm, xmm [AVX]
+// * VPINSRB imm8, r32, xmm, xmm [AVX512BW]
+// * VPINSRB imm8, m8, xmm, xmm [AVX512BW]
+//
+func (self *Program) VPINSRB(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPINSRB", 4, Operands { v0, v1, v2, v3 })
+ // VPINSRB imm8, r32, xmm, xmm
+ if isImm8(v0) && isReg32(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x20)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPINSRB imm8, m8, xmm, xmm
+ if isImm8(v0) && isM8(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x20)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPINSRB imm8, r32, xmm, xmm
+ if isImm8(v0) && isReg32(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00)
+ m.emit(0x20)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPINSRB imm8, m8, xmm, xmm
+ if isImm8(v0) && isM8(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), 0, 0, 0)
+ m.emit(0x20)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPINSRB")
+ }
+ return p
+}
+
+// VPINSRD performs "Insert Doubleword".
+//
+// Mnemonic : VPINSRD
+// Supported forms : (4 forms)
+//
+// * VPINSRD imm8, r32, xmm, xmm [AVX]
+// * VPINSRD imm8, m32, xmm, xmm [AVX]
+// * VPINSRD imm8, r32, xmm, xmm [AVX512DQ]
+// * VPINSRD imm8, m32, xmm, xmm [AVX512DQ]
+//
+func (self *Program) VPINSRD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPINSRD", 4, Operands { v0, v1, v2, v3 })
+ // VPINSRD imm8, r32, xmm, xmm
+ if isImm8(v0) && isReg32(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x22)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPINSRD imm8, m32, xmm, xmm
+ if isImm8(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x22)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPINSRD imm8, r32, xmm, xmm
+ if isImm8(v0) && isReg32(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00)
+ m.emit(0x22)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPINSRD imm8, m32, xmm, xmm
+ if isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), 0, 0, 0)
+ m.emit(0x22)
+ m.mrsd(lcode(v[3]), addr(v[1]), 4)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPINSRD")
+ }
+ return p
+}
+
+// VPINSRQ performs "Insert Quadword".
+//
+// Mnemonic : VPINSRQ
+// Supported forms : (4 forms)
+//
+// * VPINSRQ imm8, r64, xmm, xmm [AVX]
+// * VPINSRQ imm8, m64, xmm, xmm [AVX]
+// * VPINSRQ imm8, r64, xmm, xmm [AVX512DQ]
+// * VPINSRQ imm8, m64, xmm, xmm [AVX512DQ]
+//
+func (self *Program) VPINSRQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPINSRQ", 4, Operands { v0, v1, v2, v3 })
+ // VPINSRQ imm8, r64, xmm, xmm
+ if isImm8(v0) && isReg64(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[2]) << 3))
+ m.emit(0x22)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPINSRQ imm8, m64, xmm, xmm
+ if isImm8(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x22)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPINSRQ imm8, r64, xmm, xmm
+ if isImm8(v0) && isReg64(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00)
+ m.emit(0x22)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPINSRQ imm8, m64, xmm, xmm
+ if isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), 0, 0, 0)
+ m.emit(0x22)
+ m.mrsd(lcode(v[3]), addr(v[1]), 8)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPINSRQ")
+ }
+ return p
+}
+
+// VPINSRW performs "Insert Word".
+//
+// Mnemonic : VPINSRW
+// Supported forms : (4 forms)
+//
+// * VPINSRW imm8, r32, xmm, xmm [AVX]
+// * VPINSRW imm8, m16, xmm, xmm [AVX]
+// * VPINSRW imm8, r32, xmm, xmm [AVX512BW]
+// * VPINSRW imm8, m16, xmm, xmm [AVX512BW]
+//
+func (self *Program) VPINSRW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPINSRW", 4, Operands { v0, v1, v2, v3 })
+ // VPINSRW imm8, r32, xmm, xmm
+ if isImm8(v0) && isReg32(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[3]), v[1], hlcode(v[2]))
+ m.emit(0xc4)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPINSRW imm8, m16, xmm, xmm
+ if isImm8(v0) && isM16(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0xc4)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPINSRW imm8, r32, xmm, xmm
+ if isImm8(v0) && isReg32(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00)
+ m.emit(0xc4)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPINSRW imm8, m16, xmm, xmm
+ if isImm8(v0) && isM16(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), 0, 0, 0)
+ m.emit(0xc4)
+ m.mrsd(lcode(v[3]), addr(v[1]), 2)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPINSRW")
+ }
+ return p
+}
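+
+// Across the VPINSR* family the m.mrsd scale argument differs by prefix:
+// the VEX forms always pass 1, while the EVEX forms pass the element size
+// (1, 2, 4, 8 for B/W/D/Q). That is EVEX "compressed disp8*N": a
+// displacement that is a multiple of N fits in a single byte as disp/N,
+// e.g. with N=8 a VPINSRQ displacement of 64 encodes as the byte 8.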
+
+// VPLZCNTD performs "Count the Number of Leading Zero Bits for Packed Doubleword Values".
+//
+// Mnemonic : VPLZCNTD
+// Supported forms : (6 forms)
+//
+// * VPLZCNTD m128/m32bcst, xmm{k}{z} [AVX512CD,AVX512VL]
+// * VPLZCNTD m256/m32bcst, ymm{k}{z} [AVX512CD,AVX512VL]
+// * VPLZCNTD m512/m32bcst, zmm{k}{z} [AVX512CD]
+// * VPLZCNTD xmm, xmm{k}{z} [AVX512CD,AVX512VL]
+// * VPLZCNTD ymm, ymm{k}{z} [AVX512CD,AVX512VL]
+// * VPLZCNTD zmm, zmm{k}{z} [AVX512CD]
+//
+func (self *Program) VPLZCNTD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPLZCNTD", 2, Operands { v0, v1 })
+ // VPLZCNTD m128/m32bcst, xmm{k}{z}
+ if isM128M32bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x44)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VPLZCNTD m256/m32bcst, ymm{k}{z}
+ if isM256M32bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x44)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VPLZCNTD m512/m32bcst, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x44)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VPLZCNTD xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x44)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPLZCNTD ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x44)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPLZCNTD zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x44)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPLZCNTD")
+ }
+ return p
+}
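+
+// In the EVEX register forms above, the byte built from
+// (zcode(v[1])<<7) | kcode(v[1]) | 0x08/0x28/0x48 is the fourth EVEX
+// prefix byte: bit 7 selects {z} (zeroing vs merging), the low three bits
+// pick the {k} opmask register, and the 0x08/0x28/0x48 constants differ
+// only in the L'L vector-length bits (128/256/512) above the always-set
+// V' bit. The memory forms hand the same information to m.evex via
+// kcode/zcode/bcode, with bcode(v[0]) flagging the m32bcst broadcast
+// variant.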
+
+// VPLZCNTQ performs "Count the Number of Leading Zero Bits for Packed Quadword Values".
+//
+// Mnemonic : VPLZCNTQ
+// Supported forms : (6 forms)
+//
+// * VPLZCNTQ m128/m64bcst, xmm{k}{z} [AVX512CD,AVX512VL]
+// * VPLZCNTQ m256/m64bcst, ymm{k}{z} [AVX512CD,AVX512VL]
+// * VPLZCNTQ m512/m64bcst, zmm{k}{z} [AVX512CD]
+// * VPLZCNTQ xmm, xmm{k}{z} [AVX512CD,AVX512VL]
+// * VPLZCNTQ ymm, ymm{k}{z} [AVX512CD,AVX512VL]
+// * VPLZCNTQ zmm, zmm{k}{z} [AVX512CD]
+//
+func (self *Program) VPLZCNTQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPLZCNTQ", 2, Operands { v0, v1 })
+ // VPLZCNTQ m128/m64bcst, xmm{k}{z}
+ if isM128M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x44)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VPLZCNTQ m256/m64bcst, ymm{k}{z}
+ if isM256M64bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x44)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VPLZCNTQ m512/m64bcst, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x44)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VPLZCNTQ xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x44)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPLZCNTQ ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x44)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPLZCNTQ zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512CD)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x44)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPLZCNTQ")
+ }
+ return p
+}
+
+// VPMACSDD performs "Packed Multiply Accumulate Signed Doubleword to Signed Doubleword".
+//
+// Mnemonic : VPMACSDD
+// Supported forms : (2 forms)
+//
+// * VPMACSDD xmm, xmm, xmm, xmm [XOP]
+// * VPMACSDD xmm, m128, xmm, xmm [XOP]
+//
+func (self *Program) VPMACSDD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPMACSDD", 4, Operands { v0, v1, v2, v3 })
+ // VPMACSDD xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[2]) << 3))
+ m.emit(0x9e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VPMACSDD xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x9e)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMACSDD")
+ }
+ return p
+}
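+
+// VPMACSDD and the rest of the XOP multiply-accumulate family below are
+// four-operand instructions: three operands travel in the usual XOP
+// prefix and ModRM fields, and the remaining register rides in a trailing
+// imm8 "is4" byte — m.emit(hlcode(v[0]) << 4) places its register number
+// in the immediate's high nibble.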
+
+// VPMACSDQH performs "Packed Multiply Accumulate Signed High Doubleword to Signed Quadword".
+//
+// Mnemonic : VPMACSDQH
+// Supported forms : (2 forms)
+//
+// * VPMACSDQH xmm, xmm, xmm, xmm [XOP]
+// * VPMACSDQH xmm, m128, xmm, xmm [XOP]
+//
+func (self *Program) VPMACSDQH(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPMACSDQH", 4, Operands { v0, v1, v2, v3 })
+ // VPMACSDQH xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[2]) << 3))
+ m.emit(0x9f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VPMACSDQH xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x9f)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMACSDQH")
+ }
+ return p
+}
+
+// VPMACSDQL performs "Packed Multiply Accumulate Signed Low Doubleword to Signed Quadword".
+//
+// Mnemonic : VPMACSDQL
+// Supported forms : (2 forms)
+//
+// * VPMACSDQL xmm, xmm, xmm, xmm [XOP]
+// * VPMACSDQL xmm, m128, xmm, xmm [XOP]
+//
+func (self *Program) VPMACSDQL(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPMACSDQL", 4, Operands { v0, v1, v2, v3 })
+ // VPMACSDQL xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[2]) << 3))
+ m.emit(0x97)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VPMACSDQL xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x97)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMACSDQL")
+ }
+ return p
+}
+
+// VPMACSSDD performs "Packed Multiply Accumulate with Saturation Signed Doubleword to Signed Doubleword".
+//
+// Mnemonic : VPMACSSDD
+// Supported forms : (2 forms)
+//
+// * VPMACSSDD xmm, xmm, xmm, xmm [XOP]
+// * VPMACSSDD xmm, m128, xmm, xmm [XOP]
+//
+func (self *Program) VPMACSSDD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPMACSSDD", 4, Operands { v0, v1, v2, v3 })
+ // VPMACSSDD xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[2]) << 3))
+ m.emit(0x8e)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VPMACSSDD xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x8e)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMACSSDD")
+ }
+ return p
+}
+
+// VPMACSSDQH performs "Packed Multiply Accumulate with Saturation Signed High Doubleword to Signed Quadword".
+//
+// Mnemonic : VPMACSSDQH
+// Supported forms : (2 forms)
+//
+// * VPMACSSDQH xmm, xmm, xmm, xmm [XOP]
+// * VPMACSSDQH xmm, m128, xmm, xmm [XOP]
+//
+func (self *Program) VPMACSSDQH(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPMACSSDQH", 4, Operands { v0, v1, v2, v3 })
+ // VPMACSSDQH xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[2]) << 3))
+ m.emit(0x8f)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VPMACSSDQH xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x8f)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMACSSDQH")
+ }
+ return p
+}
+
+// VPMACSSDQL performs "Packed Multiply Accumulate with Saturation Signed Low Doubleword to Signed Quadword".
+//
+// Mnemonic : VPMACSSDQL
+// Supported forms : (2 forms)
+//
+// * VPMACSSDQL xmm, xmm, xmm, xmm [XOP]
+// * VPMACSSDQL xmm, m128, xmm, xmm [XOP]
+//
+func (self *Program) VPMACSSDQL(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPMACSSDQL", 4, Operands { v0, v1, v2, v3 })
+ // VPMACSSDQL xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[2]) << 3))
+ m.emit(0x87)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VPMACSSDQL xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x87)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMACSSDQL")
+ }
+ return p
+}
+
+// VPMACSSWD performs "Packed Multiply Accumulate with Saturation Signed Word to Signed Doubleword".
+//
+// Mnemonic : VPMACSSWD
+// Supported forms : (2 forms)
+//
+// * VPMACSSWD xmm, xmm, xmm, xmm [XOP]
+// * VPMACSSWD xmm, m128, xmm, xmm [XOP]
+//
+func (self *Program) VPMACSSWD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPMACSSWD", 4, Operands { v0, v1, v2, v3 })
+ // VPMACSSWD xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[2]) << 3))
+ m.emit(0x86)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VPMACSSWD xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x86)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMACSSWD")
+ }
+ return p
+}
+
+// VPMACSSWW performs "Packed Multiply Accumulate with Saturation Signed Word to Signed Word".
+//
+// Mnemonic : VPMACSSWW
+// Supported forms : (2 forms)
+//
+// * VPMACSSWW xmm, xmm, xmm, xmm [XOP]
+// * VPMACSSWW xmm, m128, xmm, xmm [XOP]
+//
+func (self *Program) VPMACSSWW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPMACSSWW", 4, Operands { v0, v1, v2, v3 })
+ // VPMACSSWW xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[2]) << 3))
+ m.emit(0x85)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VPMACSSWW xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x85)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMACSSWW")
+ }
+ return p
+}
+
+// VPMACSWD performs "Packed Multiply Accumulate Signed Word to Signed Doubleword".
+//
+// Mnemonic : VPMACSWD
+// Supported forms : (2 forms)
+//
+// * VPMACSWD xmm, xmm, xmm, xmm [XOP]
+// * VPMACSWD xmm, m128, xmm, xmm [XOP]
+//
+func (self *Program) VPMACSWD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPMACSWD", 4, Operands { v0, v1, v2, v3 })
+ // VPMACSWD xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[2]) << 3))
+ m.emit(0x96)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VPMACSWD xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x96)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMACSWD")
+ }
+ return p
+}
+
+// VPMACSWW performs "Packed Multiply Accumulate Signed Word to Signed Word".
+//
+// Mnemonic : VPMACSWW
+// Supported forms : (2 forms)
+//
+// * VPMACSWW xmm, xmm, xmm, xmm [XOP]
+// * VPMACSWW xmm, m128, xmm, xmm [XOP]
+//
+func (self *Program) VPMACSWW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPMACSWW", 4, Operands { v0, v1, v2, v3 })
+ // VPMACSWW xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[2]) << 3))
+ m.emit(0x95)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VPMACSWW xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x95)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMACSWW")
+ }
+ return p
+}
+
+// VPMADCSSWD performs "Packed Multiply Add Accumulate with Saturation Signed Word to Signed Doubleword".
+//
+// Mnemonic : VPMADCSSWD
+// Supported forms : (2 forms)
+//
+// * VPMADCSSWD xmm, xmm, xmm, xmm [XOP]
+// * VPMADCSSWD xmm, m128, xmm, xmm [XOP]
+//
+func (self *Program) VPMADCSSWD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPMADCSSWD", 4, Operands { v0, v1, v2, v3 })
+ // VPMADCSSWD xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[2]) << 3))
+ m.emit(0xa6)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VPMADCSSWD xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0xa6)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMADCSSWD")
+ }
+ return p
+}
+
+// VPMADCSWD performs "Packed Multiply Add Accumulate Signed Word to Signed Doubleword".
+//
+// Mnemonic : VPMADCSWD
+// Supported forms : (2 forms)
+//
+// * VPMADCSWD xmm, xmm, xmm, xmm [XOP]
+// * VPMADCSWD xmm, m128, xmm, xmm [XOP]
+//
+func (self *Program) VPMADCSWD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPMADCSWD", 4, Operands { v0, v1, v2, v3 })
+ // VPMADCSWD xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[2]) << 3))
+ m.emit(0xb6)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ // VPMADCSWD xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0xb6)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMADCSWD")
+ }
+ return p
+}
+
+// VPMADD52HUQ performs "Packed Multiply of Unsigned 52-bit Integers and Add the High 52-bit Products to Quadword Accumulators".
+//
+// Mnemonic : VPMADD52HUQ
+// Supported forms : (6 forms)
+//
+// * VPMADD52HUQ m128/m64bcst, xmm, xmm{k}{z} [AVX512IFMA,AVX512VL]
+// * VPMADD52HUQ xmm, xmm, xmm{k}{z} [AVX512IFMA,AVX512VL]
+// * VPMADD52HUQ m256/m64bcst, ymm, ymm{k}{z} [AVX512IFMA,AVX512VL]
+// * VPMADD52HUQ ymm, ymm, ymm{k}{z} [AVX512IFMA,AVX512VL]
+// * VPMADD52HUQ m512/m64bcst, zmm, zmm{k}{z} [AVX512IFMA]
+// * VPMADD52HUQ zmm, zmm, zmm{k}{z} [AVX512IFMA]
+//
+func (self *Program) VPMADD52HUQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMADD52HUQ", 3, Operands { v0, v1, v2 })
+ // VPMADD52HUQ m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512IFMA | ISA_AVX512VL)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xb5)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMADD52HUQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512IFMA | ISA_AVX512VL)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xb5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMADD52HUQ m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512IFMA | ISA_AVX512VL)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xb5)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPMADD52HUQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512IFMA | ISA_AVX512VL)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xb5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMADD52HUQ m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512IFMA)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xb5)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMADD52HUQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512IFMA)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xb5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMADD52HUQ")
+ }
+ return p
+}
+
+// VPMADD52LUQ performs "Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit Products to Quadword Accumulators".
+//
+// Mnemonic : VPMADD52LUQ
+// Supported forms : (6 forms)
+//
+// * VPMADD52LUQ m128/m64bcst, xmm, xmm{k}{z} [AVX512IFMA,AVX512VL]
+// * VPMADD52LUQ xmm, xmm, xmm{k}{z} [AVX512IFMA,AVX512VL]
+// * VPMADD52LUQ m256/m64bcst, ymm, ymm{k}{z} [AVX512IFMA,AVX512VL]
+// * VPMADD52LUQ ymm, ymm, ymm{k}{z} [AVX512IFMA,AVX512VL]
+// * VPMADD52LUQ m512/m64bcst, zmm, zmm{k}{z} [AVX512IFMA]
+// * VPMADD52LUQ zmm, zmm, zmm{k}{z} [AVX512IFMA]
+//
+func (self *Program) VPMADD52LUQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMADD52LUQ", 3, Operands { v0, v1, v2 })
+ // VPMADD52LUQ m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512IFMA | ISA_AVX512VL)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xb4)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMADD52LUQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512IFMA | ISA_AVX512VL)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xb4)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMADD52LUQ m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512IFMA | ISA_AVX512VL)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xb4)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPMADD52LUQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512IFMA | ISA_AVX512VL)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xb4)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMADD52LUQ m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512IFMA)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xb4)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMADD52LUQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512IFMA)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xb4)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMADD52LUQ")
+ }
+ return p
+}
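+
+// Scalar model of the IFMA pair above — a hedged sketch of the per-lane
+// semantics, not code from this package (the helper name is illustrative):
+// each 64-bit lane multiplies the low 52 bits of both sources into a
+// 104-bit product, then adds either its low or its high 52 bits to the
+// accumulator, wrapping modulo 2^64:
+//
+//	import "math/bits"
+//
+//	func madd52(acc, a, b uint64, high bool) uint64 {
+//		const mask52 = (1 << 52) - 1
+//		hi, lo := bits.Mul64(a&mask52, b&mask52) // 104-bit product
+//		if high {
+//			return acc + ((hi << 12) | (lo >> 52)) // VPMADD52HUQ lane
+//		}
+//		return acc + (lo & mask52) // VPMADD52LUQ lane
+//	}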
+
+// VPMADDUBSW performs "Multiply and Add Packed Signed and Unsigned Byte Integers".
+//
+// Mnemonic : VPMADDUBSW
+// Supported forms : (10 forms)
+//
+// * VPMADDUBSW xmm, xmm, xmm [AVX]
+// * VPMADDUBSW m128, xmm, xmm [AVX]
+// * VPMADDUBSW ymm, ymm, ymm [AVX2]
+// * VPMADDUBSW m256, ymm, ymm [AVX2]
+// * VPMADDUBSW zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPMADDUBSW m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPMADDUBSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMADDUBSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMADDUBSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPMADDUBSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPMADDUBSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMADDUBSW", 3, Operands { v0, v1, v2 })
+ // VPMADDUBSW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x04)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMADDUBSW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x04)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMADDUBSW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x04)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMADDUBSW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x04)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMADDUBSW zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x04)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMADDUBSW m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x04)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMADDUBSW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x04)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMADDUBSW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x04)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMADDUBSW ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x04)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMADDUBSW m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x04)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMADDUBSW")
+ }
+ return p
+}
+
+// VPMADDWD performs "Multiply and Add Packed Signed Word Integers".
+//
+// Mnemonic : VPMADDWD
+// Supported forms : (10 forms)
+//
+// * VPMADDWD xmm, xmm, xmm [AVX]
+// * VPMADDWD m128, xmm, xmm [AVX]
+// * VPMADDWD ymm, ymm, ymm [AVX2]
+// * VPMADDWD m256, ymm, ymm [AVX2]
+// * VPMADDWD zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPMADDWD m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPMADDWD xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMADDWD m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMADDWD ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPMADDWD m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPMADDWD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMADDWD", 3, Operands { v0, v1, v2 })
+ // VPMADDWD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xf5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMADDWD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xf5)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMADDWD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xf5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMADDWD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xf5)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMADDWD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xf5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMADDWD m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xf5)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMADDWD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xf5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMADDWD m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xf5)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMADDWD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xf5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMADDWD m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xf5)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMADDWD")
+ }
+ return p
+}
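+
+// VPMADDWD per-dword model (informational): each result doubleword is the
+// pairwise dot product of two signed words, dst[i] = a[2i]*b[2i] +
+// a[2i+1]*b[2i+1]; e.g. word pairs (1,2)·(3,4) produce the dword 11.
+// VPMADDUBSW above is the byte analogue: it pairs unsigned bytes of one
+// source with the corresponding signed bytes of the other and saturates
+// each word sum.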
+
+// VPMASKMOVD performs "Conditional Move Packed Doubleword Integers".
+//
+// Mnemonic : VPMASKMOVD
+// Supported forms : (4 forms)
+//
+// * VPMASKMOVD m128, xmm, xmm [AVX2]
+// * VPMASKMOVD m256, ymm, ymm [AVX2]
+// * VPMASKMOVD xmm, xmm, m128 [AVX2]
+// * VPMASKMOVD ymm, ymm, m256 [AVX2]
+//
+func (self *Program) VPMASKMOVD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMASKMOVD", 3, Operands { v0, v1, v2 })
+ // VPMASKMOVD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x8c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMASKMOVD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x8c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMASKMOVD xmm, xmm, m128
+ if isXMM(v0) && isXMM(v1) && isM128(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[0]), addr(v[2]), hlcode(v[1]))
+ m.emit(0x8e)
+ m.mrsd(lcode(v[0]), addr(v[2]), 1)
+ })
+ }
+ // VPMASKMOVD ymm, ymm, m256
+ if isYMM(v0) && isYMM(v1) && isM256(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[0]), addr(v[2]), hlcode(v[1]))
+ m.emit(0x8e)
+ m.mrsd(lcode(v[0]), addr(v[2]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMASKMOVD")
+ }
+ return p
+}
+
+// VPMASKMOVQ performs "Conditional Move Packed Quadword Integers".
+//
+// Mnemonic : VPMASKMOVQ
+// Supported forms : (4 forms)
+//
+// * VPMASKMOVQ m128, xmm, xmm [AVX2]
+// * VPMASKMOVQ m256, ymm, ymm [AVX2]
+// * VPMASKMOVQ xmm, xmm, m128 [AVX2]
+// * VPMASKMOVQ ymm, ymm, m256 [AVX2]
+//
+func (self *Program) VPMASKMOVQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMASKMOVQ", 3, Operands { v0, v1, v2 })
+ // VPMASKMOVQ m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x8c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMASKMOVQ m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x8c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMASKMOVQ xmm, xmm, m128
+ if isXMM(v0) && isXMM(v1) && isM128(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[0]), addr(v[2]), hlcode(v[1]))
+ m.emit(0x8e)
+ m.mrsd(lcode(v[0]), addr(v[2]), 1)
+ })
+ }
+ // VPMASKMOVQ ymm, ymm, m256
+ if isYMM(v0) && isYMM(v1) && isM256(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[0]), addr(v[2]), hlcode(v[1]))
+ m.emit(0x8e)
+ m.mrsd(lcode(v[0]), addr(v[2]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMASKMOVQ")
+ }
+ return p
+}
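+
+// Both VPMASKMOV* functions pick the direction from whichever operand is
+// memory: opcode 0x8c is the masked load (memory source, first operand)
+// and 0x8e the masked store (memory destination, last operand). The
+// middle register supplies the mask — the sign bit of each element
+// decides whether that lane is transferred.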
+
+// VPMAXSB performs "Maximum of Packed Signed Byte Integers".
+//
+// Mnemonic : VPMAXSB
+// Supported forms : (10 forms)
+//
+// * VPMAXSB xmm, xmm, xmm [AVX]
+// * VPMAXSB m128, xmm, xmm [AVX]
+// * VPMAXSB ymm, ymm, ymm [AVX2]
+// * VPMAXSB m256, ymm, ymm [AVX2]
+// * VPMAXSB zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPMAXSB m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPMAXSB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMAXSB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMAXSB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPMAXSB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPMAXSB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMAXSB", 3, Operands { v0, v1, v2 })
+ // VPMAXSB xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x3c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXSB m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x3c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMAXSB ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x3c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXSB m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x3c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMAXSB zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x3c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXSB m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x3c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMAXSB xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x3c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXSB m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x3c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMAXSB ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x3c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXSB m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x3c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMAXSB")
+ }
+ return p
+}
+
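+// Usage sketch (illustrative, not part of the generated output): like
+// the rest of this package, VPMAXSB takes its operands source-first
+// and destination-last, so the call below computes the byte-wise
+// signed maximum of xmm1 and xmm2 into xmm0, assuming the package's
+// exported XMM register constants:
+//
+//     func vpmaxsbSketch(p *Program) {
+//         p.VPMAXSB(XMM2, XMM1, XMM0) // xmm0[i] = max(xmm1[i], xmm2[i]) for each signed byte
+//     }
+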
+// VPMAXSD performs "Maximum of Packed Signed Doubleword Integers".
+//
+// Mnemonic : VPMAXSD
+// Supported forms : (10 forms)
+//
+// * VPMAXSD xmm, xmm, xmm [AVX]
+// * VPMAXSD m128, xmm, xmm [AVX]
+// * VPMAXSD ymm, ymm, ymm [AVX2]
+// * VPMAXSD m256, ymm, ymm [AVX2]
+// * VPMAXSD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPMAXSD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPMAXSD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMAXSD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMAXSD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPMAXSD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMAXSD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMAXSD", 3, Operands { v0, v1, v2 })
+ // VPMAXSD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x3d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXSD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x3d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMAXSD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x3d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXSD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x3d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMAXSD m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x3d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMAXSD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x3d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXSD m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x3d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMAXSD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x3d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXSD m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x3d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPMAXSD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x3d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMAXSD")
+ }
+ return p
+}
+
+// VPMAXSQ performs "Maximum of Packed Signed Quadword Integers".
+//
+// Mnemonic : VPMAXSQ
+// Supported forms : (6 forms)
+//
+// * VPMAXSQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPMAXSQ zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPMAXSQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMAXSQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMAXSQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPMAXSQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMAXSQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMAXSQ", 3, Operands { v0, v1, v2 })
+ // VPMAXSQ m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x3d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMAXSQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x3d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXSQ m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x3d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMAXSQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x3d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXSQ m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x3d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPMAXSQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x3d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMAXSQ")
+ }
+ return p
+}
+
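+// Usage note (illustrative): unlike VPMAXSB and VPMAXSD, VPMAXSQ has
+// no VEX-encoded form, so even its plain xmm/ymm register variants are
+// EVEX-encoded and require AVX512F plus AVX512VL. A register-form
+// sketch, under the same register-constant assumptions as above:
+//
+//     func vpmaxsqSketch(p *Program) {
+//         p.VPMAXSQ(ZMM2, ZMM1, ZMM0) // zmm0 = quadword-wise signed max; needs AVX512F
+//     }
+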
+// VPMAXSW performs "Maximum of Packed Signed Word Integers".
+//
+// Mnemonic : VPMAXSW
+// Supported forms : (10 forms)
+//
+// * VPMAXSW xmm, xmm, xmm [AVX]
+// * VPMAXSW m128, xmm, xmm [AVX]
+// * VPMAXSW ymm, ymm, ymm [AVX2]
+// * VPMAXSW m256, ymm, ymm [AVX2]
+// * VPMAXSW zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPMAXSW m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPMAXSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMAXSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMAXSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPMAXSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPMAXSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMAXSW", 3, Operands { v0, v1, v2 })
+ // VPMAXSW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xee)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXSW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xee)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMAXSW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xee)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXSW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xee)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMAXSW zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xee)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXSW m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xee)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMAXSW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xee)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXSW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xee)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMAXSW ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xee)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXSW m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xee)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMAXSW")
+ }
+ return p
+}
+
+// VPMAXUB performs "Maximum of Packed Unsigned Byte Integers".
+//
+// Mnemonic : VPMAXUB
+// Supported forms : (10 forms)
+//
+// * VPMAXUB xmm, xmm, xmm [AVX]
+// * VPMAXUB m128, xmm, xmm [AVX]
+// * VPMAXUB ymm, ymm, ymm [AVX2]
+// * VPMAXUB m256, ymm, ymm [AVX2]
+// * VPMAXUB zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPMAXUB m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPMAXUB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMAXUB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMAXUB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPMAXUB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPMAXUB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMAXUB", 3, Operands { v0, v1, v2 })
+ // VPMAXUB xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xde)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXUB m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xde)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMAXUB ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xde)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXUB m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xde)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMAXUB zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xde)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXUB m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xde)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMAXUB xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xde)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXUB m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xde)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMAXUB ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xde)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXUB m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xde)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMAXUB")
+ }
+ return p
+}
+
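+// Usage note (illustrative): VPMAXUB compares lanes as unsigned bytes,
+// so 0xff is the largest possible value rather than -1; prefer it over
+// VPMAXSB when the data are raw bytes. Sketch:
+//
+//     func vpmaxubSketch(p *Program) {
+//         p.VPMAXUB(YMM2, YMM1, YMM0) // ymm0 = byte-wise unsigned max of ymm1 and ymm2; needs AVX2
+//     }
+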
+// VPMAXUD performs "Maximum of Packed Unsigned Doubleword Integers".
+//
+// Mnemonic : VPMAXUD
+// Supported forms : (10 forms)
+//
+// * VPMAXUD xmm, xmm, xmm [AVX]
+// * VPMAXUD m128, xmm, xmm [AVX]
+// * VPMAXUD ymm, ymm, ymm [AVX2]
+// * VPMAXUD m256, ymm, ymm [AVX2]
+// * VPMAXUD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPMAXUD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPMAXUD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMAXUD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMAXUD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPMAXUD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMAXUD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMAXUD", 3, Operands { v0, v1, v2 })
+ // VPMAXUD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x3f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXUD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x3f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMAXUD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x3f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXUD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x3f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMAXUD m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x3f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMAXUD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x3f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXUD m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x3f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMAXUD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x3f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXUD m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x3f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPMAXUD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x3f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMAXUD")
+ }
+ return p
+}
+
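+// Usage note (illustrative): the m512/m32bcst form of VPMAXUD accepts
+// a memory operand flagged for broadcast, replicating one doubleword
+// across all lanes before the unsigned max; how that flag is attached
+// belongs to the package's memory-operand API, so the sketch below
+// simply forwards a caller-built operand:
+//
+//     func vpmaxudBcstSketch(p *Program, m32bcst interface{}) {
+//         p.VPMAXUD(m32bcst, ZMM1, ZMM0) // zmm0 = dword-wise unsigned max of zmm1 and the broadcast dword
+//     }
+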
+// VPMAXUQ performs "Maximum of Packed Unsigned Quadword Integers".
+//
+// Mnemonic : VPMAXUQ
+// Supported forms : (6 forms)
+//
+// * VPMAXUQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPMAXUQ zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPMAXUQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMAXUQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMAXUQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPMAXUQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMAXUQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMAXUQ", 3, Operands { v0, v1, v2 })
+ // VPMAXUQ m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x3f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMAXUQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x3f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXUQ m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x3f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMAXUQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x3f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXUQ m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x3f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPMAXUQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x3f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMAXUQ")
+ }
+ return p
+}
+
+// VPMAXUW performs "Maximum of Packed Unsigned Word Integers".
+//
+// Mnemonic : VPMAXUW
+// Supported forms : (10 forms)
+//
+// * VPMAXUW xmm, xmm, xmm [AVX]
+// * VPMAXUW m128, xmm, xmm [AVX]
+// * VPMAXUW ymm, ymm, ymm [AVX2]
+// * VPMAXUW m256, ymm, ymm [AVX2]
+// * VPMAXUW zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPMAXUW m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPMAXUW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMAXUW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMAXUW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPMAXUW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPMAXUW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMAXUW", 3, Operands { v0, v1, v2 })
+ // VPMAXUW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x3e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXUW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x3e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMAXUW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x3e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXUW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x3e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMAXUW zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x3e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXUW m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x3e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMAXUW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x3e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXUW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x3e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMAXUW ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x3e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMAXUW m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x3e)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMAXUW")
+ }
+ return p
+}
+
+// VPMINSB performs "Minimum of Packed Signed Byte Integers".
+//
+// Mnemonic : VPMINSB
+// Supported forms : (10 forms)
+//
+// * VPMINSB xmm, xmm, xmm [AVX]
+// * VPMINSB m128, xmm, xmm [AVX]
+// * VPMINSB ymm, ymm, ymm [AVX2]
+// * VPMINSB m256, ymm, ymm [AVX2]
+// * VPMINSB zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPMINSB m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPMINSB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMINSB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMINSB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPMINSB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPMINSB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMINSB", 3, Operands { v0, v1, v2 })
+ // VPMINSB xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x38)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINSB m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x38)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMINSB ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x38)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINSB m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x38)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMINSB zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x38)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINSB m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x38)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMINSB xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x38)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINSB m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x38)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMINSB ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x38)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINSB m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x38)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMINSB")
+ }
+ return p
+}
+
+// VPMINSD performs "Minimum of Packed Signed Doubleword Integers".
+//
+// Mnemonic : VPMINSD
+// Supported forms : (10 forms)
+//
+// * VPMINSD xmm, xmm, xmm [AVX]
+// * VPMINSD m128, xmm, xmm [AVX]
+// * VPMINSD ymm, ymm, ymm [AVX2]
+// * VPMINSD m256, ymm, ymm [AVX2]
+// * VPMINSD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPMINSD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPMINSD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMINSD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMINSD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPMINSD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMINSD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMINSD", 3, Operands { v0, v1, v2 })
+ // VPMINSD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x39)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINSD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x39)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMINSD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x39)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINSD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x39)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMINSD m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x39)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMINSD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x39)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINSD m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x39)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMINSD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x39)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINSD m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x39)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPMINSD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x39)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMINSD")
+ }
+ return p
+}
+
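+// Usage sketch (illustrative): pairing VPMINSD with the VPMAXSD
+// encoder above yields a doubleword clamp, pinning each lane of xmm0
+// into [lo, hi] with the lower bounds in xmm1 and the upper bounds in
+// xmm2:
+//
+//     func clampSketch(p *Program) {
+//         p.VPMAXSD(XMM1, XMM0, XMM0) // xmm0 = max(xmm0, lo)
+//         p.VPMINSD(XMM2, XMM0, XMM0) // xmm0 = min(xmm0, hi)
+//     }
+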
+// VPMINSQ performs "Minimum of Packed Signed Quadword Integers".
+//
+// Mnemonic : VPMINSQ
+// Supported forms : (6 forms)
+//
+// * VPMINSQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPMINSQ zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPMINSQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMINSQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMINSQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPMINSQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMINSQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMINSQ", 3, Operands { v0, v1, v2 })
+ // VPMINSQ m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x39)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMINSQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x39)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINSQ m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x39)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMINSQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x39)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINSQ m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x39)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPMINSQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x39)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMINSQ")
+ }
+ return p
+}
+
+// VPMINSW performs "Minimum of Packed Signed Word Integers".
+//
+// Mnemonic : VPMINSW
+// Supported forms : (10 forms)
+//
+// * VPMINSW xmm, xmm, xmm [AVX]
+// * VPMINSW m128, xmm, xmm [AVX]
+// * VPMINSW ymm, ymm, ymm [AVX2]
+// * VPMINSW m256, ymm, ymm [AVX2]
+// * VPMINSW zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPMINSW m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPMINSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMINSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMINSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPMINSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPMINSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMINSW", 3, Operands { v0, v1, v2 })
+ // VPMINSW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xea)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINSW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xea)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMINSW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xea)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINSW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xea)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMINSW zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xea)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINSW m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xea)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMINSW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xea)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINSW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xea)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMINSW ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xea)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINSW m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xea)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMINSW")
+ }
+ return p
+}
+
+// VPMINUB performs "Minimum of Packed Unsigned Byte Integers".
+//
+// Mnemonic : VPMINUB
+// Supported forms : (10 forms)
+//
+// * VPMINUB xmm, xmm, xmm [AVX]
+// * VPMINUB m128, xmm, xmm [AVX]
+// * VPMINUB ymm, ymm, ymm [AVX2]
+// * VPMINUB m256, ymm, ymm [AVX2]
+// * VPMINUB zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPMINUB m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPMINUB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMINUB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMINUB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPMINUB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPMINUB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMINUB", 3, Operands { v0, v1, v2 })
+ // VPMINUB xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xda)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINUB m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xda)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMINUB ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xda)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINUB m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xda)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMINUB zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xda)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINUB m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xda)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMINUB xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xda)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINUB m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xda)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMINUB ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xda)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINUB m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xda)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMINUB")
+ }
+ return p
+}
+
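+// Usage note (illustrative): byte-wise unsigned min is the classic
+// building block of SIMD zero-byte scans; folding several chunks with
+// VPMINUB and then comparing the result against zero reveals whether
+// any of them contained a zero byte. Sketch of the folding step:
+//
+//     func foldMinSketch(p *Program) {
+//         p.VPMINUB(YMM1, YMM0, YMM0) // ymm0 = byte-wise unsigned min of two chunks
+//         p.VPMINUB(YMM2, YMM0, YMM0) // fold in a third chunk
+//     }
+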
+// VPMINUD performs "Minimum of Packed Unsigned Doubleword Integers".
+//
+// Mnemonic : VPMINUD
+// Supported forms : (10 forms)
+//
+// * VPMINUD xmm, xmm, xmm [AVX]
+// * VPMINUD m128, xmm, xmm [AVX]
+// * VPMINUD ymm, ymm, ymm [AVX2]
+// * VPMINUD m256, ymm, ymm [AVX2]
+// * VPMINUD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPMINUD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPMINUD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMINUD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMINUD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPMINUD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMINUD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMINUD", 3, Operands { v0, v1, v2 })
+ // VPMINUD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x3b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINUD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x3b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMINUD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x3b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINUD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x3b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMINUD m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x3b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMINUD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x3b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINUD m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x3b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMINUD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x3b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINUD m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x3b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPMINUD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x3b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMINUD")
+ }
+ return p
+}
+
+// VPMINUQ performs "Minimum of Packed Unsigned Quadword Integers".
+//
+// Mnemonic : VPMINUQ
+// Supported forms : (6 forms)
+//
+// * VPMINUQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPMINUQ zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPMINUQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMINUQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMINUQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPMINUQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
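+// Example (editor's sketch, not generated output): emitting the register form
+// through what is assumed to be the iasm top-level API (CreateArch,
+// CreateProgram and Assemble). Operand order follows the form list above,
+// so the last argument is the destination:
+//
+//     arch := CreateArch()
+//     p := arch.CreateProgram()
+//     p.VPMINUQ(ZMM1, ZMM2, ZMM3)    // zmm3 = unsigned min(zmm2, zmm1), per qword
+//     code := p.Assemble(0)          // encode at pc 0
+//     _ = code
+//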
+func (self *Program) VPMINUQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMINUQ", 3, Operands { v0, v1, v2 })
+ // VPMINUQ m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x3b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMINUQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x3b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINUQ m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x3b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMINUQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x3b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINUQ m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x3b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPMINUQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x3b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMINUQ")
+ }
+ return p
+}
+
+// VPMINUW performs "Minimum of Packed Unsigned Word Integers".
+//
+// Mnemonic : VPMINUW
+// Supported forms : (10 forms)
+//
+// * VPMINUW xmm, xmm, xmm [AVX]
+// * VPMINUW m128, xmm, xmm [AVX]
+// * VPMINUW ymm, ymm, ymm [AVX2]
+// * VPMINUW m256, ymm, ymm [AVX2]
+// * VPMINUW zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPMINUW m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPMINUW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMINUW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMINUW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPMINUW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
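+// Example (editor's sketch, assuming this package's exported register
+// constants): plain xmm/ymm operands satisfy the VEX-encoded AVX forms,
+// while zmm operands only match the EVEX-encoded AVX512BW form:
+//
+//     p.VPMINUW(XMM0, XMM1, XMM2)    // AVX
+//     p.VPMINUW(ZMM0, ZMM1, ZMM2)    // AVX512BW
+//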
+func (self *Program) VPMINUW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMINUW", 3, Operands { v0, v1, v2 })
+ // VPMINUW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x3a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINUW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x3a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMINUW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x3a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINUW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x3a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMINUW zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x3a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINUW m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x3a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMINUW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x3a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINUW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x3a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMINUW ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x3a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMINUW m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x3a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMINUW")
+ }
+ return p
+}
+
+// VPMOVB2M performs "Move Signs of Packed Byte Integers to Mask Register".
+//
+// Mnemonic : VPMOVB2M
+// Supported forms : (3 forms)
+//
+// * VPMOVB2M zmm, k [AVX512BW]
+// * VPMOVB2M xmm, k [AVX512BW,AVX512VL]
+// * VPMOVB2M ymm, k [AVX512BW,AVX512VL]
+//
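+// Example (editor's sketch): the destination is a mask register, receiving
+// one bit per source byte. K1 is assumed to be one of this package's
+// mask-register constants:
+//
+//     p.VPMOVB2M(ZMM0, K1)    // k1 bit i = sign bit of zmm0 byte i
+//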
+func (self *Program) VPMOVB2M(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVB2M", 2, Operands { v0, v1 })
+ // VPMOVB2M zmm, k
+ if isZMM(v0) && isK(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit(0x48)
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVB2M xmm, k
+ if isEVEXXMM(v0) && isK(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit(0x08)
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVB2M ymm, k
+ if isEVEXYMM(v0) && isK(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit(0x28)
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVB2M")
+ }
+ return p
+}
+
+// VPMOVD2M performs "Move Signs of Packed Doubleword Integers to Mask Register".
+//
+// Mnemonic : VPMOVD2M
+// Supported forms : (3 forms)
+//
+// * VPMOVD2M zmm, k [AVX512DQ]
+// * VPMOVD2M xmm, k [AVX512DQ,AVX512VL]
+// * VPMOVD2M ymm, k [AVX512DQ,AVX512VL]
+//
+func (self *Program) VPMOVD2M(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVD2M", 2, Operands { v0, v1 })
+ // VPMOVD2M zmm, k
+ if isZMM(v0) && isK(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit(0x48)
+ m.emit(0x39)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVD2M xmm, k
+ if isEVEXXMM(v0) && isK(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit(0x08)
+ m.emit(0x39)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVD2M ymm, k
+ if isEVEXYMM(v0) && isK(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit(0x28)
+ m.emit(0x39)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVD2M")
+ }
+ return p
+}
+
+// VPMOVDB performs "Down Convert Packed Doubleword Values to Byte Values with Truncation".
+//
+// Mnemonic : VPMOVDB
+// Supported forms : (6 forms)
+//
+// * VPMOVDB zmm, xmm{k}{z} [AVX512F]
+// * VPMOVDB zmm, m128{k}{z} [AVX512F]
+// * VPMOVDB xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVDB xmm, m32{k}{z} [AVX512F,AVX512VL]
+// * VPMOVDB ymm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVDB ymm, m64{k}{z} [AVX512F,AVX512VL]
+//
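+// Example (editor's sketch): a truncating down-convert, so the 16 doublewords
+// of a zmm source narrow into the low 16 bytes of the xmm destination:
+//
+//     p.VPMOVDB(ZMM4, XMM1)    // xmm1 byte i = low byte of zmm4 dword i
+//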
+func (self *Program) VPMOVDB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVDB", 2, Operands { v0, v1 })
+ // VPMOVDB zmm, xmm{k}{z}
+ if isZMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x31)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVDB zmm, m128{k}{z}
+ if isZMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x31)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ // VPMOVDB xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x31)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVDB xmm, m32{k}{z}
+ if isEVEXXMM(v0) && isM32kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x31)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ // VPMOVDB ymm, xmm{k}{z}
+ if isEVEXYMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x31)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVDB ymm, m64{k}{z}
+ if isEVEXYMM(v0) && isM64kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x31)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVDB")
+ }
+ return p
+}
+
+// VPMOVDW performs "Down Convert Packed Doubleword Values to Word Values with Truncation".
+//
+// Mnemonic : VPMOVDW
+// Supported forms : (6 forms)
+//
+// * VPMOVDW zmm, ymm{k}{z} [AVX512F]
+// * VPMOVDW zmm, m256{k}{z} [AVX512F]
+// * VPMOVDW xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVDW xmm, m64{k}{z} [AVX512F,AVX512VL]
+// * VPMOVDW ymm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVDW ymm, m128{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMOVDW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVDW", 2, Operands { v0, v1 })
+ // VPMOVDW zmm, ymm{k}{z}
+ if isZMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x33)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVDW zmm, m256{k}{z}
+ if isZMM(v0) && isM256kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x33)
+ m.mrsd(lcode(v[0]), addr(v[1]), 32)
+ })
+ }
+ // VPMOVDW xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x33)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVDW xmm, m64{k}{z}
+ if isEVEXXMM(v0) && isM64kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x33)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VPMOVDW ymm, xmm{k}{z}
+ if isEVEXYMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x33)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVDW ymm, m128{k}{z}
+ if isEVEXYMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x33)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVDW")
+ }
+ return p
+}
+
+// VPMOVM2B performs "Expand Bits of Mask Register to Packed Byte Integers".
+//
+// Mnemonic : VPMOVM2B
+// Supported forms : (3 forms)
+//
+// * VPMOVM2B k, zmm [AVX512BW]
+// * VPMOVM2B k, xmm [AVX512BW,AVX512VL]
+// * VPMOVM2B k, ymm [AVX512BW,AVX512VL]
+//
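+// Example (editor's sketch): the inverse of VPMOVB2M above, expanding each
+// mask bit into an all-ones or all-zeros byte of the destination vector:
+//
+//     p.VPMOVM2B(K1, ZMM0)    // zmm0 byte i = 0xff if k1 bit i is set, else 0x00
+//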
+func (self *Program) VPMOVM2B(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVM2B", 2, Operands { v0, v1 })
+ // VPMOVM2B k, zmm
+ if isK(v0) && isZMM(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit(0x48)
+ m.emit(0x28)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVM2B k, xmm
+ if isK(v0) && isEVEXXMM(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit(0x08)
+ m.emit(0x28)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVM2B k, ymm
+ if isK(v0) && isEVEXYMM(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit(0x28)
+ m.emit(0x28)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVM2B")
+ }
+ return p
+}
+
+// VPMOVM2D performs "Expand Bits of Mask Register to Packed Doubleword Integers".
+//
+// Mnemonic : VPMOVM2D
+// Supported forms : (3 forms)
+//
+// * VPMOVM2D k, zmm [AVX512DQ]
+// * VPMOVM2D k, xmm [AVX512DQ,AVX512VL]
+// * VPMOVM2D k, ymm [AVX512DQ,AVX512VL]
+//
+func (self *Program) VPMOVM2D(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVM2D", 2, Operands { v0, v1 })
+ // VPMOVM2D k, zmm
+ if isK(v0) && isZMM(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit(0x48)
+ m.emit(0x38)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVM2D k, xmm
+ if isK(v0) && isEVEXXMM(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit(0x08)
+ m.emit(0x38)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVM2D k, ymm
+ if isK(v0) && isEVEXYMM(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7e)
+ m.emit(0x28)
+ m.emit(0x38)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVM2D")
+ }
+ return p
+}
+
+// VPMOVM2Q performs "Expand Bits of Mask Register to Packed Quadword Integers".
+//
+// Mnemonic : VPMOVM2Q
+// Supported forms : (3 forms)
+//
+// * VPMOVM2Q k, zmm [AVX512DQ]
+// * VPMOVM2Q k, xmm [AVX512DQ,AVX512VL]
+// * VPMOVM2Q k, ymm [AVX512DQ,AVX512VL]
+//
+func (self *Program) VPMOVM2Q(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVM2Q", 2, Operands { v0, v1 })
+ // VPMOVM2Q k, zmm
+ if isK(v0) && isZMM(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit(0x48)
+ m.emit(0x38)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVM2Q k, xmm
+ if isK(v0) && isEVEXXMM(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit(0x08)
+ m.emit(0x38)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVM2Q k, ymm
+ if isK(v0) && isEVEXYMM(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit(0x28)
+ m.emit(0x38)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVM2Q")
+ }
+ return p
+}
+
+// VPMOVM2W performs "Expand Bits of Mask Register to Packed Word Integers".
+//
+// Mnemonic : VPMOVM2W
+// Supported forms : (3 forms)
+//
+// * VPMOVM2W k, zmm [AVX512BW]
+// * VPMOVM2W k, xmm [AVX512BW,AVX512VL]
+// * VPMOVM2W k, ymm [AVX512BW,AVX512VL]
+//
+func (self *Program) VPMOVM2W(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVM2W", 2, Operands { v0, v1 })
+ // VPMOVM2W k, zmm
+ if isK(v0) && isZMM(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit(0x48)
+ m.emit(0x28)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVM2W k, xmm
+ if isK(v0) && isEVEXXMM(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit(0x08)
+ m.emit(0x28)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVM2W k, ymm
+ if isK(v0) && isEVEXYMM(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit(0x28)
+ m.emit(0x28)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVM2W")
+ }
+ return p
+}
+
+// VPMOVMSKB performs "Move Byte Mask".
+//
+// Mnemonic : VPMOVMSKB
+// Supported forms : (2 forms)
+//
+// * VPMOVMSKB xmm, r32 [AVX]
+// * VPMOVMSKB ymm, r32 [AVX2]
+//
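+// Example (editor's sketch): collects the byte sign bits of a vector into a
+// general-purpose register, a common idiom after a bytewise compare:
+//
+//     p.VPMOVMSKB(YMM0, EAX)    // eax = 32-bit mask of ymm0 byte sign bits
+//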
+func (self *Program) VPMOVMSKB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVMSKB", 2, Operands { v0, v1 })
+ // VPMOVMSKB xmm, r32
+ if isXMM(v0) && isReg32(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), v[0], 0)
+ m.emit(0xd7)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVMSKB ymm, r32
+ if isYMM(v0) && isReg32(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[1]), v[0], 0)
+ m.emit(0xd7)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVMSKB")
+ }
+ return p
+}
+
+// VPMOVQ2M performs "Move Signs of Packed Quadword Integers to Mask Register".
+//
+// Mnemonic : VPMOVQ2M
+// Supported forms : (3 forms)
+//
+// * VPMOVQ2M zmm, k [AVX512DQ]
+// * VPMOVQ2M xmm, k [AVX512DQ,AVX512VL]
+// * VPMOVQ2M ymm, k [AVX512DQ,AVX512VL]
+//
+func (self *Program) VPMOVQ2M(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVQ2M", 2, Operands { v0, v1 })
+ // VPMOVQ2M zmm, k
+ if isZMM(v0) && isK(v1) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit(0x48)
+ m.emit(0x39)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVQ2M xmm, k
+ if isEVEXXMM(v0) && isK(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit(0x08)
+ m.emit(0x39)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVQ2M ymm, k
+ if isEVEXYMM(v0) && isK(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit(0x28)
+ m.emit(0x39)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVQ2M")
+ }
+ return p
+}
+
+// VPMOVQB performs "Down Convert Packed Quadword Values to Byte Values with Truncation".
+//
+// Mnemonic : VPMOVQB
+// Supported forms : (6 forms)
+//
+// * VPMOVQB zmm, xmm{k}{z} [AVX512F]
+// * VPMOVQB zmm, m64{k}{z} [AVX512F]
+// * VPMOVQB xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVQB xmm, m16{k}{z} [AVX512F,AVX512VL]
+// * VPMOVQB ymm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVQB ymm, m32{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMOVQB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVQB", 2, Operands { v0, v1 })
+ // VPMOVQB zmm, xmm{k}{z}
+ if isZMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x32)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVQB zmm, m64{k}{z}
+ if isZMM(v0) && isM64kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x32)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VPMOVQB xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x32)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVQB xmm, m16{k}{z}
+ if isEVEXXMM(v0) && isM16kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x32)
+ m.mrsd(lcode(v[0]), addr(v[1]), 2)
+ })
+ }
+ // VPMOVQB ymm, xmm{k}{z}
+ if isEVEXYMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x32)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVQB ymm, m32{k}{z}
+ if isEVEXYMM(v0) && isM32kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x32)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVQB")
+ }
+ return p
+}
+
+// VPMOVQD performs "Down Convert Packed Quadword Values to Doubleword Values with Truncation".
+//
+// Mnemonic : VPMOVQD
+// Supported forms : (6 forms)
+//
+// * VPMOVQD zmm, ymm{k}{z} [AVX512F]
+// * VPMOVQD zmm, m256{k}{z} [AVX512F]
+// * VPMOVQD xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVQD xmm, m64{k}{z} [AVX512F,AVX512VL]
+// * VPMOVQD ymm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVQD ymm, m128{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMOVQD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVQD", 2, Operands { v0, v1 })
+ // VPMOVQD zmm, ymm{k}{z}
+ if isZMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x35)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVQD zmm, m256{k}{z}
+ if isZMM(v0) && isM256kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x35)
+ m.mrsd(lcode(v[0]), addr(v[1]), 32)
+ })
+ }
+ // VPMOVQD xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x35)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVQD xmm, m64{k}{z}
+ if isEVEXXMM(v0) && isM64kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x35)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VPMOVQD ymm, xmm{k}{z}
+ if isEVEXYMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x35)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVQD ymm, m128{k}{z}
+ if isEVEXYMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x35)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVQD")
+ }
+ return p
+}
+
+// VPMOVQW performs "Down Convert Packed Quadword Values to Word Values with Truncation".
+//
+// Mnemonic : VPMOVQW
+// Supported forms : (6 forms)
+//
+// * VPMOVQW zmm, xmm{k}{z} [AVX512F]
+// * VPMOVQW zmm, m128{k}{z} [AVX512F]
+// * VPMOVQW xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVQW xmm, m32{k}{z} [AVX512F,AVX512VL]
+// * VPMOVQW ymm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVQW ymm, m64{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMOVQW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVQW", 2, Operands { v0, v1 })
+ // VPMOVQW zmm, xmm{k}{z}
+ if isZMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x34)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVQW zmm, m128{k}{z}
+ if isZMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x34)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ // VPMOVQW xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x34)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVQW xmm, m32{k}{z}
+ if isEVEXXMM(v0) && isM32kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x34)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ // VPMOVQW ymm, xmm{k}{z}
+ if isEVEXYMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x34)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVQW ymm, m64{k}{z}
+ if isEVEXYMM(v0) && isM64kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x34)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVQW")
+ }
+ return p
+}
+
+// VPMOVSDB performs "Down Convert Packed Doubleword Values to Byte Values with Signed Saturation".
+//
+// Mnemonic : VPMOVSDB
+// Supported forms : (6 forms)
+//
+// * VPMOVSDB zmm, xmm{k}{z} [AVX512F]
+// * VPMOVSDB zmm, m128{k}{z} [AVX512F]
+// * VPMOVSDB xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSDB xmm, m32{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSDB ymm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSDB ymm, m64{k}{z} [AVX512F,AVX512VL]
+//
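+// Example (editor's sketch): unlike the truncating VPMOVDB above, each
+// doubleword saturates to the signed byte range [-128, 127] on the way down:
+//
+//     p.VPMOVSDB(ZMM3, XMM1)    // xmm1 byte i = saturate_int8(zmm3 dword i)
+//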
+func (self *Program) VPMOVSDB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVSDB", 2, Operands { v0, v1 })
+ // VPMOVSDB zmm, xmm{k}{z}
+ if isZMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x21)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVSDB zmm, m128{k}{z}
+ if isZMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x21)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ // VPMOVSDB xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x21)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVSDB xmm, m32{k}{z}
+ if isEVEXXMM(v0) && isM32kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x21)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ // VPMOVSDB ymm, xmm{k}{z}
+ if isEVEXYMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x21)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVSDB ymm, m64{k}{z}
+ if isEVEXYMM(v0) && isM64kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x21)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVSDB")
+ }
+ return p
+}
+
+// VPMOVSDW performs "Down Convert Packed Doubleword Values to Word Values with Signed Saturation".
+//
+// Mnemonic : VPMOVSDW
+// Supported forms : (6 forms)
+//
+// * VPMOVSDW zmm, ymm{k}{z} [AVX512F]
+// * VPMOVSDW zmm, m256{k}{z} [AVX512F]
+// * VPMOVSDW xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSDW xmm, m64{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSDW ymm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSDW ymm, m128{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMOVSDW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVSDW", 2, Operands { v0, v1 })
+ // VPMOVSDW zmm, ymm{k}{z}
+ if isZMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x23)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVSDW zmm, m256{k}{z}
+ if isZMM(v0) && isM256kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x23)
+ m.mrsd(lcode(v[0]), addr(v[1]), 32)
+ })
+ }
+ // VPMOVSDW xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x23)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVSDW xmm, m64{k}{z}
+ if isEVEXXMM(v0) && isM64kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x23)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VPMOVSDW ymm, xmm{k}{z}
+ if isEVEXYMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x23)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVSDW ymm, m128{k}{z}
+ if isEVEXYMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x23)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVSDW")
+ }
+ return p
+}
+
+// VPMOVSQB performs "Down Convert Packed Quadword Values to Byte Values with Signed Saturation".
+//
+// Mnemonic : VPMOVSQB
+// Supported forms : (6 forms)
+//
+// * VPMOVSQB zmm, xmm{k}{z} [AVX512F]
+// * VPMOVSQB zmm, m64{k}{z} [AVX512F]
+// * VPMOVSQB xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSQB xmm, m16{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSQB ymm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSQB ymm, m32{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMOVSQB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVSQB", 2, Operands { v0, v1 })
+ // VPMOVSQB zmm, xmm{k}{z}
+ if isZMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x22)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVSQB zmm, m64{k}{z}
+ if isZMM(v0) && isM64kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x22)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VPMOVSQB xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x22)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVSQB xmm, m16{k}{z}
+ if isEVEXXMM(v0) && isM16kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x22)
+ m.mrsd(lcode(v[0]), addr(v[1]), 2)
+ })
+ }
+ // VPMOVSQB ymm, xmm{k}{z}
+ if isEVEXYMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x22)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVSQB ymm, m32{k}{z}
+ if isEVEXYMM(v0) && isM32kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x22)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVSQB")
+ }
+ return p
+}
+
+// VPMOVSQD performs "Down Convert Packed Quadword Values to Doubleword Values with Signed Saturation".
+//
+// Mnemonic : VPMOVSQD
+// Supported forms : (6 forms)
+//
+// * VPMOVSQD zmm, ymm{k}{z} [AVX512F]
+// * VPMOVSQD zmm, m256{k}{z} [AVX512F]
+// * VPMOVSQD xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSQD xmm, m64{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSQD ymm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSQD ymm, m128{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMOVSQD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVSQD", 2, Operands { v0, v1 })
+ // VPMOVSQD zmm, ymm{k}{z}
+ if isZMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x25)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVSQD zmm, m256{k}{z}
+ if isZMM(v0) && isM256kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x25)
+ m.mrsd(lcode(v[0]), addr(v[1]), 32)
+ })
+ }
+ // VPMOVSQD xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x25)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVSQD xmm, m64{k}{z}
+ if isEVEXXMM(v0) && isM64kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x25)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VPMOVSQD ymm, xmm{k}{z}
+ if isEVEXYMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x25)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVSQD ymm, m128{k}{z}
+ if isEVEXYMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x25)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVSQD")
+ }
+ return p
+}
+
+// VPMOVSQW performs "Down Convert Packed Quadword Values to Word Values with Signed Saturation".
+//
+// Mnemonic : VPMOVSQW
+// Supported forms : (6 forms)
+//
+// * VPMOVSQW zmm, xmm{k}{z} [AVX512F]
+// * VPMOVSQW zmm, m128{k}{z} [AVX512F]
+// * VPMOVSQW xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSQW xmm, m32{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSQW ymm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSQW ymm, m64{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMOVSQW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVSQW", 2, Operands { v0, v1 })
+ // VPMOVSQW zmm, xmm{k}{z}
+ if isZMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x24)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVSQW zmm, m128{k}{z}
+ if isZMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x24)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ // VPMOVSQW xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x24)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVSQW xmm, m32{k}{z}
+ if isEVEXXMM(v0) && isM32kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x24)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ // VPMOVSQW ymm, xmm{k}{z}
+ if isEVEXYMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x24)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVSQW ymm, m64{k}{z}
+ if isEVEXYMM(v0) && isM64kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x24)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVSQW")
+ }
+ return p
+}
+
+// VPMOVSWB performs "Down Convert Packed Word Values to Byte Values with Signed Saturation".
+//
+// Mnemonic : VPMOVSWB
+// Supported forms : (6 forms)
+//
+// * VPMOVSWB zmm, ymm{k}{z} [AVX512BW]
+// * VPMOVSWB zmm, m256{k}{z} [AVX512BW]
+// * VPMOVSWB xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMOVSWB xmm, m64{k}{z} [AVX512BW,AVX512VL]
+// * VPMOVSWB ymm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMOVSWB ymm, m128{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPMOVSWB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVSWB", 2, Operands { v0, v1 })
+ // VPMOVSWB zmm, ymm{k}{z}
+ if isZMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x20)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVSWB zmm, m256{k}{z}
+ if isZMM(v0) && isM256kz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x20)
+ m.mrsd(lcode(v[0]), addr(v[1]), 32)
+ })
+ }
+ // VPMOVSWB xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x20)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVSWB xmm, m64{k}{z}
+ if isEVEXXMM(v0) && isM64kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x20)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VPMOVSWB ymm, xmm{k}{z}
+ if isEVEXYMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x20)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVSWB ymm, m128{k}{z}
+ if isEVEXYMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x20)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVSWB")
+ }
+ return p
+}
+
+// VPMOVSXBD performs "Move Packed Byte Integers to Doubleword Integers with Sign Extension".
+//
+// Mnemonic : VPMOVSXBD
+// Supported forms : (10 forms)
+//
+// * VPMOVSXBD xmm, xmm [AVX]
+// * VPMOVSXBD m32, xmm [AVX]
+// * VPMOVSXBD xmm, ymm [AVX2]
+// * VPMOVSXBD m64, ymm [AVX2]
+// * VPMOVSXBD xmm, zmm{k}{z} [AVX512F]
+// * VPMOVSXBD m128, zmm{k}{z} [AVX512F]
+// * VPMOVSXBD xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSXBD xmm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSXBD m32, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSXBD m64, ymm{k}{z} [AVX512F,AVX512VL]
+//
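+// Example (editor's sketch): a widening move, sign-extending each of the low
+// four bytes of the source into a doubleword of the destination (register
+// form shown; the memory forms take this package's memory operands):
+//
+//     p.VPMOVSXBD(XMM2, XMM5)    // xmm5 dword i = int32(int8(xmm2 byte i))
+//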
+func (self *Program) VPMOVSXBD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVSXBD", 2, Operands { v0, v1 })
+ // VPMOVSXBD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0x21)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXBD m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x21)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPMOVSXBD xmm, ymm
+ if isXMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d)
+ m.emit(0x21)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXBD m64, ymm
+ if isM64(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x21)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPMOVSXBD xmm, zmm{k}{z}
+ if isEVEXXMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x21)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXBD m128, zmm{k}{z}
+ if isM128(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x21)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VPMOVSXBD xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x21)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXBD xmm, ymm{k}{z}
+ if isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x21)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXBD m32, xmm{k}{z}
+ if isM32(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x21)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VPMOVSXBD m64, ymm{k}{z}
+ if isM64(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x21)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVSXBD")
+ }
+ return p
+}
+
+// VPMOVSXBQ performs "Move Packed Byte Integers to Quadword Integers with Sign Extension".
+//
+// Mnemonic : VPMOVSXBQ
+// Supported forms : (10 forms)
+//
+// * VPMOVSXBQ xmm, xmm [AVX]
+// * VPMOVSXBQ m16, xmm [AVX]
+// * VPMOVSXBQ xmm, ymm [AVX2]
+// * VPMOVSXBQ m32, ymm [AVX2]
+// * VPMOVSXBQ xmm, zmm{k}{z} [AVX512F]
+// * VPMOVSXBQ m64, zmm{k}{z} [AVX512F]
+// * VPMOVSXBQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSXBQ xmm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSXBQ m16, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSXBQ m32, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMOVSXBQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVSXBQ", 2, Operands { v0, v1 })
+ // VPMOVSXBQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0x22)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXBQ m16, xmm
+ if isM16(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x22)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPMOVSXBQ xmm, ymm
+ if isXMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d)
+ m.emit(0x22)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXBQ m32, ymm
+ if isM32(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x22)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPMOVSXBQ xmm, zmm{k}{z}
+ if isEVEXXMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x22)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXBQ m64, zmm{k}{z}
+ if isM64(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x22)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VPMOVSXBQ xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x22)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXBQ xmm, ymm{k}{z}
+ if isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x22)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXBQ m16, xmm{k}{z}
+ if isM16(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x22)
+ m.mrsd(lcode(v[1]), addr(v[0]), 2)
+ })
+ }
+ // VPMOVSXBQ m32, ymm{k}{z}
+ if isM32(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x22)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVSXBQ")
+ }
+ return p
+}
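+
+// Sketch (assumed register constants): sign-extend the low 2 bytes of XMM1
+// into 2 quadwords in XMM0 via the AVX form:
+//
+//     p.VPMOVSXBQ(XMM1, XMM0)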
+
+// VPMOVSXBW performs "Move Packed Byte Integers to Word Integers with Sign Extension".
+//
+// Mnemonic : VPMOVSXBW
+// Supported forms : (10 forms)
+//
+// * VPMOVSXBW xmm, xmm [AVX]
+// * VPMOVSXBW m64, xmm [AVX]
+// * VPMOVSXBW xmm, ymm [AVX2]
+// * VPMOVSXBW m128, ymm [AVX2]
+// * VPMOVSXBW ymm, zmm{k}{z} [AVX512BW]
+// * VPMOVSXBW m256, zmm{k}{z} [AVX512BW]
+// * VPMOVSXBW xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMOVSXBW xmm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPMOVSXBW m64, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMOVSXBW m128, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPMOVSXBW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVSXBW", 2, Operands { v0, v1 })
+ // VPMOVSXBW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0x20)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXBW m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x20)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPMOVSXBW xmm, ymm
+ if isXMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d)
+ m.emit(0x20)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXBW m128, ymm
+ if isM128(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x20)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPMOVSXBW ymm, zmm{k}{z}
+ if isEVEXYMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x20)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXBW m256, zmm{k}{z}
+ if isM256(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x20)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VPMOVSXBW xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x20)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXBW xmm, ymm{k}{z}
+ if isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x20)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXBW m64, xmm{k}{z}
+ if isM64(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x20)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VPMOVSXBW m128, ymm{k}{z}
+ if isM128(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x20)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVSXBW")
+ }
+ return p
+}
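+
+// Sketch (assumed register constants): the AVX2 form widens all 16 bytes of
+// XMM1 into 16 signed words in YMM0:
+//
+//     p.VPMOVSXBW(XMM1, YMM0)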
+
+// VPMOVSXDQ performs "Move Packed Doubleword Integers to Quadword Integers with Sign Extension".
+//
+// Mnemonic : VPMOVSXDQ
+// Supported forms : (10 forms)
+//
+// * VPMOVSXDQ xmm, xmm [AVX]
+// * VPMOVSXDQ m64, xmm [AVX]
+// * VPMOVSXDQ xmm, ymm [AVX2]
+// * VPMOVSXDQ m128, ymm [AVX2]
+// * VPMOVSXDQ ymm, zmm{k}{z} [AVX512F]
+// * VPMOVSXDQ m256, zmm{k}{z} [AVX512F]
+// * VPMOVSXDQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSXDQ xmm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSXDQ m64, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSXDQ m128, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMOVSXDQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVSXDQ", 2, Operands { v0, v1 })
+ // VPMOVSXDQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0x25)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXDQ m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x25)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPMOVSXDQ xmm, ymm
+ if isXMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d)
+ m.emit(0x25)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXDQ m128, ymm
+ if isM128(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x25)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPMOVSXDQ ymm, zmm{k}{z}
+ if isEVEXYMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x25)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXDQ m256, zmm{k}{z}
+ if isM256(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x25)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VPMOVSXDQ xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x25)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXDQ xmm, ymm{k}{z}
+ if isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x25)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXDQ m64, xmm{k}{z}
+ if isM64(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x25)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VPMOVSXDQ m128, ymm{k}{z}
+ if isM128(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x25)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVSXDQ")
+ }
+ return p
+}
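+
+// Sketch (assumed register constants): sign-extend the low 2 doublewords of
+// XMM1 into 2 quadwords in XMM0:
+//
+//     p.VPMOVSXDQ(XMM1, XMM0)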
+
+// VPMOVSXWD performs "Move Packed Word Integers to Doubleword Integers with Sign Extension".
+//
+// Mnemonic : VPMOVSXWD
+// Supported forms : (10 forms)
+//
+// * VPMOVSXWD xmm, xmm [AVX]
+// * VPMOVSXWD m64, xmm [AVX]
+// * VPMOVSXWD xmm, ymm [AVX2]
+// * VPMOVSXWD m128, ymm [AVX2]
+// * VPMOVSXWD ymm, zmm{k}{z} [AVX512F]
+// * VPMOVSXWD m256, zmm{k}{z} [AVX512F]
+// * VPMOVSXWD xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSXWD xmm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSXWD m64, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSXWD m128, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMOVSXWD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVSXWD", 2, Operands { v0, v1 })
+ // VPMOVSXWD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0x23)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXWD m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x23)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPMOVSXWD xmm, ymm
+ if isXMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d)
+ m.emit(0x23)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXWD m128, ymm
+ if isM128(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x23)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPMOVSXWD ymm, zmm{k}{z}
+ if isEVEXYMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x23)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXWD m256, zmm{k}{z}
+ if isM256(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x23)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VPMOVSXWD xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x23)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXWD xmm, ymm{k}{z}
+ if isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x23)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXWD m64, xmm{k}{z}
+ if isM64(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x23)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VPMOVSXWD m128, ymm{k}{z}
+ if isM128(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x23)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVSXWD")
+ }
+ return p
+}
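+
+// Sketch (assumed register constants): the AVX2 form sign-extends the 8
+// words of XMM1 into 8 doublewords in YMM0:
+//
+//     p.VPMOVSXWD(XMM1, YMM0)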
+
+// VPMOVSXWQ performs "Move Packed Word Integers to Quadword Integers with Sign Extension".
+//
+// Mnemonic : VPMOVSXWQ
+// Supported forms : (10 forms)
+//
+// * VPMOVSXWQ xmm, xmm [AVX]
+// * VPMOVSXWQ m32, xmm [AVX]
+// * VPMOVSXWQ xmm, ymm [AVX2]
+// * VPMOVSXWQ m64, ymm [AVX2]
+// * VPMOVSXWQ xmm, zmm{k}{z} [AVX512F]
+// * VPMOVSXWQ m128, zmm{k}{z} [AVX512F]
+// * VPMOVSXWQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSXWQ xmm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSXWQ m32, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVSXWQ m64, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMOVSXWQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVSXWQ", 2, Operands { v0, v1 })
+ // VPMOVSXWQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0x24)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXWQ m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x24)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPMOVSXWQ xmm, ymm
+ if isXMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d)
+ m.emit(0x24)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXWQ m64, ymm
+ if isM64(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x24)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPMOVSXWQ xmm, zmm{k}{z}
+ if isEVEXXMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x24)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXWQ m128, zmm{k}{z}
+ if isM128(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x24)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VPMOVSXWQ xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x24)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXWQ xmm, ymm{k}{z}
+ if isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x24)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVSXWQ m32, xmm{k}{z}
+ if isM32(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x24)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VPMOVSXWQ m64, ymm{k}{z}
+ if isM64(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x24)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVSXWQ")
+ }
+ return p
+}
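+
+// Sketch (assumed register constants): sign-extend the low 2 words of XMM1
+// into 2 quadwords in XMM0:
+//
+//     p.VPMOVSXWQ(XMM1, XMM0)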
+
+// VPMOVUSDB performs "Down Convert Packed Doubleword Values to Byte Values with Unsigned Saturation".
+//
+// Mnemonic : VPMOVUSDB
+// Supported forms : (6 forms)
+//
+// * VPMOVUSDB zmm, xmm{k}{z} [AVX512F]
+// * VPMOVUSDB zmm, m128{k}{z} [AVX512F]
+// * VPMOVUSDB xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVUSDB xmm, m32{k}{z} [AVX512F,AVX512VL]
+// * VPMOVUSDB ymm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVUSDB ymm, m64{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMOVUSDB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVUSDB", 2, Operands { v0, v1 })
+ // VPMOVUSDB zmm, xmm{k}{z}
+ if isZMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVUSDB zmm, m128{k}{z}
+ if isZMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x11)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ // VPMOVUSDB xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVUSDB xmm, m32{k}{z}
+ if isEVEXXMM(v0) && isM32kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x11)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ // VPMOVUSDB ymm, xmm{k}{z}
+ if isEVEXYMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVUSDB ymm, m64{k}{z}
+ if isEVEXYMM(v0) && isM64kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x11)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVUSDB")
+ }
+ return p
+}
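+
+// Sketch (ZMM1/XMM0 are assumed register constants): saturate the 16
+// doublewords of ZMM1 to 16 unsigned bytes in XMM0 (all forms are AVX-512):
+//
+//     p.VPMOVUSDB(ZMM1, XMM0)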
+
+// VPMOVUSDW performs "Down Convert Packed Doubleword Values to Word Values with Unsigned Saturation".
+//
+// Mnemonic : VPMOVUSDW
+// Supported forms : (6 forms)
+//
+// * VPMOVUSDW zmm, ymm{k}{z} [AVX512F]
+// * VPMOVUSDW zmm, m256{k}{z} [AVX512F]
+// * VPMOVUSDW xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVUSDW xmm, m64{k}{z} [AVX512F,AVX512VL]
+// * VPMOVUSDW ymm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVUSDW ymm, m128{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMOVUSDW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVUSDW", 2, Operands { v0, v1 })
+ // VPMOVUSDW zmm, ymm{k}{z}
+ if isZMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x13)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVUSDW zmm, m256{k}{z}
+ if isZMM(v0) && isM256kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x13)
+ m.mrsd(lcode(v[0]), addr(v[1]), 32)
+ })
+ }
+ // VPMOVUSDW xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x13)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVUSDW xmm, m64{k}{z}
+ if isEVEXXMM(v0) && isM64kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x13)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VPMOVUSDW ymm, xmm{k}{z}
+ if isEVEXYMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x13)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVUSDW ymm, m128{k}{z}
+ if isEVEXYMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x13)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVUSDW")
+ }
+ return p
+}
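+
+// Sketch (assumed register constants): down-convert the 16 doublewords of
+// ZMM1 to 16 unsigned-saturated words in YMM0:
+//
+//     p.VPMOVUSDW(ZMM1, YMM0)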
+
+// VPMOVUSQB performs "Down Convert Packed Quadword Values to Byte Values with Unsigned Saturation".
+//
+// Mnemonic : VPMOVUSQB
+// Supported forms : (6 forms)
+//
+// * VPMOVUSQB zmm, xmm{k}{z} [AVX512F]
+// * VPMOVUSQB zmm, m64{k}{z} [AVX512F]
+// * VPMOVUSQB xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVUSQB xmm, m16{k}{z} [AVX512F,AVX512VL]
+// * VPMOVUSQB ymm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVUSQB ymm, m32{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMOVUSQB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVUSQB", 2, Operands { v0, v1 })
+ // VPMOVUSQB zmm, xmm{k}{z}
+ if isZMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x12)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVUSQB zmm, m64{k}{z}
+ if isZMM(v0) && isM64kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x12)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VPMOVUSQB xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x12)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVUSQB xmm, m16{k}{z}
+ if isEVEXXMM(v0) && isM16kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x12)
+ m.mrsd(lcode(v[0]), addr(v[1]), 2)
+ })
+ }
+ // VPMOVUSQB ymm, xmm{k}{z}
+ if isEVEXYMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x12)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVUSQB ymm, m32{k}{z}
+ if isEVEXYMM(v0) && isM32kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x12)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVUSQB")
+ }
+ return p
+}
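+
+// Sketch (assumed register constants): saturate the 8 quadwords of ZMM1 to
+// 8 unsigned bytes, written to the low 64 bits of XMM0:
+//
+//     p.VPMOVUSQB(ZMM1, XMM0)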
+
+// VPMOVUSQD performs "Down Convert Packed Quadword Values to Doubleword Values with Unsigned Saturation".
+//
+// Mnemonic : VPMOVUSQD
+// Supported forms : (6 forms)
+//
+// * VPMOVUSQD zmm, ymm{k}{z} [AVX512F]
+// * VPMOVUSQD zmm, m256{k}{z} [AVX512F]
+// * VPMOVUSQD xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVUSQD xmm, m64{k}{z} [AVX512F,AVX512VL]
+// * VPMOVUSQD ymm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVUSQD ymm, m128{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMOVUSQD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVUSQD", 2, Operands { v0, v1 })
+ // VPMOVUSQD zmm, ymm{k}{z}
+ if isZMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x15)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVUSQD zmm, m256{k}{z}
+ if isZMM(v0) && isM256kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x15)
+ m.mrsd(lcode(v[0]), addr(v[1]), 32)
+ })
+ }
+ // VPMOVUSQD xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x15)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVUSQD xmm, m64{k}{z}
+ if isEVEXXMM(v0) && isM64kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x15)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VPMOVUSQD ymm, xmm{k}{z}
+ if isEVEXYMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x15)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVUSQD ymm, m128{k}{z}
+ if isEVEXYMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x15)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVUSQD")
+ }
+ return p
+}
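+
+// Sketch (assumed register constants): down-convert the 8 quadwords of ZMM1
+// to 8 unsigned-saturated doublewords in YMM0:
+//
+//     p.VPMOVUSQD(ZMM1, YMM0)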
+
+// VPMOVUSQW performs "Down Convert Packed Quadword Values to Word Values with Unsigned Saturation".
+//
+// Mnemonic : VPMOVUSQW
+// Supported forms : (6 forms)
+//
+// * VPMOVUSQW zmm, xmm{k}{z} [AVX512F]
+// * VPMOVUSQW zmm, m128{k}{z} [AVX512F]
+// * VPMOVUSQW xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVUSQW xmm, m32{k}{z} [AVX512F,AVX512VL]
+// * VPMOVUSQW ymm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVUSQW ymm, m64{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMOVUSQW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVUSQW", 2, Operands { v0, v1 })
+ // VPMOVUSQW zmm, xmm{k}{z}
+ if isZMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x14)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVUSQW zmm, m128{k}{z}
+ if isZMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x14)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ // VPMOVUSQW xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x14)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVUSQW xmm, m32{k}{z}
+ if isEVEXXMM(v0) && isM32kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x14)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ // VPMOVUSQW ymm, xmm{k}{z}
+ if isEVEXYMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x14)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVUSQW ymm, m64{k}{z}
+ if isEVEXYMM(v0) && isM64kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x14)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVUSQW")
+ }
+ return p
+}
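+
+// Sketch (assumed register constants): saturate the 8 quadwords of ZMM1 to
+// 8 unsigned words in the low half of XMM0:
+//
+//     p.VPMOVUSQW(ZMM1, XMM0)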
+
+// VPMOVUSWB performs "Down Convert Packed Word Values to Byte Values with Unsigned Saturation".
+//
+// Mnemonic : VPMOVUSWB
+// Supported forms : (6 forms)
+//
+// * VPMOVUSWB zmm, ymm{k}{z} [AVX512BW]
+// * VPMOVUSWB zmm, m256{k}{z} [AVX512BW]
+// * VPMOVUSWB xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMOVUSWB xmm, m64{k}{z} [AVX512BW,AVX512VL]
+// * VPMOVUSWB ymm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMOVUSWB ymm, m128{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPMOVUSWB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVUSWB", 2, Operands { v0, v1 })
+ // VPMOVUSWB zmm, ymm{k}{z}
+ if isZMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVUSWB zmm, m256{k}{z}
+ if isZMM(v0) && isM256kz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x10)
+ m.mrsd(lcode(v[0]), addr(v[1]), 32)
+ })
+ }
+ // VPMOVUSWB xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVUSWB xmm, m64{k}{z}
+ if isEVEXXMM(v0) && isM64kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x10)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VPMOVUSWB ymm, xmm{k}{z}
+ if isEVEXYMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVUSWB ymm, m128{k}{z}
+ if isEVEXYMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x10)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVUSWB")
+ }
+ return p
+}
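+
+// Sketch (assumed register constants): down-convert the 32 words of ZMM1 to
+// 32 unsigned-saturated bytes in YMM0 (AVX512BW):
+//
+//     p.VPMOVUSWB(ZMM1, YMM0)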
+
+// VPMOVW2M performs "Move Signs of Packed Word Integers to Mask Register".
+//
+// Mnemonic : VPMOVW2M
+// Supported forms : (3 forms)
+//
+// * VPMOVW2M zmm, k [AVX512BW]
+// * VPMOVW2M xmm, k [AVX512BW,AVX512VL]
+// * VPMOVW2M ymm, k [AVX512BW,AVX512VL]
+//
+func (self *Program) VPMOVW2M(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVW2M", 2, Operands { v0, v1 })
+ // VPMOVW2M zmm, k
+ if isZMM(v0) && isK(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit(0x48)
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVW2M xmm, k
+ if isEVEXXMM(v0) && isK(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit(0x08)
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVW2M ymm, k
+ if isEVEXYMM(v0) && isK(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfe)
+ m.emit(0x28)
+ m.emit(0x29)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVW2M")
+ }
+ return p
+}
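+
+// Sketch (ZMM0 and K1 are assumed constants; mask-register naming may
+// differ): collect the sign bit of each of the 32 words in ZMM0 into K1:
+//
+//     p.VPMOVW2M(ZMM0, K1)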
+
+// VPMOVWB performs "Down Convert Packed Word Values to Byte Values with Truncation".
+//
+// Mnemonic : VPMOVWB
+// Supported forms : (6 forms)
+//
+// * VPMOVWB zmm, ymm{k}{z} [AVX512BW]
+// * VPMOVWB zmm, m256{k}{z} [AVX512BW]
+// * VPMOVWB xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMOVWB xmm, m64{k}{z} [AVX512BW,AVX512VL]
+// * VPMOVWB ymm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMOVWB ymm, m128{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPMOVWB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVWB", 2, Operands { v0, v1 })
+ // VPMOVWB zmm, ymm{k}{z}
+ if isZMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x30)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVWB zmm, m256{k}{z}
+ if isZMM(v0) && isM256kz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x30)
+ m.mrsd(lcode(v[0]), addr(v[1]), 32)
+ })
+ }
+ // VPMOVWB xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x30)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVWB xmm, m64{k}{z}
+ if isEVEXXMM(v0) && isM64kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x30)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VPMOVWB ymm, xmm{k}{z}
+ if isEVEXYMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x30)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // VPMOVWB ymm, m128{k}{z}
+ if isEVEXYMM(v0) && isM128kz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x30)
+ m.mrsd(lcode(v[0]), addr(v[1]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVWB")
+ }
+ return p
+}
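+
+// Sketch (assumed register constants): truncate the 32 words of ZMM1 to
+// their low bytes, producing 32 bytes in YMM0:
+//
+//     p.VPMOVWB(ZMM1, YMM0)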
+
+// VPMOVZXBD performs "Move Packed Byte Integers to Doubleword Integers with Zero Extension".
+//
+// Mnemonic : VPMOVZXBD
+// Supported forms : (10 forms)
+//
+// * VPMOVZXBD xmm, xmm [AVX]
+// * VPMOVZXBD m32, xmm [AVX]
+// * VPMOVZXBD xmm, ymm [AVX2]
+// * VPMOVZXBD m64, ymm [AVX2]
+// * VPMOVZXBD xmm, zmm{k}{z} [AVX512F]
+// * VPMOVZXBD m128, zmm{k}{z} [AVX512F]
+// * VPMOVZXBD xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVZXBD xmm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVZXBD m32, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVZXBD m64, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMOVZXBD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVZXBD", 2, Operands { v0, v1 })
+ // VPMOVZXBD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0x31)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXBD m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x31)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPMOVZXBD xmm, ymm
+ if isXMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d)
+ m.emit(0x31)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXBD m64, ymm
+ if isM64(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x31)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPMOVZXBD xmm, zmm{k}{z}
+ if isEVEXXMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x31)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXBD m128, zmm{k}{z}
+ if isM128(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x31)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VPMOVZXBD xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x31)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXBD xmm, ymm{k}{z}
+ if isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x31)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXBD m32, xmm{k}{z}
+ if isM32(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x31)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VPMOVZXBD m64, ymm{k}{z}
+ if isM64(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x31)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVZXBD")
+ }
+ return p
+}
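+
+// Sketch (assumed register constants): zero-extend the low 4 bytes of XMM1
+// into 4 doublewords in XMM0 via the AVX form:
+//
+//     p.VPMOVZXBD(XMM1, XMM0)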
+
+// VPMOVZXBQ performs "Move Packed Byte Integers to Quadword Integers with Zero Extension".
+//
+// Mnemonic : VPMOVZXBQ
+// Supported forms : (10 forms)
+//
+// * VPMOVZXBQ xmm, xmm [AVX]
+// * VPMOVZXBQ m16, xmm [AVX]
+// * VPMOVZXBQ xmm, ymm [AVX2]
+// * VPMOVZXBQ m32, ymm [AVX2]
+// * VPMOVZXBQ xmm, zmm{k}{z} [AVX512F]
+// * VPMOVZXBQ m64, zmm{k}{z} [AVX512F]
+// * VPMOVZXBQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVZXBQ xmm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVZXBQ m16, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVZXBQ m32, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMOVZXBQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVZXBQ", 2, Operands { v0, v1 })
+ // VPMOVZXBQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0x32)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXBQ m16, xmm
+ if isM16(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x32)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPMOVZXBQ xmm, ymm
+ if isXMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d)
+ m.emit(0x32)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXBQ m32, ymm
+ if isM32(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x32)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPMOVZXBQ xmm, zmm{k}{z}
+ if isEVEXXMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x32)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXBQ m64, zmm{k}{z}
+ if isM64(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x32)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VPMOVZXBQ xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x32)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXBQ xmm, ymm{k}{z}
+ if isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x32)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXBQ m16, xmm{k}{z}
+ if isM16(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x32)
+ m.mrsd(lcode(v[1]), addr(v[0]), 2)
+ })
+ }
+ // VPMOVZXBQ m32, ymm{k}{z}
+ if isM32(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x32)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVZXBQ")
+ }
+ return p
+}
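+
+// Sketch (assumed register constants): zero-extend the low 2 bytes of XMM1
+// into 2 quadwords in XMM0:
+//
+//     p.VPMOVZXBQ(XMM1, XMM0)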
+
+// VPMOVZXBW performs "Move Packed Byte Integers to Word Integers with Zero Extension".
+//
+// Mnemonic : VPMOVZXBW
+// Supported forms : (10 forms)
+//
+// * VPMOVZXBW xmm, xmm [AVX]
+// * VPMOVZXBW m64, xmm [AVX]
+// * VPMOVZXBW xmm, ymm [AVX2]
+// * VPMOVZXBW m128, ymm [AVX2]
+// * VPMOVZXBW ymm, zmm{k}{z} [AVX512BW]
+// * VPMOVZXBW m256, zmm{k}{z} [AVX512BW]
+// * VPMOVZXBW xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMOVZXBW xmm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPMOVZXBW m64, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMOVZXBW m128, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPMOVZXBW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVZXBW", 2, Operands { v0, v1 })
+ // VPMOVZXBW xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0x30)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXBW m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x30)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPMOVZXBW xmm, ymm
+ if isXMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d)
+ m.emit(0x30)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXBW m128, ymm
+ if isM128(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x30)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPMOVZXBW ymm, zmm{k}{z}
+ if isEVEXYMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x30)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXBW m256, zmm{k}{z}
+ if isM256(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x30)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VPMOVZXBW xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x30)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXBW xmm, ymm{k}{z}
+ if isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x30)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXBW m64, xmm{k}{z}
+ if isM64(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x30)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VPMOVZXBW m128, ymm{k}{z}
+ if isM128(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x30)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVZXBW")
+ }
+ return p
+}
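+
+// Sketch (assumed register constants): the AVX2 form zero-extends all 16
+// bytes of XMM1 into 16 words in YMM0:
+//
+//     p.VPMOVZXBW(XMM1, YMM0)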
+
+// VPMOVZXDQ performs "Move Packed Doubleword Integers to Quadword Integers with Zero Extension".
+//
+// Mnemonic : VPMOVZXDQ
+// Supported forms : (10 forms)
+//
+// * VPMOVZXDQ xmm, xmm [AVX]
+// * VPMOVZXDQ m64, xmm [AVX]
+// * VPMOVZXDQ xmm, ymm [AVX2]
+// * VPMOVZXDQ m128, ymm [AVX2]
+// * VPMOVZXDQ ymm, zmm{k}{z} [AVX512F]
+// * VPMOVZXDQ m256, zmm{k}{z} [AVX512F]
+// * VPMOVZXDQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVZXDQ xmm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVZXDQ m64, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVZXDQ m128, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMOVZXDQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVZXDQ", 2, Operands { v0, v1 })
+ // VPMOVZXDQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0x35)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXDQ m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x35)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPMOVZXDQ xmm, ymm
+ if isXMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d)
+ m.emit(0x35)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXDQ m128, ymm
+ if isM128(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x35)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPMOVZXDQ ymm, zmm{k}{z}
+ if isEVEXYMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x35)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXDQ m256, zmm{k}{z}
+ if isM256(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x35)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VPMOVZXDQ xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x35)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXDQ xmm, ymm{k}{z}
+ if isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x35)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXDQ m64, xmm{k}{z}
+ if isM64(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x35)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VPMOVZXDQ m128, ymm{k}{z}
+ if isM128(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x35)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVZXDQ")
+ }
+ return p
+}
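+
+// Sketch (assumed register constants): zero-extend the low 2 doublewords of
+// XMM1 into 2 quadwords in XMM0:
+//
+//     p.VPMOVZXDQ(XMM1, XMM0)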
+
+// VPMOVZXWD performs "Move Packed Word Integers to Doubleword Integers with Zero Extension".
+//
+// Mnemonic : VPMOVZXWD
+// Supported forms : (10 forms)
+//
+// * VPMOVZXWD xmm, xmm [AVX]
+// * VPMOVZXWD m64, xmm [AVX]
+// * VPMOVZXWD xmm, ymm [AVX2]
+// * VPMOVZXWD m128, ymm [AVX2]
+// * VPMOVZXWD ymm, zmm{k}{z} [AVX512F]
+// * VPMOVZXWD m256, zmm{k}{z} [AVX512F]
+// * VPMOVZXWD xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVZXWD xmm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVZXWD m64, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVZXWD m128, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMOVZXWD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVZXWD", 2, Operands { v0, v1 })
+ // VPMOVZXWD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0x33)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXWD m64, xmm
+ if isM64(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x33)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPMOVZXWD xmm, ymm
+ if isXMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d)
+ m.emit(0x33)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXWD m128, ymm
+ if isM128(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x33)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPMOVZXWD ymm, zmm{k}{z}
+ if isEVEXYMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x33)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXWD m256, zmm{k}{z}
+ if isM256(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x33)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VPMOVZXWD xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x33)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXWD xmm, ymm{k}{z}
+ if isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x33)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXWD m64, xmm{k}{z}
+ if isM64(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x33)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VPMOVZXWD m128, ymm{k}{z}
+ if isM128(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x33)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVZXWD")
+ }
+ return p
+}
+
+// VPMOVZXWQ performs "Move Packed Word Integers to Quadword Integers with Zero Extension".
+//
+// Mnemonic : VPMOVZXWQ
+// Supported forms : (10 forms)
+//
+// * VPMOVZXWQ xmm, xmm [AVX]
+// * VPMOVZXWQ m32, xmm [AVX]
+// * VPMOVZXWQ xmm, ymm [AVX2]
+// * VPMOVZXWQ m64, ymm [AVX2]
+// * VPMOVZXWQ xmm, zmm{k}{z} [AVX512F]
+// * VPMOVZXWQ m128, zmm{k}{z} [AVX512F]
+// * VPMOVZXWQ xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVZXWQ xmm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVZXWQ m32, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMOVZXWQ m64, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMOVZXWQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPMOVZXWQ", 2, Operands { v0, v1 })
+ // VPMOVZXWQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0x34)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXWQ m32, xmm
+ if isM32(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x34)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPMOVZXWQ xmm, ymm
+ if isXMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d)
+ m.emit(0x34)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXWQ m64, ymm
+ if isM64(v0) && isYMM(v1) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x34)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPMOVZXWQ xmm, zmm{k}{z}
+ if isEVEXXMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x34)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXWQ m128, zmm{k}{z}
+ if isM128(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x34)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VPMOVZXWQ xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x34)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXWQ xmm, ymm{k}{z}
+ if isEVEXXMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x34)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMOVZXWQ m32, xmm{k}{z}
+ if isM32(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x34)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VPMOVZXWQ m64, ymm{k}{z}
+ if isM64(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+ m.emit(0x34)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMOVZXWQ")
+ }
+ return p
+}
+
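+// Illustrative sketch, hand-written rather than generated: the
+// "VPMOVZXWQ m32, xmm" form takes a 32-bit memory source. The operand is
+// left as interface{} here and must satisfy isM32; building it with the
+// package's addressing helpers is assumed rather than shown.
+func exampleVPMOVZXWQ(p *Program, m32 interface{}) *Instruction {
+ // Zero-extend two words loaded from memory into two quadwords in XMM3.
+ return p.VPMOVZXWQ(m32, XMM3)
+}
+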
+// VPMULDQ performs "Multiply Packed Signed Doubleword Integers and Store Quadword Result".
+//
+// Mnemonic : VPMULDQ
+// Supported forms : (10 forms)
+//
+// * VPMULDQ xmm, xmm, xmm [AVX]
+// * VPMULDQ m128, xmm, xmm [AVX]
+// * VPMULDQ ymm, ymm, ymm [AVX2]
+// * VPMULDQ m256, ymm, ymm [AVX2]
+// * VPMULDQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPMULDQ zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPMULDQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMULDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMULDQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPMULDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMULDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMULDQ", 3, Operands { v0, v1, v2 })
+ // VPMULDQ xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x28)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULDQ m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x28)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMULDQ ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x28)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULDQ m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x28)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMULDQ m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x28)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMULDQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x28)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULDQ m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x28)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMULDQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x28)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULDQ m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x28)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPMULDQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x28)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMULDQ")
+ }
+ return p
+}
+
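+// Illustrative sketch, hand-written rather than generated: the three-operand
+// AVX form of VPMULDQ. As throughout this package, operands run sources
+// first, destination last. Register choices are arbitrary.
+func exampleVPMULDQ(p *Program) *Instruction {
+ // XMM2 = signed products of the low (even) doublewords of each quadword
+ // lane of XMM1 and XMM0.
+ return p.VPMULDQ(XMM0, XMM1, XMM2)
+}
+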
+// VPMULHRSW performs "Packed Multiply Signed Word Integers and Store High Result with Round and Scale".
+//
+// Mnemonic : VPMULHRSW
+// Supported forms : (10 forms)
+//
+// * VPMULHRSW xmm, xmm, xmm [AVX]
+// * VPMULHRSW m128, xmm, xmm [AVX]
+// * VPMULHRSW ymm, ymm, ymm [AVX2]
+// * VPMULHRSW m256, ymm, ymm [AVX2]
+// * VPMULHRSW zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPMULHRSW m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPMULHRSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMULHRSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMULHRSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPMULHRSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPMULHRSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMULHRSW", 3, Operands { v0, v1, v2 })
+ // VPMULHRSW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x0b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULHRSW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x0b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMULHRSW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x0b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULHRSW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x0b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMULHRSW zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x0b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULHRSW m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x0b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMULHRSW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x0b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULHRSW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x0b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMULHRSW ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x0b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULHRSW m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x0b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMULHRSW")
+ }
+ return p
+}
+
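+// Illustrative sketch, hand-written rather than generated: passing plain ZMM
+// registers selects the EVEX-encoded AVX-512BW form of VPMULHRSW; the {k}{z}
+// decoration is optional, and an unmasked register is assumed to satisfy
+// isZMMkz.
+func exampleVPMULHRSW(p *Program) *Instruction {
+ // ZMM2 = rounded, scaled high halves of the signed word products of
+ // ZMM1 and ZMM0.
+ return p.VPMULHRSW(ZMM0, ZMM1, ZMM2)
+}
+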
+// VPMULHUW performs "Multiply Packed Unsigned Word Integers and Store High Result".
+//
+// Mnemonic : VPMULHUW
+// Supported forms : (10 forms)
+//
+// * VPMULHUW xmm, xmm, xmm [AVX]
+// * VPMULHUW m128, xmm, xmm [AVX]
+// * VPMULHUW ymm, ymm, ymm [AVX2]
+// * VPMULHUW m256, ymm, ymm [AVX2]
+// * VPMULHUW zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPMULHUW m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPMULHUW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMULHUW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMULHUW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPMULHUW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPMULHUW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMULHUW", 3, Operands { v0, v1, v2 })
+ // VPMULHUW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xe4)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULHUW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xe4)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMULHUW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xe4)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULHUW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xe4)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMULHUW zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xe4)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULHUW m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe4)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMULHUW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xe4)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULHUW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe4)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMULHUW ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xe4)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULHUW m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe4)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMULHUW")
+ }
+ return p
+}
+
+// VPMULHW performs "Multiply Packed Signed Word Integers and Store High Result".
+//
+// Mnemonic : VPMULHW
+// Supported forms : (10 forms)
+//
+// * VPMULHW xmm, xmm, xmm [AVX]
+// * VPMULHW m128, xmm, xmm [AVX]
+// * VPMULHW ymm, ymm, ymm [AVX2]
+// * VPMULHW m256, ymm, ymm [AVX2]
+// * VPMULHW zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPMULHW m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPMULHW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMULHW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMULHW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPMULHW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPMULHW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMULHW", 3, Operands { v0, v1, v2 })
+ // VPMULHW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xe5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULHW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xe5)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMULHW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xe5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULHW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xe5)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMULHW zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xe5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULHW m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe5)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMULHW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xe5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULHW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe5)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMULHW ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xe5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULHW m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe5)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMULHW")
+ }
+ return p
+}
+
+// VPMULLD performs "Multiply Packed Signed Doubleword Integers and Store Low Result".
+//
+// Mnemonic : VPMULLD
+// Supported forms : (10 forms)
+//
+// * VPMULLD xmm, xmm, xmm [AVX]
+// * VPMULLD m128, xmm, xmm [AVX]
+// * VPMULLD ymm, ymm, ymm [AVX2]
+// * VPMULLD m256, ymm, ymm [AVX2]
+// * VPMULLD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPMULLD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPMULLD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMULLD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMULLD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPMULLD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMULLD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMULLD", 3, Operands { v0, v1, v2 })
+ // VPMULLD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x40)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULLD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x40)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMULLD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x40)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULLD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x40)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMULLD m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x40)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMULLD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x40)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULLD m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x40)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMULLD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x40)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULLD m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x40)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPMULLD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x40)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMULLD")
+ }
+ return p
+}
+
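+// Illustrative sketch, hand-written rather than generated: VPMULLD with a
+// 256-bit memory source selects the AVX2 form. The operand is left as
+// interface{} and must satisfy isM256; constructing it with the package's
+// addressing helpers is assumed rather than shown.
+func exampleVPMULLD(p *Program, m256 interface{}) *Instruction {
+ // YMM2 = low 32 bits of each signed doubleword product of memory and YMM1.
+ return p.VPMULLD(m256, YMM1, YMM2)
+}
+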
+// VPMULLQ performs "Multiply Packed Signed Quadword Integers and Store Low Result".
+//
+// Mnemonic : VPMULLQ
+// Supported forms : (6 forms)
+//
+// * VPMULLQ m512/m64bcst, zmm, zmm{k}{z} [AVX512DQ]
+// * VPMULLQ zmm, zmm, zmm{k}{z} [AVX512DQ]
+// * VPMULLQ m128/m64bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VPMULLQ xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VPMULLQ m256/m64bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VPMULLQ ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VPMULLQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMULLQ", 3, Operands { v0, v1, v2 })
+ // VPMULLQ m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x40)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMULLQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x40)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULLQ m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x40)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMULLQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x40)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULLQ m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x40)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPMULLQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x40)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMULLQ")
+ }
+ return p
+}
+
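+// Illustrative sketch, hand-written rather than generated: VPMULLQ has no
+// VEX encoding, so every form above is EVEX-only (AVX-512DQ). An unmasked
+// ZMM register is assumed to satisfy the {k}{z} destination check.
+func exampleVPMULLQ(p *Program) *Instruction {
+ // ZMM2 = low 64 bits of each signed quadword product of ZMM1 and ZMM0.
+ return p.VPMULLQ(ZMM0, ZMM1, ZMM2)
+}
+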
+// VPMULLW performs "Multiply Packed Signed Word Integers and Store Low Result".
+//
+// Mnemonic : VPMULLW
+// Supported forms : (10 forms)
+//
+// * VPMULLW xmm, xmm, xmm [AVX]
+// * VPMULLW m128, xmm, xmm [AVX]
+// * VPMULLW ymm, ymm, ymm [AVX2]
+// * VPMULLW m256, ymm, ymm [AVX2]
+// * VPMULLW zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPMULLW m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPMULLW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMULLW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPMULLW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPMULLW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPMULLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMULLW", 3, Operands { v0, v1, v2 })
+ // VPMULLW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xd5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULLW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xd5)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMULLW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xd5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULLW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xd5)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMULLW zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xd5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULLW m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xd5)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMULLW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xd5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULLW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xd5)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMULLW ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xd5)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULLW m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xd5)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMULLW")
+ }
+ return p
+}
+
+// VPMULTISHIFTQB performs "Select Packed Unaligned Bytes from Quadword Sources".
+//
+// Mnemonic : VPMULTISHIFTQB
+// Supported forms : (6 forms)
+//
+// * VPMULTISHIFTQB m128/m64bcst, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL]
+// * VPMULTISHIFTQB xmm, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL]
+// * VPMULTISHIFTQB m256/m64bcst, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL]
+// * VPMULTISHIFTQB ymm, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL]
+// * VPMULTISHIFTQB m512/m64bcst, zmm, zmm{k}{z} [AVX512VBMI]
+// * VPMULTISHIFTQB zmm, zmm, zmm{k}{z} [AVX512VBMI]
+//
+func (self *Program) VPMULTISHIFTQB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMULTISHIFTQB", 3, Operands { v0, v1, v2 })
+ // VPMULTISHIFTQB m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VBMI | ISA_AVX512VL)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x83)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMULTISHIFTQB xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VBMI | ISA_AVX512VL)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x83)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULTISHIFTQB m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VBMI | ISA_AVX512VL)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x83)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPMULTISHIFTQB ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VBMI | ISA_AVX512VL)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x83)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULTISHIFTQB m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512VBMI)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x83)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMULTISHIFTQB zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512VBMI)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x83)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMULTISHIFTQB")
+ }
+ return p
+}
+
+// VPMULUDQ performs "Multiply Packed Unsigned Doubleword Integers".
+//
+// Mnemonic : VPMULUDQ
+// Supported forms : (10 forms)
+//
+// * VPMULUDQ xmm, xmm, xmm [AVX]
+// * VPMULUDQ m128, xmm, xmm [AVX]
+// * VPMULUDQ ymm, ymm, ymm [AVX2]
+// * VPMULUDQ m256, ymm, ymm [AVX2]
+// * VPMULUDQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPMULUDQ zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPMULUDQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMULUDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPMULUDQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPMULUDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPMULUDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPMULUDQ", 3, Operands { v0, v1, v2 })
+ // VPMULUDQ xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xf4)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULUDQ m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xf4)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMULUDQ ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xf4)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULUDQ m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xf4)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPMULUDQ m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xf4)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPMULUDQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xf4)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULUDQ m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xf4)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPMULUDQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xf4)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPMULUDQ m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xf4)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPMULUDQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xf4)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPMULUDQ")
+ }
+ return p
+}
+
+// VPOPCNTD performs "Packed Population Count for Doubleword Integers".
+//
+// Mnemonic : VPOPCNTD
+// Supported forms : (2 forms)
+//
+// * VPOPCNTD m512/m32bcst, zmm{k}{z} [AVX512VPOPCNTDQ]
+// * VPOPCNTD zmm, zmm{k}{z} [AVX512VPOPCNTDQ]
+//
+func (self *Program) VPOPCNTD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPOPCNTD", 2, Operands { v0, v1 })
+ // VPOPCNTD m512/m32bcst, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512VPOPCNTDQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x55)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VPOPCNTD zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512VPOPCNTDQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x55)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPOPCNTD")
+ }
+ return p
+}
+
+// VPOPCNTQ performs "Packed Population Count for Quadword Integers".
+//
+// Mnemonic : VPOPCNTQ
+// Supported forms : (2 forms)
+//
+// * VPOPCNTQ m512/m64bcst, zmm{k}{z} [AVX512VPOPCNTDQ]
+// * VPOPCNTQ zmm, zmm{k}{z} [AVX512VPOPCNTDQ]
+//
+func (self *Program) VPOPCNTQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPOPCNTQ", 2, Operands { v0, v1 })
+ // VPOPCNTQ m512/m64bcst, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512VPOPCNTDQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x55)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VPOPCNTQ zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512VPOPCNTDQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x55)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPOPCNTQ")
+ }
+ return p
+}
+
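+// Illustrative sketch, hand-written rather than generated: the two
+// population-count instructions above are two-operand and EVEX-only
+// (AVX512VPOPCNTDQ). An unmasked ZMM destination is assumed to satisfy
+// isZMMkz.
+func exampleVPOPCNTQ(p *Program) *Instruction {
+ // ZMM1 = per-quadword count of set bits in ZMM0.
+ return p.VPOPCNTQ(ZMM0, ZMM1)
+}
+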
+// VPOR performs "Packed Bitwise Logical OR".
+//
+// Mnemonic : VPOR
+// Supported forms : (4 forms)
+//
+// * VPOR xmm, xmm, xmm [AVX]
+// * VPOR m128, xmm, xmm [AVX]
+// * VPOR ymm, ymm, ymm [AVX2]
+// * VPOR m256, ymm, ymm [AVX2]
+//
+func (self *Program) VPOR(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPOR", 3, Operands { v0, v1, v2 })
+ // VPOR xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xeb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPOR m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xeb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPOR ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xeb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPOR m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xeb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPOR")
+ }
+ return p
+}
+
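+// Illustrative sketch, hand-written rather than generated: VPOR is the
+// VEX-only spelling and tops out at 256 bits; AVX-512 code should use the
+// VPORD/VPORQ forms below. Register choices are arbitrary.
+func exampleVPOR(p *Program) *Instruction {
+ // YMM2 = YMM1 | YMM0, bitwise across all 256 bits.
+ return p.VPOR(YMM0, YMM1, YMM2)
+}
+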
+// VPORD performs "Bitwise Logical OR of Packed Doubleword Integers".
+//
+// Mnemonic : VPORD
+// Supported forms : (6 forms)
+//
+// * VPORD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPORD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPORD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPORD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPORD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPORD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPORD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPORD", 3, Operands { v0, v1, v2 })
+ // VPORD m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xeb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPORD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xeb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPORD m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xeb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPORD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xeb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPORD m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xeb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPORD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xeb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPORD")
+ }
+ return p
+}
+
+// VPORQ performs "Bitwise Logical OR of Packed Quadword Integers".
+//
+// Mnemonic : VPORQ
+// Supported forms : (6 forms)
+//
+// * VPORQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPORQ zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPORQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPORQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPORQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPORQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPORQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPORQ", 3, Operands { v0, v1, v2 })
+ // VPORQ m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xeb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPORQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xeb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPORQ m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xeb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPORQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xeb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPORQ m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xeb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPORQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xeb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPORQ")
+ }
+ return p
+}
+
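+// Every encoder in this file follows the pattern above: each operand form
+// that matches registers one candidate encoding through p.add, and the
+// assembler picks among the registered candidates when the program is
+// encoded. A minimal usage sketch for VPORQ (assuming the package's
+// exported register values such as ZMM0..ZMM2 and a *Program p obtained
+// elsewhere; operands are written source-first, destination-last, and a
+// bare register presumably satisfies the zmm{k}{z} check when no write
+// mask is wanted):
+//
+//	p.VPORQ(ZMM1, ZMM2, ZMM0)   // zmm0 := zmm2 | zmm1, unmasked
+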
+// VPPERM performs "Packed Permute Bytes".
+//
+// Mnemonic : VPPERM
+// Supported forms : (3 forms)
+//
+// * VPPERM xmm, xmm, xmm, xmm [XOP]
+// * VPPERM m128, xmm, xmm, xmm [XOP]
+// * VPPERM xmm, m128, xmm, xmm [XOP]
+//
+func (self *Program) VPPERM(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPPERM", 4, Operands { v0, v1, v2, v3 })
+ // VPPERM xmm, xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[2]) << 3))
+ m.emit(0xa3)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.emit(hlcode(v[0]) << 4)
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[2]) << 3))
+ m.emit(0xa3)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0]))
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VPPERM m128, xmm, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x80, hcode(v[3]), addr(v[0]), hlcode(v[2]))
+ m.emit(0xa3)
+ m.mrsd(lcode(v[3]), addr(v[0]), 1)
+ m.emit(hlcode(v[1]) << 4)
+ })
+ }
+ // VPPERM xmm, m128, xmm, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0xa3)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.emit(hlcode(v[0]) << 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPPERM")
+ }
+ return p
+}
+
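+// VPPERM's all-register form above is registered with two interchangeable
+// XOP encodings: with XOP.W clear (the 0x78 third prefix byte) the byte
+// selector rides in the trailing /is4 immediate and the second source in
+// ModRM.rm, while with XOP.W set (0xf8) the selector moves into ModRM.rm
+// and the second source into /is4. Registering both gives the assembler a
+// free choice between them.
+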
+// VPROLD performs "Rotate Packed Doubleword Left".
+//
+// Mnemonic : VPROLD
+// Supported forms : (6 forms)
+//
+// * VPROLD imm8, m512/m32bcst, zmm{k}{z} [AVX512F]
+// * VPROLD imm8, zmm, zmm{k}{z} [AVX512F]
+// * VPROLD imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPROLD imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPROLD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPROLD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPROLD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPROLD", 3, Operands { v0, v1, v2 })
+ // VPROLD imm8, m512/m32bcst, zmm{k}{z}
+ if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x72)
+ m.mrsd(1, addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPROLD imm8, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x72)
+ m.emit(0xc8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPROLD imm8, m128/m32bcst, xmm{k}{z}
+ if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x72)
+ m.mrsd(1, addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPROLD imm8, m256/m32bcst, ymm{k}{z}
+ if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x72)
+ m.mrsd(1, addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPROLD imm8, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x72)
+ m.emit(0xc8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPROLD imm8, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x72)
+ m.emit(0xc8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPROLD")
+ }
+ return p
+}
+
+// VPROLQ performs "Rotate Packed Quadword Left".
+//
+// Mnemonic : VPROLQ
+// Supported forms : (6 forms)
+//
+// * VPROLQ imm8, m512/m64bcst, zmm{k}{z} [AVX512F]
+// * VPROLQ imm8, zmm, zmm{k}{z} [AVX512F]
+// * VPROLQ imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPROLQ imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPROLQ imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPROLQ imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPROLQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPROLQ", 3, Operands { v0, v1, v2 })
+ // VPROLQ imm8, m512/m64bcst, zmm{k}{z}
+ if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x72)
+ m.mrsd(1, addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPROLQ imm8, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x72)
+ m.emit(0xc8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPROLQ imm8, m128/m64bcst, xmm{k}{z}
+ if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x72)
+ m.mrsd(1, addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPROLQ imm8, m256/m64bcst, ymm{k}{z}
+ if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x72)
+ m.mrsd(1, addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPROLQ imm8, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x72)
+ m.emit(0xc8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPROLQ imm8, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x72)
+ m.emit(0xc8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPROLQ")
+ }
+ return p
+}
+
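+// VPROLD/VPROLQ and the VPRORD/VPRORQ pair further below share opcode
+// 0F 72 and are told apart by the ModRM.reg opcode extension: /1 rotates
+// left, /0 rotates right. That is why the register forms here emit
+// 0xc8|reg while VPROR* emits 0xc0|reg, and why the memory forms pass 1
+// (here) or 0 (there) as the first argument to m.mrsd. The destination
+// travels in EVEX.vvvv instead of ModRM.reg. A sketch (assuming exported
+// ZMM register names and that plain Go integers satisfy isImm8):
+//
+//	p.VPROLQ(3, ZMM1, ZMM0)   // each qword of zmm1, rotated left by 3, into zmm0
+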
+// VPROLVD performs "Variable Rotate Packed Doubleword Left".
+//
+// Mnemonic : VPROLVD
+// Supported forms : (6 forms)
+//
+// * VPROLVD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPROLVD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPROLVD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPROLVD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPROLVD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPROLVD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPROLVD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPROLVD", 3, Operands { v0, v1, v2 })
+ // VPROLVD m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x15)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPROLVD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x15)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPROLVD m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x15)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPROLVD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x15)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPROLVD m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x15)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPROLVD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x15)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPROLVD")
+ }
+ return p
+}
+
+// VPROLVQ performs "Variable Rotate Packed Quadword Left".
+//
+// Mnemonic : VPROLVQ
+// Supported forms : (6 forms)
+//
+// * VPROLVQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPROLVQ zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPROLVQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPROLVQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPROLVQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPROLVQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPROLVQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPROLVQ", 3, Operands { v0, v1, v2 })
+ // VPROLVQ m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x15)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPROLVQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x15)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPROLVQ m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x15)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPROLVQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x15)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPROLVQ m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x15)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPROLVQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x15)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPROLVQ")
+ }
+ return p
+}
+
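+// The variable-rotate forms take per-element counts from another vector:
+// VPROLVD/VPROLVQ live at opcode 0x15 in the 0F38 map (the 0b10 passed to
+// m.evex, matching the map bits of the hand-emitted 0xf2 prefix byte in
+// the register forms), with EVEX.W (0x05 versus 0x85 in the second m.evex
+// argument) separating the doubleword and quadword variants. The
+// VPRORVD/VPRORVQ encoders below have the same shape at opcode 0x14.
+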
+// VPRORD performs "Rotate Packed Doubleword Right".
+//
+// Mnemonic : VPRORD
+// Supported forms : (6 forms)
+//
+// * VPRORD imm8, m512/m32bcst, zmm{k}{z} [AVX512F]
+// * VPRORD imm8, zmm, zmm{k}{z} [AVX512F]
+// * VPRORD imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPRORD imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPRORD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPRORD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPRORD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPRORD", 3, Operands { v0, v1, v2 })
+ // VPRORD imm8, m512/m32bcst, zmm{k}{z}
+ if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x72)
+ m.mrsd(0, addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPRORD imm8, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x72)
+ m.emit(0xc0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPRORD imm8, m128/m32bcst, xmm{k}{z}
+ if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x72)
+ m.mrsd(0, addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPRORD imm8, m256/m32bcst, ymm{k}{z}
+ if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x72)
+ m.mrsd(0, addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPRORD imm8, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x72)
+ m.emit(0xc0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPRORD imm8, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x72)
+ m.emit(0xc0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPRORD")
+ }
+ return p
+}
+
+// VPRORQ performs "Rotate Packed Quadword Right".
+//
+// Mnemonic : VPRORQ
+// Supported forms : (6 forms)
+//
+// * VPRORQ imm8, m512/m64bcst, zmm{k}{z} [AVX512F]
+// * VPRORQ imm8, zmm, zmm{k}{z} [AVX512F]
+// * VPRORQ imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPRORQ imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPRORQ imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPRORQ imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPRORQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPRORQ", 3, Operands { v0, v1, v2 })
+ // VPRORQ imm8, m512/m64bcst, zmm{k}{z}
+ if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x72)
+ m.mrsd(0, addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPRORQ imm8, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x72)
+ m.emit(0xc0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPRORQ imm8, m128/m64bcst, xmm{k}{z}
+ if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x72)
+ m.mrsd(0, addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPRORQ imm8, m256/m64bcst, ymm{k}{z}
+ if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x72)
+ m.mrsd(0, addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPRORQ imm8, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x72)
+ m.emit(0xc0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPRORQ imm8, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x72)
+ m.emit(0xc0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPRORQ")
+ }
+ return p
+}
+
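+// Only the ModRM.reg extension separates these right rotates from the left
+// rotates above. A mirrored sketch (same assumptions about exported
+// register names and imm8 handling as the VPROLQ example):
+//
+//	p.VPRORD(1, YMM2, YMM2)   // rotate each dword of ymm2 right by 1, in place
+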
+// VPRORVD performs "Variable Rotate Packed Doubleword Right".
+//
+// Mnemonic : VPRORVD
+// Supported forms : (6 forms)
+//
+// * VPRORVD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPRORVD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPRORVD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPRORVD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPRORVD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPRORVD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPRORVD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPRORVD", 3, Operands { v0, v1, v2 })
+ // VPRORVD m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x14)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPRORVD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x14)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPRORVD m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x14)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPRORVD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x14)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPRORVD m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x14)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPRORVD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x14)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPRORVD")
+ }
+ return p
+}
+
+// VPRORVQ performs "Variable Rotate Packed Quadword Right".
+//
+// Mnemonic : VPRORVQ
+// Supported forms : (6 forms)
+//
+// * VPRORVQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPRORVQ zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPRORVQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPRORVQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPRORVQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPRORVQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPRORVQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPRORVQ", 3, Operands { v0, v1, v2 })
+ // VPRORVQ m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x14)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPRORVQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x14)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPRORVQ m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x14)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPRORVQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x14)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPRORVQ m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x14)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPRORVQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x14)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPRORVQ")
+ }
+ return p
+}
+
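+// A split worth noting in all of these EVEX encoders: register-register
+// forms hand-emit the four prefix bytes (0x62 plus three payload bytes)
+// because no addressing is involved, while memory forms go through m.evex
+// and m.mrsd so the compressed disp8*N displacement can be scaled by the
+// element size given as m.mrsd's final argument (64, 32 or 16 here).
+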
+// VPROTB performs "Packed Rotate Bytes".
+//
+// Mnemonic : VPROTB
+// Supported forms : (5 forms)
+//
+// * VPROTB imm8, xmm, xmm [XOP]
+// * VPROTB xmm, xmm, xmm [XOP]
+// * VPROTB m128, xmm, xmm [XOP]
+// * VPROTB imm8, m128, xmm [XOP]
+// * VPROTB xmm, m128, xmm [XOP]
+//
+func (self *Program) VPROTB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPROTB", 3, Operands { v0, v1, v2 })
+ // VPROTB imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78)
+ m.emit(0xc0)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPROTB xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[0]) << 3))
+ m.emit(0x90)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+ m.emit(0x90)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPROTB m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x90)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPROTB imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0xc0)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPROTB xmm, m128, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x90)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPROTB")
+ }
+ return p
+}
+
+// VPROTD performs "Packed Rotate Doublewords".
+//
+// Mnemonic : VPROTD
+// Supported forms : (5 forms)
+//
+// * VPROTD imm8, xmm, xmm [XOP]
+// * VPROTD xmm, xmm, xmm [XOP]
+// * VPROTD m128, xmm, xmm [XOP]
+// * VPROTD imm8, m128, xmm [XOP]
+// * VPROTD xmm, m128, xmm [XOP]
+//
+func (self *Program) VPROTD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPROTD", 3, Operands { v0, v1, v2 })
+ // VPROTD imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78)
+ m.emit(0xc2)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPROTD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[0]) << 3))
+ m.emit(0x92)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+ m.emit(0x92)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPROTD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x92)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPROTD imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0xc2)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPROTD xmm, m128, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x92)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPROTD")
+ }
+ return p
+}
+
+// VPROTQ performs "Packed Rotate Quadwords".
+//
+// Mnemonic : VPROTQ
+// Supported forms : (5 forms)
+//
+// * VPROTQ imm8, xmm, xmm [XOP]
+// * VPROTQ xmm, xmm, xmm [XOP]
+// * VPROTQ m128, xmm, xmm [XOP]
+// * VPROTQ imm8, m128, xmm [XOP]
+// * VPROTQ xmm, m128, xmm [XOP]
+//
+func (self *Program) VPROTQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPROTQ", 3, Operands { v0, v1, v2 })
+ // VPROTQ imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78)
+ m.emit(0xc3)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPROTQ xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[0]) << 3))
+ m.emit(0x93)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+ m.emit(0x93)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPROTQ m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x93)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPROTQ imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0xc3)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPROTQ xmm, m128, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x93)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPROTQ")
+ }
+ return p
+}
+
+// VPROTW performs "Packed Rotate Words".
+//
+// Mnemonic : VPROTW
+// Supported forms : (5 forms)
+//
+// * VPROTW imm8, xmm, xmm [XOP]
+// * VPROTW xmm, xmm, xmm [XOP]
+// * VPROTW m128, xmm, xmm [XOP]
+// * VPROTW imm8, m128, xmm [XOP]
+// * VPROTW xmm, m128, xmm [XOP]
+//
+func (self *Program) VPROTW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPROTW", 3, Operands { v0, v1, v2 })
+ // VPROTW imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe8 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78)
+ m.emit(0xc1)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPROTW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[0]) << 3))
+ m.emit(0x91)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+ m.emit(0x91)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPROTW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x91)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPROTW imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1000, 0x00, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0xc1)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPROTW xmm, m128, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x91)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPROTW")
+ }
+ return p
+}
+
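+// The XOP rotate group (VPROTB/VPROTW/VPROTD/VPROTQ) comes in an immediate
+// flavor (opcodes 0xc0-0xc3 in XOP map 0b1000) and a per-element count
+// flavor (opcodes 0x90-0x93 in map 0b1001), again with two interchangeable
+// encodings for the all-register variant. A sketch (assuming exported XMM
+// register names):
+//
+//	p.VPROTW(8, XMM1, XMM0)   // rotating each word left by 8 swaps its bytes
+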
+// VPSADBW performs "Compute Sum of Absolute Differences".
+//
+// Mnemonic : VPSADBW
+// Supported forms : (10 forms)
+//
+// * VPSADBW xmm, xmm, xmm [AVX]
+// * VPSADBW m128, xmm, xmm [AVX]
+// * VPSADBW ymm, ymm, ymm [AVX2]
+// * VPSADBW m256, ymm, ymm [AVX2]
+// * VPSADBW zmm, zmm, zmm [AVX512BW]
+// * VPSADBW m512, zmm, zmm [AVX512BW]
+// * VPSADBW xmm, xmm, xmm [AVX512BW,AVX512VL]
+// * VPSADBW m128, xmm, xmm [AVX512BW,AVX512VL]
+// * VPSADBW ymm, ymm, ymm [AVX512BW,AVX512VL]
+// * VPSADBW m256, ymm, ymm [AVX512BW,AVX512VL]
+//
+func (self *Program) VPSADBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSADBW", 3, Operands { v0, v1, v2 })
+ // VPSADBW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xf6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSADBW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xf6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSADBW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xf6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSADBW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xf6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSADBW zmm, zmm, zmm
+ if isZMM(v0) && isZMM(v1) && isZMM(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40)
+ m.emit(0xf6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSADBW m512, zmm, zmm
+ if isM512(v0) && isZMM(v1) && isZMM(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
+ m.emit(0xf6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPSADBW xmm, xmm, xmm
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x00)
+ m.emit(0xf6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSADBW m128, xmm, xmm
+ if isM128(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
+ m.emit(0xf6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSADBW ymm, ymm, ymm
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x20)
+ m.emit(0xf6)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSADBW m256, ymm, ymm
+ if isM256(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0)
+ m.emit(0xf6)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSADBW")
+ }
+ return p
+}
+
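+// VPSADBW is carried in both VEX (AVX/AVX2) and EVEX (AVX512BW) forms; the
+// EVEX candidates pass zeroes for the mask, zeroing and broadcast slots
+// because the instruction supports none of them. For low registers both
+// the VEX and EVEX predicates can match, so both candidates are registered
+// and the assembler is free to settle on the shorter VEX encoding;
+// presumably only registers beyond xmm15/ymm15 fail isXMM/isYMM and are
+// left with the EVEX forms alone. A sketch:
+//
+//	p.VPSADBW(XMM1, XMM2, XMM0)
+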
+// VPSCATTERDD performs "Scatter Packed Doubleword Values with Signed Doubleword Indices".
+//
+// Mnemonic : VPSCATTERDD
+// Supported forms : (3 forms)
+//
+// * VPSCATTERDD zmm, vm32z{k} [AVX512F]
+// * VPSCATTERDD xmm, vm32x{k} [AVX512F,AVX512VL]
+// * VPSCATTERDD ymm, vm32y{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VPSCATTERDD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPSCATTERDD", 2, Operands { v0, v1 })
+ // VPSCATTERDD zmm, vm32z{k}
+ if isZMM(v0) && isVMZk(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0xa0)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ // VPSCATTERDD xmm, vm32x{k}
+ if isEVEXXMM(v0) && isVMXk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0xa0)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ // VPSCATTERDD ymm, vm32y{k}
+ if isEVEXYMM(v0) && isVMYk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0xa0)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSCATTERDD")
+ }
+ return p
+}
+
+// VPSCATTERDQ performs "Scatter Packed Quadword Values with Signed Doubleword Indices".
+//
+// Mnemonic : VPSCATTERDQ
+// Supported forms : (3 forms)
+//
+// * VPSCATTERDQ zmm, vm32y{k} [AVX512F]
+// * VPSCATTERDQ xmm, vm32x{k} [AVX512F,AVX512VL]
+// * VPSCATTERDQ ymm, vm32x{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VPSCATTERDQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPSCATTERDQ", 2, Operands { v0, v1 })
+ // VPSCATTERDQ zmm, vm32y{k}
+ if isZMM(v0) && isVMYk(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0xa0)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VPSCATTERDQ xmm, vm32x{k}
+ if isEVEXXMM(v0) && isVMXk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0xa0)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VPSCATTERDQ ymm, vm32x{k}
+ if isEVEXYMM(v0) && isVMXk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0xa0)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSCATTERDQ")
+ }
+ return p
+}
+
+// VPSCATTERQD performs "Scatter Packed Doubleword Values with Signed Quadword Indices".
+//
+// Mnemonic : VPSCATTERQD
+// Supported forms : (3 forms)
+//
+// * VPSCATTERQD ymm, vm64z{k} [AVX512F]
+// * VPSCATTERQD xmm, vm64x{k} [AVX512F,AVX512VL]
+// * VPSCATTERQD xmm, vm64y{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VPSCATTERQD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPSCATTERQD", 2, Operands { v0, v1 })
+ // VPSCATTERQD ymm, vm64z{k}
+ if isEVEXYMM(v0) && isVMZk(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0xa1)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ // VPSCATTERQD xmm, vm64x{k}
+ if isEVEXXMM(v0) && isVMXk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0xa1)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ // VPSCATTERQD xmm, vm64y{k}
+ if isEVEXXMM(v0) && isVMYk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0xa1)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSCATTERQD")
+ }
+ return p
+}
+
+// VPSCATTERQQ performs "Scatter Packed Quadword Values with Signed Quadword Indices".
+//
+// Mnemonic : VPSCATTERQQ
+// Supported forms : (3 forms)
+//
+// * VPSCATTERQQ zmm, vm64z{k} [AVX512F]
+// * VPSCATTERQQ xmm, vm64x{k} [AVX512F,AVX512VL]
+// * VPSCATTERQQ ymm, vm64y{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VPSCATTERQQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPSCATTERQQ", 2, Operands { v0, v1 })
+ // VPSCATTERQQ zmm, vm64z{k}
+ if isZMM(v0) && isVMZk(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0xa1)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VPSCATTERQQ xmm, vm64x{k}
+ if isEVEXXMM(v0) && isVMXk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0xa1)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VPSCATTERQQ ymm, vm64y{k}
+ if isEVEXYMM(v0) && isVMYk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0xa1)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSCATTERQQ")
+ }
+ return p
+}
+
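+// The scatter encoders differ from the arithmetic ones in two respects:
+// the memory operand is a vector of indices (vm32*/vm64*) whose write mask
+// is pulled from the operand itself via kcode(v[1]), and m.mrsd's final
+// argument is the data element size (4 for doubleword, 8 for quadword),
+// which sets the disp8*N scaling. Every supported form carries {k} because
+// an AVX-512 scatter always executes under a write mask.
+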
+// VPSHAB performs "Packed Shift Arithmetic Bytes".
+//
+// Mnemonic : VPSHAB
+// Supported forms : (3 forms)
+//
+// * VPSHAB xmm, xmm, xmm [XOP]
+// * VPSHAB m128, xmm, xmm [XOP]
+// * VPSHAB xmm, m128, xmm [XOP]
+//
+func (self *Program) VPSHAB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSHAB", 3, Operands { v0, v1, v2 })
+ // VPSHAB xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[0]) << 3))
+ m.emit(0x98)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+ m.emit(0x98)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSHAB m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x98)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSHAB xmm, m128, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x98)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSHAB")
+ }
+ return p
+}
+
+// VPSHAD performs "Packed Shift Arithmetic Doublewords".
+//
+// Mnemonic : VPSHAD
+// Supported forms : (3 forms)
+//
+// * VPSHAD xmm, xmm, xmm [XOP]
+// * VPSHAD m128, xmm, xmm [XOP]
+// * VPSHAD xmm, m128, xmm [XOP]
+//
+func (self *Program) VPSHAD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSHAD", 3, Operands { v0, v1, v2 })
+ // VPSHAD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[0]) << 3))
+ m.emit(0x9a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+ m.emit(0x9a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSHAD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x9a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSHAD xmm, m128, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x9a)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSHAD")
+ }
+ return p
+}
+
+// VPSHAQ performs "Packed Shift Arithmetic Quadwords".
+//
+// Mnemonic : VPSHAQ
+// Supported forms : (3 forms)
+//
+// * VPSHAQ xmm, xmm, xmm [XOP]
+// * VPSHAQ m128, xmm, xmm [XOP]
+// * VPSHAQ xmm, m128, xmm [XOP]
+//
+func (self *Program) VPSHAQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSHAQ", 3, Operands { v0, v1, v2 })
+ // VPSHAQ xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[0]) << 3))
+ m.emit(0x9b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+ m.emit(0x9b)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSHAQ m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x9b)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSHAQ xmm, m128, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x9b)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSHAQ")
+ }
+ return p
+}
+
+// VPSHAW performs "Packed Shift Arithmetic Words".
+//
+// Mnemonic : VPSHAW
+// Supported forms : (3 forms)
+//
+// * VPSHAW xmm, xmm, xmm [XOP]
+// * VPSHAW m128, xmm, xmm [XOP]
+// * VPSHAW xmm, m128, xmm [XOP]
+//
+func (self *Program) VPSHAW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSHAW", 3, Operands { v0, v1, v2 })
+ // VPSHAW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[0]) << 3))
+ m.emit(0x99)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+ m.emit(0x99)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSHAW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x99)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSHAW xmm, m128, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x99)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSHAW")
+ }
+ return p
+}
+
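+// The XOP arithmetic shifts (VPSHAB/VPSHAW/VPSHAD/VPSHAQ, opcodes
+// 0x98-0x9b) read a signed per-element count from the first operand:
+// positive counts shift left, negative counts shift right arithmetically.
+// As with VPPERM, the all-register form is registered twice, with XOP.W
+// choosing whether the counts or the data ride in ModRM.rm. A sketch
+// (assuming exported XMM register names):
+//
+//	p.VPSHAD(XMM2, XMM1, XMM0)   // xmm0 := xmm1 shifted per dword by the signed counts in xmm2
+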
+// VPSHLB performs "Packed Shift Logical Bytes".
+//
+// Mnemonic : VPSHLB
+// Supported forms : (3 forms)
+//
+// * VPSHLB xmm, xmm, xmm [XOP]
+// * VPSHLB m128, xmm, xmm [XOP]
+// * VPSHLB xmm, m128, xmm [XOP]
+//
+func (self *Program) VPSHLB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSHLB", 3, Operands { v0, v1, v2 })
+ // VPSHLB xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[0]) << 3))
+ m.emit(0x94)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+ m.emit(0x94)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSHLB m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x94)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSHLB xmm, m128, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x94)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSHLB")
+ }
+ return p
+}
+
+// VPSHLD performs "Packed Shift Logical Doublewords".
+//
+// Mnemonic : VPSHLD
+// Supported forms : (3 forms)
+//
+// * VPSHLD xmm, xmm, xmm [XOP]
+// * VPSHLD m128, xmm, xmm [XOP]
+// * VPSHLD xmm, m128, xmm [XOP]
+//
+func (self *Program) VPSHLD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSHLD", 3, Operands { v0, v1, v2 })
+ // VPSHLD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[0]) << 3))
+ m.emit(0x96)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+ m.emit(0x96)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSHLD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x96)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSHLD xmm, m128, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x96)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSHLD")
+ }
+ return p
+}
+
+// VPSHLQ performs "Packed Shift Logical Quadwords".
+//
+// Mnemonic : VPSHLQ
+// Supported forms : (3 forms)
+//
+// * VPSHLQ xmm, xmm, xmm [XOP]
+// * VPSHLQ m128, xmm, xmm [XOP]
+// * VPSHLQ xmm, m128, xmm [XOP]
+//
+func (self *Program) VPSHLQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSHLQ", 3, Operands { v0, v1, v2 })
+ // VPSHLQ xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[0]) << 3))
+ m.emit(0x97)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+ m.emit(0x97)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSHLQ m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x97)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSHLQ xmm, m128, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x97)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSHLQ")
+ }
+ return p
+}
+
+// VPSHLW performs "Packed Shift Logical Words".
+//
+// Mnemonic : VPSHLW
+// Supported forms : (3 forms)
+//
+// * VPSHLW xmm, xmm, xmm [XOP]
+// * VPSHLW m128, xmm, xmm [XOP]
+// * VPSHLW xmm, m128, xmm [XOP]
+//
+func (self *Program) VPSHLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSHLW", 3, Operands { v0, v1, v2 })
+ // VPSHLW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x78 ^ (hlcode(v[0]) << 3))
+ m.emit(0x95)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x8f)
+ m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+ m.emit(0x95)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSHLW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x95)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSHLW xmm, m128, xmm
+ if isXMM(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_XOP)
+ p.domain = DomainAMDSpecific
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0]))
+ m.emit(0x95)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSHLW")
+ }
+ return p
+}
+
+// VPSHUFB performs "Packed Shuffle Bytes".
+//
+// Mnemonic : VPSHUFB
+// Supported forms : (10 forms)
+//
+// * VPSHUFB xmm, xmm, xmm [AVX]
+// * VPSHUFB m128, xmm, xmm [AVX]
+// * VPSHUFB ymm, ymm, ymm [AVX2]
+// * VPSHUFB m256, ymm, ymm [AVX2]
+// * VPSHUFB zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPSHUFB m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPSHUFB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSHUFB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSHUFB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPSHUFB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
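+// A minimal usage sketch, assuming the package's XMM/YMM register
+// constants: the first operand holds the byte shuffle control, the second
+// the source data, and the last the destination; the 256-bit form
+// shuffles within each 128-bit lane independently:
+//
+//     p.VPSHUFB(XMM2, XMM1, XMM0)   // AVX, 128-bit
+//     p.VPSHUFB(YMM2, YMM1, YMM0)   // AVX2, per 128-bit lane
+//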
+func (self *Program) VPSHUFB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSHUFB", 3, Operands { v0, v1, v2 })
+ // VPSHUFB xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x00)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSHUFB m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x00)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSHUFB ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x00)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSHUFB m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x00)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSHUFB zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x00)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSHUFB m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x00)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPSHUFB xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x00)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSHUFB m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x00)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSHUFB ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x00)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSHUFB m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x00)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSHUFB")
+ }
+ return p
+}
+
+// VPSHUFD performs "Shuffle Packed Doublewords".
+//
+// Mnemonic : VPSHUFD
+// Supported forms : (10 forms)
+//
+// * VPSHUFD imm8, xmm, xmm [AVX]
+// * VPSHUFD imm8, m128, xmm [AVX]
+// * VPSHUFD imm8, ymm, ymm [AVX2]
+// * VPSHUFD imm8, m256, ymm [AVX2]
+// * VPSHUFD imm8, m512/m32bcst, zmm{k}{z} [AVX512F]
+// * VPSHUFD imm8, zmm, zmm{k}{z} [AVX512F]
+// * VPSHUFD imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSHUFD imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSHUFD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSHUFD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
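+// A minimal usage sketch, assuming the package's XMM register constants;
+// each two-bit field of the immediate selects one source doubleword, so
+// 0x1b (0b00011011) reverses all four:
+//
+//     p.VPSHUFD(0x1b, XMM1, XMM0)   // XMM0 = XMM1 with doublewords reversed
+//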
+func (self *Program) VPSHUFD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSHUFD", 3, Operands { v0, v1, v2 })
+ // VPSHUFD imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[1], 0)
+ m.emit(0x70)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFD imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0x70)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFD imm8, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[1], 0)
+ m.emit(0x70)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFD imm8, m256, ymm
+ if isImm8(v0) && isM256(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0x70)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFD imm8, m512/m32bcst, zmm{k}{z}
+ if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x70)
+ m.mrsd(lcode(v[2]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFD imm8, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+ m.emit(0x70)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFD imm8, m128/m32bcst, xmm{k}{z}
+ if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x70)
+ m.mrsd(lcode(v[2]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFD imm8, m256/m32bcst, ymm{k}{z}
+ if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x70)
+ m.mrsd(lcode(v[2]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFD imm8, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
+ m.emit(0x70)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFD imm8, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
+ m.emit(0x70)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSHUFD")
+ }
+ return p
+}
+
+// VPSHUFHW performs "Shuffle Packed High Words".
+//
+// Mnemonic : VPSHUFHW
+// Supported forms : (10 forms)
+//
+// * VPSHUFHW imm8, xmm, xmm [AVX]
+// * VPSHUFHW imm8, m128, xmm [AVX]
+// * VPSHUFHW imm8, ymm, ymm [AVX2]
+// * VPSHUFHW imm8, m256, ymm [AVX2]
+// * VPSHUFHW imm8, zmm, zmm{k}{z} [AVX512BW]
+// * VPSHUFHW imm8, m512, zmm{k}{z} [AVX512BW]
+// * VPSHUFHW imm8, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSHUFHW imm8, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPSHUFHW imm8, m128, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSHUFHW imm8, m256, ymm{k}{z} [AVX512BW,AVX512VL]
+//
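+// A minimal usage sketch, assuming the package's XMM register constants;
+// the immediate reorders only the four high words, while the low quadword
+// is copied through unchanged:
+//
+//     p.VPSHUFHW(0x1b, XMM1, XMM0)   // reverse the four high words of XMM1
+//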
+func (self *Program) VPSHUFHW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSHUFHW", 3, Operands { v0, v1, v2 })
+ // VPSHUFHW imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[2]), v[1], 0)
+ m.emit(0x70)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFHW imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0x70)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFHW imm8, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(6, hcode(v[2]), v[1], 0)
+ m.emit(0x70)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFHW imm8, m256, ymm
+ if isImm8(v0) && isM256(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(6, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0x70)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFHW imm8, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+ m.emit(0x70)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFHW imm8, m512, zmm{k}{z}
+ if isImm8(v0) && isM512(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x70)
+ m.mrsd(lcode(v[2]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFHW imm8, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
+ m.emit(0x70)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFHW imm8, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7e)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
+ m.emit(0x70)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFHW imm8, m128, xmm{k}{z}
+ if isImm8(v0) && isM128(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x70)
+ m.mrsd(lcode(v[2]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFHW imm8, m256, ymm{k}{z}
+ if isImm8(v0) && isM256(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x70)
+ m.mrsd(lcode(v[2]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSHUFHW")
+ }
+ return p
+}
+
+// VPSHUFLW performs "Shuffle Packed Low Words".
+//
+// Mnemonic : VPSHUFLW
+// Supported forms : (10 forms)
+//
+// * VPSHUFLW imm8, xmm, xmm [AVX]
+// * VPSHUFLW imm8, m128, xmm [AVX]
+// * VPSHUFLW imm8, ymm, ymm [AVX2]
+// * VPSHUFLW imm8, m256, ymm [AVX2]
+// * VPSHUFLW imm8, zmm, zmm{k}{z} [AVX512BW]
+// * VPSHUFLW imm8, m512, zmm{k}{z} [AVX512BW]
+// * VPSHUFLW imm8, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSHUFLW imm8, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPSHUFLW imm8, m128, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSHUFLW imm8, m256, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPSHUFLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSHUFLW", 3, Operands { v0, v1, v2 })
+ // VPSHUFLW imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), v[1], 0)
+ m.emit(0x70)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFLW imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0x70)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFLW imm8, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(7, hcode(v[2]), v[1], 0)
+ m.emit(0x70)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFLW imm8, m256, ymm
+ if isImm8(v0) && isM256(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(7, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0x70)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFLW imm8, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7f)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+ m.emit(0x70)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFLW imm8, m512, zmm{k}{z}
+ if isImm8(v0) && isM512(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x07, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x70)
+ m.mrsd(lcode(v[2]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFLW imm8, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7f)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
+ m.emit(0x70)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFLW imm8, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7f)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
+ m.emit(0x70)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFLW imm8, m128, xmm{k}{z}
+ if isImm8(v0) && isM128(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x07, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x70)
+ m.mrsd(lcode(v[2]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSHUFLW imm8, m256, ymm{k}{z}
+ if isImm8(v0) && isM256(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x07, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x70)
+ m.mrsd(lcode(v[2]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSHUFLW")
+ }
+ return p
+}
+
+// VPSIGNB performs "Packed Sign of Byte Integers".
+//
+// Mnemonic : VPSIGNB
+// Supported forms : (4 forms)
+//
+// * VPSIGNB xmm, xmm, xmm [AVX]
+// * VPSIGNB m128, xmm, xmm [AVX]
+// * VPSIGNB ymm, ymm, ymm [AVX2]
+// * VPSIGNB m256, ymm, ymm [AVX2]
+//
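+// A minimal usage sketch, assuming the package's XMM register constants;
+// each destination byte is negated, zeroed, or passed through according to
+// whether the matching byte of the first operand is negative, zero, or
+// positive:
+//
+//     p.VPSIGNB(XMM2, XMM1, XMM0)   // apply the signs of XMM2 to XMM1
+//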
+func (self *Program) VPSIGNB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSIGNB", 3, Operands { v0, v1, v2 })
+ // VPSIGNB xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x08)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSIGNB m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x08)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSIGNB ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x08)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSIGNB m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x08)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSIGNB")
+ }
+ return p
+}
+
+// VPSIGND performs "Packed Sign of Doubleword Integers".
+//
+// Mnemonic : VPSIGND
+// Supported forms : (4 forms)
+//
+// * VPSIGND xmm, xmm, xmm [AVX]
+// * VPSIGND m128, xmm, xmm [AVX]
+// * VPSIGND ymm, ymm, ymm [AVX2]
+// * VPSIGND m256, ymm, ymm [AVX2]
+//
+func (self *Program) VPSIGND(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSIGND", 3, Operands { v0, v1, v2 })
+ // VPSIGND xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x0a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSIGND m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x0a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSIGND ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x0a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSIGND m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x0a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSIGND")
+ }
+ return p
+}
+
+// VPSIGNW performs "Packed Sign of Word Integers".
+//
+// Mnemonic : VPSIGNW
+// Supported forms : (4 forms)
+//
+// * VPSIGNW xmm, xmm, xmm [AVX]
+// * VPSIGNW m128, xmm, xmm [AVX]
+// * VPSIGNW ymm, ymm, ymm [AVX2]
+// * VPSIGNW m256, ymm, ymm [AVX2]
+//
+func (self *Program) VPSIGNW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSIGNW", 3, Operands { v0, v1, v2 })
+ // VPSIGNW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x09)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSIGNW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x09)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSIGNW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x09)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSIGNW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x09)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSIGNW")
+ }
+ return p
+}
+
+// VPSLLD performs "Shift Packed Doubleword Data Left Logical".
+//
+// Mnemonic : VPSLLD
+// Supported forms : (18 forms)
+//
+// * VPSLLD imm8, xmm, xmm [AVX]
+// * VPSLLD xmm, xmm, xmm [AVX]
+// * VPSLLD m128, xmm, xmm [AVX]
+// * VPSLLD imm8, ymm, ymm [AVX2]
+// * VPSLLD xmm, ymm, ymm [AVX2]
+// * VPSLLD m128, ymm, ymm [AVX2]
+// * VPSLLD imm8, m512/m32bcst, zmm{k}{z} [AVX512F]
+// * VPSLLD imm8, zmm, zmm{k}{z} [AVX512F]
+// * VPSLLD xmm, zmm, zmm{k}{z} [AVX512F]
+// * VPSLLD m128, zmm, zmm{k}{z} [AVX512F]
+// * VPSLLD imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSLLD imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSLLD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSLLD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSLLD m128, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSLLD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSLLD xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSLLD m128, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
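+// A minimal usage sketch, assuming the package's XMM register constants;
+// the count may be an immediate or the low quadword of an XMM register:
+//
+//     p.VPSLLD(7, XMM1, XMM0)      // XMM0 = XMM1 << 7, per doubleword
+//     p.VPSLLD(XMM2, XMM1, XMM0)   // count taken from XMM2
+//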
+func (self *Program) VPSLLD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSLLD", 3, Operands { v0, v1, v2 })
+ // VPSLLD imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, 0, v[1], hlcode(v[2]))
+ m.emit(0x72)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xf2)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xf2)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSLLD imm8, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, 0, v[1], hlcode(v[2]))
+ m.emit(0x72)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLD xmm, ymm, ymm
+ if isXMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xf2)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLD m128, ymm, ymm
+ if isM128(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xf2)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSLLD imm8, m512/m32bcst, zmm{k}{z}
+ if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x72)
+ m.mrsd(6, addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLD imm8, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x72)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLD xmm, zmm, zmm{k}{z}
+ if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xf2)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLD m128, zmm, zmm{k}{z}
+ if isM128(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xf2)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSLLD imm8, m128/m32bcst, xmm{k}{z}
+ if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x72)
+ m.mrsd(6, addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLD imm8, m256/m32bcst, ymm{k}{z}
+ if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x72)
+ m.mrsd(6, addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLD imm8, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x72)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xf2)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLD m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xf2)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSLLD imm8, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x72)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLD xmm, ymm, ymm{k}{z}
+ if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xf2)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLD m128, ymm, ymm{k}{z}
+ if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xf2)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSLLD")
+ }
+ return p
+}
+
+// VPSLLDQ performs "Shift Packed Double Quadword Left Logical".
+//
+// Mnemonic : VPSLLDQ
+// Supported forms : (8 forms)
+//
+// * VPSLLDQ imm8, xmm, xmm [AVX]
+// * VPSLLDQ imm8, ymm, ymm [AVX2]
+// * VPSLLDQ imm8, zmm, zmm [AVX512BW]
+// * VPSLLDQ imm8, m512, zmm [AVX512BW]
+// * VPSLLDQ imm8, xmm, xmm [AVX512BW,AVX512VL]
+// * VPSLLDQ imm8, m128, xmm [AVX512BW,AVX512VL]
+// * VPSLLDQ imm8, ymm, ymm [AVX512BW,AVX512VL]
+// * VPSLLDQ imm8, m256, ymm [AVX512BW,AVX512VL]
+//
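+// A minimal usage sketch, assuming the package's XMM register constants;
+// unlike the element-wise shifts, the immediate here counts whole bytes:
+//
+//     p.VPSLLDQ(4, XMM1, XMM0)   // XMM0 = XMM1 shifted left by 4 bytes
+//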
+func (self *Program) VPSLLDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSLLDQ", 3, Operands { v0, v1, v2 })
+ // VPSLLDQ imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, 0, v[1], hlcode(v[2]))
+ m.emit(0x73)
+ m.emit(0xf8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLDQ imm8, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, 0, v[1], hlcode(v[2]))
+ m.emit(0x73)
+ m.emit(0xf8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLDQ imm8, zmm, zmm
+ if isImm8(v0) && isZMM(v1) && isZMM(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x40)
+ m.emit(0x73)
+ m.emit(0xf8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLDQ imm8, m512, zmm
+ if isImm8(v0) && isM512(v1) && isZMM(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), 0, 0, 0)
+ m.emit(0x73)
+ m.mrsd(7, addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLDQ imm8, xmm, xmm
+ if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00)
+ m.emit(0x73)
+ m.emit(0xf8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLDQ imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), 0, 0, 0)
+ m.emit(0x73)
+ m.mrsd(7, addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLDQ imm8, ymm, ymm
+ if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x20)
+ m.emit(0x73)
+ m.emit(0xf8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLDQ imm8, m256, ymm
+ if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), 0, 0, 0)
+ m.emit(0x73)
+ m.mrsd(7, addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSLLDQ")
+ }
+ return p
+}
+
+// VPSLLQ performs "Shift Packed Quadword Data Left Logical".
+//
+// Mnemonic : VPSLLQ
+// Supported forms : (18 forms)
+//
+// * VPSLLQ imm8, xmm, xmm [AVX]
+// * VPSLLQ xmm, xmm, xmm [AVX]
+// * VPSLLQ m128, xmm, xmm [AVX]
+// * VPSLLQ imm8, ymm, ymm [AVX2]
+// * VPSLLQ xmm, ymm, ymm [AVX2]
+// * VPSLLQ m128, ymm, ymm [AVX2]
+// * VPSLLQ imm8, m512/m64bcst, zmm{k}{z} [AVX512F]
+// * VPSLLQ imm8, zmm, zmm{k}{z} [AVX512F]
+// * VPSLLQ xmm, zmm, zmm{k}{z} [AVX512F]
+// * VPSLLQ m128, zmm, zmm{k}{z} [AVX512F]
+// * VPSLLQ imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSLLQ imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSLLQ imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSLLQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSLLQ m128, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSLLQ imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSLLQ xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSLLQ m128, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPSLLQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSLLQ", 3, Operands { v0, v1, v2 })
+ // VPSLLQ imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, 0, v[1], hlcode(v[2]))
+ m.emit(0x73)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLQ xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xf3)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLQ m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xf3)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSLLQ imm8, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, 0, v[1], hlcode(v[2]))
+ m.emit(0x73)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLQ xmm, ymm, ymm
+ if isXMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xf3)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLQ m128, ymm, ymm
+ if isM128(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xf3)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSLLQ imm8, m512/m64bcst, zmm{k}{z}
+ if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x73)
+ m.mrsd(6, addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLQ imm8, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x73)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLQ xmm, zmm, zmm{k}{z}
+ if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xf3)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLQ m128, zmm, zmm{k}{z}
+ if isM128(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xf3)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSLLQ imm8, m128/m64bcst, xmm{k}{z}
+ if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x73)
+ m.mrsd(6, addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLQ imm8, m256/m64bcst, ymm{k}{z}
+ if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x73)
+ m.mrsd(6, addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLQ imm8, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x73)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xf3)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLQ m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xf3)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSLLQ imm8, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x73)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLQ xmm, ymm, ymm{k}{z}
+ if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xf3)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLQ m128, ymm, ymm{k}{z}
+ if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xf3)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSLLQ")
+ }
+ return p
+}
+
+// VPSLLVD performs "Variable Shift Packed Doubleword Data Left Logical".
+//
+// Mnemonic : VPSLLVD
+// Supported forms : (10 forms)
+//
+// * VPSLLVD xmm, xmm, xmm [AVX2]
+// * VPSLLVD m128, xmm, xmm [AVX2]
+// * VPSLLVD ymm, ymm, ymm [AVX2]
+// * VPSLLVD m256, ymm, ymm [AVX2]
+// * VPSLLVD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPSLLVD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPSLLVD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSLLVD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSLLVD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSLLVD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
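+// A minimal usage sketch, assuming the package's XMM register constants;
+// the first operand supplies an independent count for each doubleword
+// lane of the second:
+//
+//     p.VPSLLVD(XMM2, XMM1, XMM0)   // XMM0[i] = XMM1[i] << XMM2[i]
+//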
+func (self *Program) VPSLLVD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSLLVD", 3, Operands { v0, v1, v2 })
+ // VPSLLVD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x47)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLVD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x47)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSLLVD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x47)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLVD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x47)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSLLVD m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x47)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPSLLVD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x47)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLVD m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x47)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSLLVD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x47)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLVD m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x47)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPSLLVD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x47)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSLLVD")
+ }
+ return p
+}
+
+// VPSLLVQ performs "Variable Shift Packed Quadword Data Left Logical".
+//
+// Mnemonic : VPSLLVQ
+// Supported forms : (10 forms)
+//
+// * VPSLLVQ xmm, xmm, xmm [AVX2]
+// * VPSLLVQ m128, xmm, xmm [AVX2]
+// * VPSLLVQ ymm, ymm, ymm [AVX2]
+// * VPSLLVQ m256, ymm, ymm [AVX2]
+// * VPSLLVQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPSLLVQ zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPSLLVQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSLLVQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSLLVQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSLLVQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPSLLVQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSLLVQ", 3, Operands { v0, v1, v2 })
+ // VPSLLVQ xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0x47)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLVQ m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x47)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSLLVQ ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0x47)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLVQ m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x47)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSLLVQ m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x47)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPSLLVQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x47)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLVQ m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x47)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSLLVQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x47)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLVQ m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x47)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPSLLVQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x47)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSLLVQ")
+ }
+ return p
+}
+
+// VPSLLVW performs "Variable Shift Packed Word Data Left Logical".
+//
+// Mnemonic : VPSLLVW
+// Supported forms : (6 forms)
+//
+// * VPSLLVW zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPSLLVW m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPSLLVW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSLLVW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSLLVW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPSLLVW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
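+// A minimal usage sketch, assuming the package's ZMM register constants
+// and that a plain register is accepted where the {k}{z} decoration is
+// optional:
+//
+//     p.VPSLLVW(ZMM2, ZMM1, ZMM0)   // per-word variable shift (AVX512BW)
+//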
+func (self *Program) VPSLLVW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSLLVW", 3, Operands { v0, v1, v2 })
+ // VPSLLVW zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x12)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLVW m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x12)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPSLLVW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x12)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLVW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x12)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSLLVW ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x12)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLVW m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x12)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSLLVW")
+ }
+ return p
+}
+
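+// The sketch below (illustrative, not generated output) shows how the
+// variable-shift encoders above are typically driven. Operand order is
+// AT&T-like: count vector first, then source, then destination. The
+// constructor shown and the ZMM0..ZMM2 register constants are assumed to be
+// provided elsewhere in this package.
+//
+//     p := DefaultArch.CreateProgram()   // assumed constructor
+//     p.VPSLLVW(ZMM0, ZMM1, ZMM2)        // each word of ZMM1 shifted left by
+//                                        // the matching word count in ZMM0
+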
+// VPSLLW performs "Shift Packed Word Data Left Logical".
+//
+// Mnemonic : VPSLLW
+// Supported forms : (18 forms)
+//
+// * VPSLLW imm8, xmm, xmm [AVX]
+// * VPSLLW xmm, xmm, xmm [AVX]
+// * VPSLLW m128, xmm, xmm [AVX]
+// * VPSLLW imm8, ymm, ymm [AVX2]
+// * VPSLLW xmm, ymm, ymm [AVX2]
+// * VPSLLW m128, ymm, ymm [AVX2]
+// * VPSLLW imm8, zmm, zmm{k}{z} [AVX512BW]
+// * VPSLLW xmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPSLLW m128, zmm, zmm{k}{z} [AVX512BW]
+// * VPSLLW imm8, m512, zmm{k}{z} [AVX512BW]
+// * VPSLLW imm8, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSLLW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSLLW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSLLW imm8, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPSLLW xmm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPSLLW m128, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPSLLW imm8, m128, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSLLW imm8, m256, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPSLLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSLLW", 3, Operands { v0, v1, v2 })
+ // VPSLLW imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, 0, v[1], hlcode(v[2]))
+ m.emit(0x71)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xf1)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xf1)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSLLW imm8, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, 0, v[1], hlcode(v[2]))
+ m.emit(0x71)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLW xmm, ymm, ymm
+ if isXMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xf1)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLW m128, ymm, ymm
+ if isM128(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xf1)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSLLW imm8, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x71)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLW xmm, zmm, zmm{k}{z}
+ if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xf1)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLW m128, zmm, zmm{k}{z}
+ if isM128(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xf1)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSLLW imm8, m512, zmm{k}{z}
+ if isImm8(v0) && isM512(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x71)
+ m.mrsd(6, addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLW imm8, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x71)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xf1)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xf1)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSLLW imm8, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x71)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLW xmm, ymm, ymm{k}{z}
+ if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xf1)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSLLW m128, ymm, ymm{k}{z}
+ if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xf1)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSLLW imm8, m128, xmm{k}{z}
+ if isImm8(v0) && isM128(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x71)
+ m.mrsd(6, addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSLLW imm8, m256, ymm{k}{z}
+ if isImm8(v0) && isM256(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x71)
+ m.mrsd(6, addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSLLW")
+ }
+ return p
+}
+
+// VPSRAD performs "Shift Packed Doubleword Data Right Arithmetic".
+//
+// Mnemonic : VPSRAD
+// Supported forms : (18 forms)
+//
+// * VPSRAD imm8, xmm, xmm [AVX]
+// * VPSRAD xmm, xmm, xmm [AVX]
+// * VPSRAD m128, xmm, xmm [AVX]
+// * VPSRAD imm8, ymm, ymm [AVX2]
+// * VPSRAD xmm, ymm, ymm [AVX2]
+// * VPSRAD m128, ymm, ymm [AVX2]
+// * VPSRAD imm8, m512/m32bcst, zmm{k}{z} [AVX512F]
+// * VPSRAD imm8, zmm, zmm{k}{z} [AVX512F]
+// * VPSRAD xmm, zmm, zmm{k}{z} [AVX512F]
+// * VPSRAD m128, zmm, zmm{k}{z} [AVX512F]
+// * VPSRAD imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSRAD imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSRAD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSRAD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSRAD m128, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSRAD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSRAD xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSRAD m128, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPSRAD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSRAD", 3, Operands { v0, v1, v2 })
+ // VPSRAD imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, 0, v[1], hlcode(v[2]))
+ m.emit(0x72)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRAD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xe2)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRAD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xe2)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSRAD imm8, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, 0, v[1], hlcode(v[2]))
+ m.emit(0x72)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRAD xmm, ymm, ymm
+ if isXMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xe2)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRAD m128, ymm, ymm
+ if isM128(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xe2)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSRAD imm8, m512/m32bcst, zmm{k}{z}
+ if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x72)
+ m.mrsd(4, addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRAD imm8, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x72)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRAD xmm, zmm, zmm{k}{z}
+ if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xe2)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRAD m128, zmm, zmm{k}{z}
+ if isM128(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe2)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSRAD imm8, m128/m32bcst, xmm{k}{z}
+ if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x72)
+ m.mrsd(4, addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRAD imm8, m256/m32bcst, ymm{k}{z}
+ if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x72)
+ m.mrsd(4, addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRAD imm8, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x72)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRAD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xe2)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRAD m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe2)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSRAD imm8, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x72)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRAD xmm, ymm, ymm{k}{z}
+ if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xe2)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRAD m128, ymm, ymm{k}{z}
+ if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe2)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSRAD")
+ }
+ return p
+}
+
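+// Illustrative sketch (not generated output): the type of the first operand
+// selects the encoding path above. A plain Go integer is assumed to satisfy
+// isImm8, and the XMM/YMM register constants are assumed to exist in this
+// package; p is a *Program as in the sketch after VPSLLVW.
+//
+//     p.VPSRAD(3, XMM1, XMM2)      // imm8 form, VEX-encoded under AVX
+//     p.VPSRAD(XMM0, YMM1, YMM2)   // count taken from the low quadword of XMM0
+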
+// VPSRAQ performs "Shift Packed Quadword Data Right Arithmetic".
+//
+// Mnemonic : VPSRAQ
+// Supported forms : (12 forms)
+//
+// * VPSRAQ imm8, m512/m64bcst, zmm{k}{z} [AVX512F]
+// * VPSRAQ imm8, zmm, zmm{k}{z} [AVX512F]
+// * VPSRAQ xmm, zmm, zmm{k}{z} [AVX512F]
+// * VPSRAQ m128, zmm, zmm{k}{z} [AVX512F]
+// * VPSRAQ imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSRAQ imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSRAQ imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSRAQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSRAQ m128, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSRAQ imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSRAQ xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSRAQ m128, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPSRAQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSRAQ", 3, Operands { v0, v1, v2 })
+ // VPSRAQ imm8, m512/m64bcst, zmm{k}{z}
+ if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x72)
+ m.mrsd(4, addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRAQ imm8, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x72)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRAQ xmm, zmm, zmm{k}{z}
+ if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xe2)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRAQ m128, zmm, zmm{k}{z}
+ if isM128(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe2)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSRAQ imm8, m128/m64bcst, xmm{k}{z}
+ if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x72)
+ m.mrsd(4, addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRAQ imm8, m256/m64bcst, ymm{k}{z}
+ if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x72)
+ m.mrsd(4, addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRAQ imm8, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x72)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRAQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xe2)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRAQ m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe2)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSRAQ imm8, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x72)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRAQ xmm, ymm, ymm{k}{z}
+ if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xe2)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRAQ m128, ymm, ymm{k}{z}
+ if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe2)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSRAQ")
+ }
+ return p
+}
+
+// VPSRAVD performs "Variable Shift Packed Doubleword Data Right Arithmetic".
+//
+// Mnemonic : VPSRAVD
+// Supported forms : (10 forms)
+//
+// * VPSRAVD xmm, xmm, xmm [AVX2]
+// * VPSRAVD m128, xmm, xmm [AVX2]
+// * VPSRAVD ymm, ymm, ymm [AVX2]
+// * VPSRAVD m256, ymm, ymm [AVX2]
+// * VPSRAVD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPSRAVD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPSRAVD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSRAVD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSRAVD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSRAVD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPSRAVD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSRAVD", 3, Operands { v0, v1, v2 })
+ // VPSRAVD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x46)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRAVD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x46)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSRAVD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x46)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRAVD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x46)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSRAVD m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x46)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPSRAVD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x46)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRAVD m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x46)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSRAVD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x46)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRAVD m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x46)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPSRAVD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x46)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSRAVD")
+ }
+ return p
+}
+
+// VPSRAVQ performs "Variable Shift Packed Quadword Data Right Arithmetic".
+//
+// Mnemonic : VPSRAVQ
+// Supported forms : (6 forms)
+//
+// * VPSRAVQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPSRAVQ zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPSRAVQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSRAVQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSRAVQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSRAVQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPSRAVQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSRAVQ", 3, Operands { v0, v1, v2 })
+ // VPSRAVQ m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x46)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPSRAVQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x46)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRAVQ m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x46)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSRAVQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x46)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRAVQ m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x46)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPSRAVQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x46)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSRAVQ")
+ }
+ return p
+}
+
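+// Note: unlike VPSRAVD, VPSRAVQ has no VEX/AVX2 form, so every path above
+// emits an EVEX prefix and requires AVX512F (plus AVX512VL for the 128- and
+// 256-bit forms). An illustrative call, with register constants assumed as
+// before:
+//
+//     p.VPSRAVQ(XMM0, XMM1, XMM2)   // EVEX-only; requires AVX512VL|AVX512F
+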
+// VPSRAVW performs "Variable Shift Packed Word Data Right Arithmetic".
+//
+// Mnemonic : VPSRAVW
+// Supported forms : (6 forms)
+//
+// * VPSRAVW zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPSRAVW m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPSRAVW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSRAVW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSRAVW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPSRAVW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPSRAVW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSRAVW", 3, Operands { v0, v1, v2 })
+ // VPSRAVW zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRAVW m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x11)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPSRAVW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRAVW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x11)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSRAVW ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x11)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRAVW m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x11)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSRAVW")
+ }
+ return p
+}
+
+// VPSRAW performs "Shift Packed Word Data Right Arithmetic".
+//
+// Mnemonic : VPSRAW
+// Supported forms : (18 forms)
+//
+// * VPSRAW imm8, xmm, xmm [AVX]
+// * VPSRAW xmm, xmm, xmm [AVX]
+// * VPSRAW m128, xmm, xmm [AVX]
+// * VPSRAW imm8, ymm, ymm [AVX2]
+// * VPSRAW xmm, ymm, ymm [AVX2]
+// * VPSRAW m128, ymm, ymm [AVX2]
+// * VPSRAW imm8, zmm, zmm{k}{z} [AVX512BW]
+// * VPSRAW xmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPSRAW m128, zmm, zmm{k}{z} [AVX512BW]
+// * VPSRAW imm8, m512, zmm{k}{z} [AVX512BW]
+// * VPSRAW imm8, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSRAW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSRAW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSRAW imm8, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPSRAW xmm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPSRAW m128, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPSRAW imm8, m128, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSRAW imm8, m256, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPSRAW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSRAW", 3, Operands { v0, v1, v2 })
+ // VPSRAW imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, 0, v[1], hlcode(v[2]))
+ m.emit(0x71)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRAW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xe1)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRAW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xe1)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSRAW imm8, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, 0, v[1], hlcode(v[2]))
+ m.emit(0x71)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRAW xmm, ymm, ymm
+ if isXMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xe1)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRAW m128, ymm, ymm
+ if isM128(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xe1)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSRAW imm8, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x71)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRAW xmm, zmm, zmm{k}{z}
+ if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xe1)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRAW m128, zmm, zmm{k}{z}
+ if isM128(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe1)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSRAW imm8, m512, zmm{k}{z}
+ if isImm8(v0) && isM512(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x71)
+ m.mrsd(4, addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRAW imm8, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x71)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRAW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xe1)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRAW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe1)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSRAW imm8, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x71)
+ m.emit(0xe0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRAW xmm, ymm, ymm{k}{z}
+ if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xe1)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRAW m128, ymm, ymm{k}{z}
+ if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe1)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSRAW imm8, m128, xmm{k}{z}
+ if isImm8(v0) && isM128(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x71)
+ m.mrsd(4, addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRAW imm8, m256, ymm{k}{z}
+ if isImm8(v0) && isM256(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x71)
+ m.mrsd(4, addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSRAW")
+ }
+ return p
+}
+
+// VPSRLD performs "Shift Packed Doubleword Data Right Logical".
+//
+// Mnemonic : VPSRLD
+// Supported forms : (18 forms)
+//
+// * VPSRLD imm8, xmm, xmm [AVX]
+// * VPSRLD xmm, xmm, xmm [AVX]
+// * VPSRLD m128, xmm, xmm [AVX]
+// * VPSRLD imm8, ymm, ymm [AVX2]
+// * VPSRLD xmm, ymm, ymm [AVX2]
+// * VPSRLD m128, ymm, ymm [AVX2]
+// * VPSRLD imm8, m512/m32bcst, zmm{k}{z} [AVX512F]
+// * VPSRLD imm8, zmm, zmm{k}{z} [AVX512F]
+// * VPSRLD xmm, zmm, zmm{k}{z} [AVX512F]
+// * VPSRLD m128, zmm, zmm{k}{z} [AVX512F]
+// * VPSRLD imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSRLD imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSRLD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSRLD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSRLD m128, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSRLD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSRLD xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSRLD m128, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPSRLD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSRLD", 3, Operands { v0, v1, v2 })
+ // VPSRLD imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, 0, v[1], hlcode(v[2]))
+ m.emit(0x72)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xd2)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xd2)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSRLD imm8, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, 0, v[1], hlcode(v[2]))
+ m.emit(0x72)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLD xmm, ymm, ymm
+ if isXMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xd2)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLD m128, ymm, ymm
+ if isM128(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xd2)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSRLD imm8, m512/m32bcst, zmm{k}{z}
+ if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x72)
+ m.mrsd(2, addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLD imm8, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x72)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLD xmm, zmm, zmm{k}{z}
+ if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xd2)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLD m128, zmm, zmm{k}{z}
+ if isM128(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xd2)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSRLD imm8, m128/m32bcst, xmm{k}{z}
+ if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x72)
+ m.mrsd(2, addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLD imm8, m256/m32bcst, ymm{k}{z}
+ if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x72)
+ m.mrsd(2, addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLD imm8, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x72)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xd2)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLD m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xd2)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSRLD imm8, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x72)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLD xmm, ymm, ymm{k}{z}
+ if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xd2)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLD m128, ymm, ymm{k}{z}
+ if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xd2)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSRLD")
+ }
+ return p
+}
+
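+// Note on the m128 forms above: the shift-count memory operand is always
+// 128 bits wide, so their mrsd calls use a disp8 scale of 16 even for the
+// YMM and ZMM destinations; only the imm8 broadcast forms scale by the full
+// 16/32/64-byte vector width.
+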
+// VPSRLDQ performs "Shift Packed Double Quadword Right Logical".
+//
+// Mnemonic : VPSRLDQ
+// Supported forms : (8 forms)
+//
+// * VPSRLDQ imm8, xmm, xmm [AVX]
+// * VPSRLDQ imm8, ymm, ymm [AVX2]
+// * VPSRLDQ imm8, zmm, zmm [AVX512BW]
+// * VPSRLDQ imm8, m512, zmm [AVX512BW]
+// * VPSRLDQ imm8, xmm, xmm [AVX512BW,AVX512VL]
+// * VPSRLDQ imm8, m128, xmm [AVX512BW,AVX512VL]
+// * VPSRLDQ imm8, ymm, ymm [AVX512BW,AVX512VL]
+// * VPSRLDQ imm8, m256, ymm [AVX512BW,AVX512VL]
+//
+func (self *Program) VPSRLDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSRLDQ", 3, Operands { v0, v1, v2 })
+ // VPSRLDQ imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, 0, v[1], hlcode(v[2]))
+ m.emit(0x73)
+ m.emit(0xd8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLDQ imm8, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, 0, v[1], hlcode(v[2]))
+ m.emit(0x73)
+ m.emit(0xd8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLDQ imm8, zmm, zmm
+ if isImm8(v0) && isZMM(v1) && isZMM(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x40)
+ m.emit(0x73)
+ m.emit(0xd8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLDQ imm8, m512, zmm
+ if isImm8(v0) && isM512(v1) && isZMM(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), 0, 0, 0)
+ m.emit(0x73)
+ m.mrsd(3, addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLDQ imm8, xmm, xmm
+ if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00)
+ m.emit(0x73)
+ m.emit(0xd8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLDQ imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), 0, 0, 0)
+ m.emit(0x73)
+ m.mrsd(3, addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLDQ imm8, ymm, ymm
+ if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x20)
+ m.emit(0x73)
+ m.emit(0xd8 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLDQ imm8, m256, ymm
+ if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), 0, 0, 0)
+ m.emit(0x73)
+ m.mrsd(3, addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSRLDQ")
+ }
+ return p
+}
+
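+// Illustrative sketch (not generated output): VPSRLDQ shifts each 128-bit
+// lane right by imm8 bytes, so only imm8 forms exist above, with no
+// xmm-count variants. With register constants assumed as before:
+//
+//     p.VPSRLDQ(4, XMM1, XMM2)   // XMM1 shifted right by 4 bytes into XMM2
+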
+// VPSRLQ performs "Shift Packed Quadword Data Right Logical".
+//
+// Mnemonic : VPSRLQ
+// Supported forms : (18 forms)
+//
+// * VPSRLQ imm8, xmm, xmm [AVX]
+// * VPSRLQ xmm, xmm, xmm [AVX]
+// * VPSRLQ m128, xmm, xmm [AVX]
+// * VPSRLQ imm8, ymm, ymm [AVX2]
+// * VPSRLQ xmm, ymm, ymm [AVX2]
+// * VPSRLQ m128, ymm, ymm [AVX2]
+// * VPSRLQ imm8, m512/m64bcst, zmm{k}{z} [AVX512F]
+// * VPSRLQ imm8, zmm, zmm{k}{z} [AVX512F]
+// * VPSRLQ xmm, zmm, zmm{k}{z} [AVX512F]
+// * VPSRLQ m128, zmm, zmm{k}{z} [AVX512F]
+// * VPSRLQ imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSRLQ imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSRLQ imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSRLQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSRLQ m128, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSRLQ imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSRLQ xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSRLQ m128, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPSRLQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSRLQ", 3, Operands { v0, v1, v2 })
+ // VPSRLQ imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, 0, v[1], hlcode(v[2]))
+ m.emit(0x73)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLQ xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xd3)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLQ m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xd3)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSRLQ imm8, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, 0, v[1], hlcode(v[2]))
+ m.emit(0x73)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLQ xmm, ymm, ymm
+ if isXMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xd3)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLQ m128, ymm, ymm
+ if isM128(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xd3)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSRLQ imm8, m512/m64bcst, zmm{k}{z}
+ if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x73)
+ m.mrsd(2, addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLQ imm8, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x73)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLQ xmm, zmm, zmm{k}{z}
+ if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xd3)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLQ m128, zmm, zmm{k}{z}
+ if isM128(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xd3)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSRLQ imm8, m128/m64bcst, xmm{k}{z}
+ if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x73)
+ m.mrsd(2, addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLQ imm8, m256/m64bcst, ymm{k}{z}
+ if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x73)
+ m.mrsd(2, addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLQ imm8, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x73)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xd3)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLQ m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xd3)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSRLQ imm8, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x73)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLQ xmm, ymm, ymm{k}{z}
+ if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xd3)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLQ m128, ymm, ymm{k}{z}
+ if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xd3)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSRLQ")
+ }
+ return p
+}
+
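+// A minimal usage sketch (illustrative, not generator output): given a
+// *Program p and this package's register constants, and assuming a plain Go
+// int is accepted as the imm8 operand,
+//
+//     p.VPSRLQ(4, XMM1, XMM2)    // xmm2[i] = xmm1[i] >> 4 for each quadword lane
+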
+// VPSRLVD performs "Variable Shift Packed Doubleword Data Right Logical".
+//
+// Mnemonic : VPSRLVD
+// Supported forms : (10 forms)
+//
+// * VPSRLVD xmm, xmm, xmm [AVX2]
+// * VPSRLVD m128, xmm, xmm [AVX2]
+// * VPSRLVD ymm, ymm, ymm [AVX2]
+// * VPSRLVD m256, ymm, ymm [AVX2]
+// * VPSRLVD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPSRLVD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPSRLVD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSRLVD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSRLVD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSRLVD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPSRLVD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSRLVD", 3, Operands { v0, v1, v2 })
+ // VPSRLVD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79 ^ (hlcode(v[1]) << 3))
+ m.emit(0x45)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLVD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x45)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSRLVD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit(0x45)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLVD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x45)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSRLVD m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x45)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPSRLVD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x45)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLVD m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x45)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSRLVD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x45)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLVD m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x45)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPSRLVD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x45)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSRLVD")
+ }
+ return p
+}
+
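+// Illustrative sketch: the first operand supplies an independent shift count
+// per doubleword lane (counts >= 32 zero the lane), destination comes last:
+//
+//     p.VPSRLVD(XMM0, XMM1, XMM2)    // xmm2[i] = xmm1[i] >> xmm0[i], logical, per dword
+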
+// VPSRLVQ performs "Variable Shift Packed Quadword Data Right Logical".
+//
+// Mnemonic : VPSRLVQ
+// Supported forms : (10 forms)
+//
+// * VPSRLVQ xmm, xmm, xmm [AVX2]
+// * VPSRLVQ m128, xmm, xmm [AVX2]
+// * VPSRLVQ ymm, ymm, ymm [AVX2]
+// * VPSRLVQ m256, ymm, ymm [AVX2]
+// * VPSRLVQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPSRLVQ zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPSRLVQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSRLVQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSRLVQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSRLVQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPSRLVQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSRLVQ", 3, Operands { v0, v1, v2 })
+ // VPSRLVQ xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xf9 ^ (hlcode(v[1]) << 3))
+ m.emit(0x45)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLVQ m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x45)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSRLVQ ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit(0x45)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLVQ m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x45)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSRLVQ m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x45)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPSRLVQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x45)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLVQ m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x45)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSRLVQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x45)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLVQ m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x45)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPSRLVQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x45)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSRLVQ")
+ }
+ return p
+}
+
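+// Illustrative sketch: as VPSRLVD but per quadword lane, so counts >= 64
+// zero the lane:
+//
+//     p.VPSRLVQ(YMM0, YMM1, YMM2)    // ymm2[i] = ymm1[i] >> ymm0[i], logical, per qword
+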
+// VPSRLVW performs "Variable Shift Packed Word Data Right Logical".
+//
+// Mnemonic : VPSRLVW
+// Supported forms : (6 forms)
+//
+// * VPSRLVW zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPSRLVW m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPSRLVW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSRLVW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSRLVW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPSRLVW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPSRLVW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSRLVW", 3, Operands { v0, v1, v2 })
+ // VPSRLVW zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLVW m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x10)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPSRLVW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLVW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x10)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSRLVW ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x10)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLVW m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x10)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSRLVW")
+ }
+ return p
+}
+
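+// Illustrative sketch: VPSRLVW has no VEX form, so even plain xmm operands
+// assemble as EVEX and require AVX512BW plus AVX512VL (per the forms above):
+//
+//     p.VPSRLVW(XMM0, XMM1, XMM2)    // xmm2[i] = xmm1[i] >> xmm0[i], logical, per word
+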
+// VPSRLW performs "Shift Packed Word Data Right Logical".
+//
+// Mnemonic : VPSRLW
+// Supported forms : (18 forms)
+//
+// * VPSRLW imm8, xmm, xmm [AVX]
+// * VPSRLW xmm, xmm, xmm [AVX]
+// * VPSRLW m128, xmm, xmm [AVX]
+// * VPSRLW imm8, ymm, ymm [AVX2]
+// * VPSRLW xmm, ymm, ymm [AVX2]
+// * VPSRLW m128, ymm, ymm [AVX2]
+// * VPSRLW imm8, zmm, zmm{k}{z} [AVX512BW]
+// * VPSRLW xmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPSRLW m128, zmm, zmm{k}{z} [AVX512BW]
+// * VPSRLW imm8, m512, zmm{k}{z} [AVX512BW]
+// * VPSRLW imm8, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSRLW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSRLW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSRLW imm8, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPSRLW xmm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPSRLW m128, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPSRLW imm8, m128, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSRLW imm8, m256, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPSRLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSRLW", 3, Operands { v0, v1, v2 })
+ // VPSRLW imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, 0, v[1], hlcode(v[2]))
+ m.emit(0x71)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xd1)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xd1)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSRLW imm8, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, 0, v[1], hlcode(v[2]))
+ m.emit(0x71)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLW xmm, ymm, ymm
+ if isXMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xd1)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLW m128, ymm, ymm
+ if isM128(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xd1)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSRLW imm8, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x71)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLW xmm, zmm, zmm{k}{z}
+ if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xd1)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLW m128, zmm, zmm{k}{z}
+ if isM128(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xd1)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSRLW imm8, m512, zmm{k}{z}
+ if isImm8(v0) && isM512(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x71)
+ m.mrsd(2, addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLW imm8, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x71)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xd1)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xd1)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSRLW imm8, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x71)
+ m.emit(0xd0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLW xmm, ymm, ymm{k}{z}
+ if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xd1)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSRLW m128, ymm, ymm{k}{z}
+ if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xd1)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSRLW imm8, m128, xmm{k}{z}
+ if isImm8(v0) && isM128(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x71)
+ m.mrsd(2, addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPSRLW imm8, m256, ymm{k}{z}
+ if isImm8(v0) && isM256(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x71)
+ m.mrsd(2, addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSRLW")
+ }
+ return p
+}
+
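+// Illustrative sketch: note the EVEX-only "imm8, m128/m256/m512, reg{k}{z}"
+// forms above, which shift data taken directly from memory. A register form,
+// assuming a plain Go int for the imm8:
+//
+//     p.VPSRLW(2, XMM1, XMM2)    // xmm2[i] = xmm1[i] >> 2 for each word lane
+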
+// VPSUBB performs "Subtract Packed Byte Integers".
+//
+// Mnemonic : VPSUBB
+// Supported forms : (10 forms)
+//
+// * VPSUBB xmm, xmm, xmm [AVX]
+// * VPSUBB m128, xmm, xmm [AVX]
+// * VPSUBB ymm, ymm, ymm [AVX2]
+// * VPSUBB m256, ymm, ymm [AVX2]
+// * VPSUBB zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPSUBB m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPSUBB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSUBB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSUBB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPSUBB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPSUBB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSUBB", 3, Operands { v0, v1, v2 })
+ // VPSUBB xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xf8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBB m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xf8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSUBB ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xf8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBB m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xf8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSUBB zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xf8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBB m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xf8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPSUBB xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xf8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBB m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xf8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSUBB ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xf8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBB m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xf8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSUBB")
+ }
+ return p
+}
+
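+// Illustrative sketch: operands follow AT&T order, so the first source is
+// the subtrahend and the destination comes last:
+//
+//     p.VPSUBB(XMM0, XMM1, XMM2)    // xmm2[i] = xmm1[i] - xmm0[i], per byte, wrapping
+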
+// VPSUBD performs "Subtract Packed Doubleword Integers".
+//
+// Mnemonic : VPSUBD
+// Supported forms : (10 forms)
+//
+// * VPSUBD xmm, xmm, xmm [AVX]
+// * VPSUBD m128, xmm, xmm [AVX]
+// * VPSUBD ymm, ymm, ymm [AVX2]
+// * VPSUBD m256, ymm, ymm [AVX2]
+// * VPSUBD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPSUBD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPSUBD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSUBD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSUBD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSUBD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPSUBD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSUBD", 3, Operands { v0, v1, v2 })
+ // VPSUBD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xfa)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xfa)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSUBD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xfa)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xfa)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSUBD m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xfa)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPSUBD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xfa)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBD m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xfa)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSUBD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xfa)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBD m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xfa)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPSUBD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xfa)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSUBD")
+ }
+ return p
+}
+
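+// Illustrative sketch: the m32bcst forms broadcast a single doubleword from
+// memory across all lanes before subtracting; a register-only example:
+//
+//     p.VPSUBD(ZMM0, ZMM1, ZMM2)    // zmm2[i] = zmm1[i] - zmm0[i], per dword, wrapping
+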
+// VPSUBQ performs "Subtract Packed Quadword Integers".
+//
+// Mnemonic : VPSUBQ
+// Supported forms : (10 forms)
+//
+// * VPSUBQ xmm, xmm, xmm [AVX]
+// * VPSUBQ m128, xmm, xmm [AVX]
+// * VPSUBQ ymm, ymm, ymm [AVX2]
+// * VPSUBQ m256, ymm, ymm [AVX2]
+// * VPSUBQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPSUBQ zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPSUBQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSUBQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPSUBQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPSUBQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPSUBQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSUBQ", 3, Operands { v0, v1, v2 })
+ // VPSUBQ xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xfb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBQ m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xfb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSUBQ ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xfb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBQ m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xfb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSUBQ m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xfb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPSUBQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xfb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBQ m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xfb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSUBQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xfb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBQ m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xfb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPSUBQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xfb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSUBQ")
+ }
+ return p
+}
+
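+// Illustrative sketch, assuming this package's register constants:
+//
+//     p.VPSUBQ(XMM0, XMM1, XMM2)    // xmm2[i] = xmm1[i] - xmm0[i], per qword, wrapping
+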
+// VPSUBSB performs "Subtract Packed Signed Byte Integers with Signed Saturation".
+//
+// Mnemonic : VPSUBSB
+// Supported forms : (10 forms)
+//
+// * VPSUBSB xmm, xmm, xmm [AVX]
+// * VPSUBSB m128, xmm, xmm [AVX]
+// * VPSUBSB ymm, ymm, ymm [AVX2]
+// * VPSUBSB m256, ymm, ymm [AVX2]
+// * VPSUBSB zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPSUBSB m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPSUBSB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSUBSB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSUBSB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPSUBSB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPSUBSB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSUBSB", 3, Operands { v0, v1, v2 })
+ // VPSUBSB xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xe8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBSB m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xe8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSUBSB ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xe8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBSB m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xe8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSUBSB zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xe8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBSB m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPSUBSB xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xe8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBSB m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSUBSB ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xe8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBSB m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSUBSB")
+ }
+ return p
+}
+
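+// Illustrative sketch: the signed-saturating variant clamps each byte result
+// to [-128, 127] instead of wrapping:
+//
+//     p.VPSUBSB(XMM0, XMM1, XMM2)    // xmm2[i] = sat8(xmm1[i] - xmm0[i])
+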
+// VPSUBSW performs "Subtract Packed Signed Word Integers with Signed Saturation".
+//
+// Mnemonic : VPSUBSW
+// Supported forms : (10 forms)
+//
+// * VPSUBSW xmm, xmm, xmm [AVX]
+// * VPSUBSW m128, xmm, xmm [AVX]
+// * VPSUBSW ymm, ymm, ymm [AVX2]
+// * VPSUBSW m256, ymm, ymm [AVX2]
+// * VPSUBSW zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPSUBSW m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPSUBSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSUBSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSUBSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPSUBSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPSUBSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSUBSW", 3, Operands { v0, v1, v2 })
+ // VPSUBSW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xe9)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBSW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xe9)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSUBSW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xe9)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBSW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xe9)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSUBSW zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xe9)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBSW m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe9)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPSUBSW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xe9)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBSW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe9)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSUBSW ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xe9)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBSW m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xe9)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSUBSW")
+ }
+ return p
+}
+
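+// Illustrative sketch: word results saturate to [-32768, 32767]:
+//
+//     p.VPSUBSW(YMM0, YMM1, YMM2)    // ymm2[i] = sat16(ymm1[i] - ymm0[i])
+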
+// VPSUBUSB performs "Subtract Packed Unsigned Byte Integers with Unsigned Saturation".
+//
+// Mnemonic : VPSUBUSB
+// Supported forms : (10 forms)
+//
+// * VPSUBUSB xmm, xmm, xmm [AVX]
+// * VPSUBUSB m128, xmm, xmm [AVX]
+// * VPSUBUSB ymm, ymm, ymm [AVX2]
+// * VPSUBUSB m256, ymm, ymm [AVX2]
+// * VPSUBUSB zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPSUBUSB m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPSUBUSB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSUBUSB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSUBUSB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPSUBUSB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPSUBUSB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSUBUSB", 3, Operands { v0, v1, v2 })
+ // VPSUBUSB xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xd8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBUSB m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xd8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSUBUSB ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xd8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBUSB m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xd8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSUBUSB zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xd8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBUSB m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xd8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPSUBUSB xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xd8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBUSB m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xd8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSUBUSB ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xd8)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBUSB m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xd8)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSUBUSB")
+ }
+ return p
+}
+
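+// Illustrative sketch: unsigned saturation clamps each byte result at zero,
+// making this a per-byte "subtract or floor at 0":
+//
+//     p.VPSUBUSB(XMM0, XMM1, XMM2)    // xmm2[i] = max(xmm1[i] - xmm0[i], 0), unsigned bytes
+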
+// VPSUBUSW performs "Subtract Packed Unsigned Word Integers with Unsigned Saturation".
+//
+// Mnemonic : VPSUBUSW
+// Supported forms : (10 forms)
+//
+// * VPSUBUSW xmm, xmm, xmm [AVX]
+// * VPSUBUSW m128, xmm, xmm [AVX]
+// * VPSUBUSW ymm, ymm, ymm [AVX2]
+// * VPSUBUSW m256, ymm, ymm [AVX2]
+// * VPSUBUSW zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPSUBUSW m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPSUBUSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSUBUSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSUBUSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPSUBUSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPSUBUSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSUBUSW", 3, Operands { v0, v1, v2 })
+ // VPSUBUSW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xd9)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBUSW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xd9)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSUBUSW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xd9)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBUSW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xd9)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSUBUSW zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xd9)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBUSW m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xd9)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPSUBUSW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xd9)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBUSW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xd9)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSUBUSW ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xd9)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBUSW m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xd9)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSUBUSW")
+ }
+ return p
+}
+
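+// Illustrative sketch: as VPSUBUSB but on unsigned words, clamping results
+// at zero:
+//
+//     p.VPSUBUSW(XMM0, XMM1, XMM2)    // xmm2[i] = max(xmm1[i] - xmm0[i], 0), unsigned words
+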
+// VPSUBW performs "Subtract Packed Word Integers".
+//
+// Mnemonic : VPSUBW
+// Supported forms : (10 forms)
+//
+// * VPSUBW xmm, xmm, xmm [AVX]
+// * VPSUBW m128, xmm, xmm [AVX]
+// * VPSUBW ymm, ymm, ymm [AVX2]
+// * VPSUBW m256, ymm, ymm [AVX2]
+// * VPSUBW zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPSUBW m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPSUBW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSUBW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPSUBW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPSUBW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPSUBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPSUBW", 3, Operands { v0, v1, v2 })
+ // VPSUBW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xf9)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xf9)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSUBW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xf9)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xf9)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPSUBW zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xf9)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBW m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xf9)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPSUBW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xf9)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xf9)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPSUBW ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xf9)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPSUBW m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xf9)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPSUBW")
+ }
+ return p
+}
+
+// VPTERNLOGD performs "Bitwise Ternary Logical Operation on Doubleword Values".
+//
+// Mnemonic : VPTERNLOGD
+// Supported forms : (6 forms)
+//
+// * VPTERNLOGD imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPTERNLOGD imm8, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPTERNLOGD imm8, m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPTERNLOGD imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPTERNLOGD imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPTERNLOGD imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
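+// Example (illustrative sketch): the imm8 is an 8-entry truth table indexed
+// by the bit triplet (destination, second source, first source), so 0x96
+// yields a three-way XOR:
+//
+//     p.VPTERNLOGD(0x96, ZMM2, ZMM1, ZMM0)    // zmm0 = zmm0 ^ zmm1 ^ zmm2 (bitwise)
+//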
+func (self *Program) VPTERNLOGD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPTERNLOGD", 4, Operands { v0, v1, v2, v3 })
+ // VPTERNLOGD imm8, m512/m32bcst, zmm, zmm{k}{z}
+ if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x25)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPTERNLOGD imm8, zmm, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x25)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPTERNLOGD imm8, m128/m32bcst, xmm, xmm{k}{z}
+ if isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x25)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPTERNLOGD imm8, xmm, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+ m.emit(0x25)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPTERNLOGD imm8, m256/m32bcst, ymm, ymm{k}{z}
+ if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x25)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPTERNLOGD imm8, ymm, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x25)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPTERNLOGD")
+ }
+ return p
+}
+
+// VPTERNLOGQ performs "Bitwise Ternary Logical Operation on Quadword Values".
+//
+// Mnemonic : VPTERNLOGQ
+// Supported forms : (6 forms)
+//
+// * VPTERNLOGQ imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPTERNLOGQ imm8, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPTERNLOGQ imm8, m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPTERNLOGQ imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPTERNLOGQ imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPTERNLOGQ imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
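+// Example (illustrative sketch) using 0xCA, the "bitwise select" truth
+// table, where each destination bit chooses between the two sources:
+//
+//     p.VPTERNLOGQ(0xCA, ZMM2, ZMM1, ZMM0)    // zmm0 = (zmm0 & zmm1) | (^zmm0 & zmm2)
+//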
+func (self *Program) VPTERNLOGQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VPTERNLOGQ", 4, Operands { v0, v1, v2, v3 })
+ // VPTERNLOGQ imm8, m512/m64bcst, zmm, zmm{k}{z}
+ if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x25)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPTERNLOGQ imm8, zmm, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x25)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPTERNLOGQ imm8, m128/m64bcst, xmm, xmm{k}{z}
+ if isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x25)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPTERNLOGQ imm8, xmm, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+ m.emit(0x25)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPTERNLOGQ imm8, m256/m64bcst, ymm, ymm{k}{z}
+ if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x25)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VPTERNLOGQ imm8, ymm, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x25)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPTERNLOGQ")
+ }
+ return p
+}
+
+// VPTEST performs "Packed Logical Compare".
+//
+// Mnemonic : VPTEST
+// Supported forms : (4 forms)
+//
+// * VPTEST xmm, xmm [AVX]
+// * VPTEST m128, xmm [AVX]
+// * VPTEST ymm, ymm [AVX]
+// * VPTEST m256, ymm [AVX]
+//
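+// Example (illustrative sketch): VPTEST writes no register, it only updates
+// RFLAGS:
+//
+//     p.VPTEST(XMM0, XMM1)    // ZF = ((xmm1 & xmm0) == 0), CF = ((xmm0 &^ xmm1) == 0)
+//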
+func (self *Program) VPTEST(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VPTEST", 2, Operands { v0, v1 })
+ // VPTEST xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0x17)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPTEST m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x17)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VPTEST ymm, ymm
+ if isYMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d)
+ m.emit(0x17)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPTEST m256, ymm
+ if isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x17)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPTEST")
+ }
+ return p
+}
+
+// VPTESTMB performs "Logical AND of Packed Byte Integer Values and Set Mask".
+//
+// Mnemonic : VPTESTMB
+// Supported forms : (6 forms)
+//
+// * VPTESTMB zmm, zmm, k{k} [AVX512BW]
+// * VPTESTMB m512, zmm, k{k} [AVX512BW]
+// * VPTESTMB xmm, xmm, k{k} [AVX512BW,AVX512VL]
+// * VPTESTMB m128, xmm, k{k} [AVX512BW,AVX512VL]
+// * VPTESTMB ymm, ymm, k{k} [AVX512BW,AVX512VL]
+// * VPTESTMB m256, ymm, k{k} [AVX512BW,AVX512VL]
+//
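+// Example (illustrative sketch, assuming the package's exported mask
+// registers K0..K7):
+//
+//     p.VPTESTMB(ZMM0, ZMM1, K1)    // K1[j] = 1 iff byte j of (zmm1 & zmm0) is non-zero
+//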
+func (self *Program) VPTESTMB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPTESTMB", 3, Operands { v0, v1, v2 })
+ // VPTESTMB zmm, zmm, k{k}
+ if isZMM(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x26)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPTESTMB m512, zmm, k{k}
+ if isM512(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
+ m.emit(0x26)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPTESTMB xmm, xmm, k{k}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x26)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPTESTMB m128, xmm, k{k}
+ if isM128(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
+ m.emit(0x26)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPTESTMB ymm, ymm, k{k}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x26)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPTESTMB m256, ymm, k{k}
+ if isM256(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
+ m.emit(0x26)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPTESTMB")
+ }
+ return p
+}
+
+// VPTESTMD performs "Logical AND of Packed Doubleword Integer Values and Set Mask".
+//
+// Mnemonic : VPTESTMD
+// Supported forms : (6 forms)
+//
+// * VPTESTMD m512/m32bcst, zmm, k{k} [AVX512F]
+// * VPTESTMD zmm, zmm, k{k} [AVX512F]
+// * VPTESTMD m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL]
+// * VPTESTMD xmm, xmm, k{k} [AVX512F,AVX512VL]
+// * VPTESTMD m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL]
+// * VPTESTMD ymm, ymm, k{k} [AVX512F,AVX512VL]
+//
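+// Example (illustrative sketch), as VPTESTMB above but per 32-bit lane:
+//
+//     p.VPTESTMD(ZMM0, ZMM1, K2)    // K2[j] = 1 iff dword j of (zmm1 & zmm0) is non-zero
+//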
+func (self *Program) VPTESTMD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPTESTMD", 3, Operands { v0, v1, v2 })
+ // VPTESTMD m512/m32bcst, zmm, k{k}
+ if isM512M32bcst(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
+ m.emit(0x27)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPTESTMD zmm, zmm, k{k}
+ if isZMM(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x27)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPTESTMD m128/m32bcst, xmm, k{k}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
+ m.emit(0x27)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPTESTMD xmm, xmm, k{k}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x27)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPTESTMD m256/m32bcst, ymm, k{k}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
+ m.emit(0x27)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPTESTMD ymm, ymm, k{k}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x27)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPTESTMD")
+ }
+ return p
+}
+
+// VPTESTMQ performs "Logical AND of Packed Quadword Integer Values and Set Mask".
+//
+// Mnemonic : VPTESTMQ
+// Supported forms : (6 forms)
+//
+// * VPTESTMQ m512/m64bcst, zmm, k{k} [AVX512F]
+// * VPTESTMQ zmm, zmm, k{k} [AVX512F]
+// * VPTESTMQ m128/m64bcst, xmm, k{k} [AVX512F,AVX512VL]
+// * VPTESTMQ xmm, xmm, k{k} [AVX512F,AVX512VL]
+// * VPTESTMQ m256/m64bcst, ymm, k{k} [AVX512F,AVX512VL]
+// * VPTESTMQ ymm, ymm, k{k} [AVX512F,AVX512VL]
+//
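+// Example (illustrative sketch), per 64-bit lane:
+//
+//     p.VPTESTMQ(ZMM0, ZMM1, K3)    // K3[j] = 1 iff qword j of (zmm1 & zmm0) is non-zero
+//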
+func (self *Program) VPTESTMQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPTESTMQ", 3, Operands { v0, v1, v2 })
+ // VPTESTMQ m512/m64bcst, zmm, k{k}
+ if isM512M64bcst(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
+ m.emit(0x27)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPTESTMQ zmm, zmm, k{k}
+ if isZMM(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x27)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPTESTMQ m128/m64bcst, xmm, k{k}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
+ m.emit(0x27)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPTESTMQ xmm, xmm, k{k}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x27)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPTESTMQ m256/m64bcst, ymm, k{k}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
+ m.emit(0x27)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPTESTMQ ymm, ymm, k{k}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x27)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPTESTMQ")
+ }
+ return p
+}
+
+// VPTESTMW performs "Logical AND of Packed Word Integer Values and Set Mask".
+//
+// Mnemonic : VPTESTMW
+// Supported forms : (6 forms)
+//
+// * VPTESTMW zmm, zmm, k{k} [AVX512BW]
+// * VPTESTMW m512, zmm, k{k} [AVX512BW]
+// * VPTESTMW xmm, xmm, k{k} [AVX512BW,AVX512VL]
+// * VPTESTMW m128, xmm, k{k} [AVX512BW,AVX512VL]
+// * VPTESTMW ymm, ymm, k{k} [AVX512BW,AVX512VL]
+// * VPTESTMW m256, ymm, k{k} [AVX512BW,AVX512VL]
+//
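+// Example (illustrative sketch), per 16-bit lane; the AVX512VL forms accept
+// XMM/YMM sources:
+//
+//     p.VPTESTMW(YMM0, YMM1, K1)    // K1[j] = 1 iff word j of (ymm1 & ymm0) is non-zero
+//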
+func (self *Program) VPTESTMW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPTESTMW", 3, Operands { v0, v1, v2 })
+ // VPTESTMW zmm, zmm, k{k}
+ if isZMM(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x26)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPTESTMW m512, zmm, k{k}
+ if isM512(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
+ m.emit(0x26)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPTESTMW xmm, xmm, k{k}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x26)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPTESTMW m128, xmm, k{k}
+ if isM128(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
+ m.emit(0x26)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPTESTMW ymm, ymm, k{k}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x26)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPTESTMW m256, ymm, k{k}
+ if isM256(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
+ m.emit(0x26)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPTESTMW")
+ }
+ return p
+}
+
+// VPTESTNMB performs "Logical NAND of Packed Byte Integer Values and Set Mask".
+//
+// Mnemonic : VPTESTNMB
+// Supported forms : (6 forms)
+//
+// * VPTESTNMB zmm, zmm, k{k} [AVX512BW,AVX512F]
+// * VPTESTNMB m512, zmm, k{k} [AVX512BW,AVX512F]
+// * VPTESTNMB xmm, xmm, k{k} [AVX512BW,AVX512VL]
+// * VPTESTNMB m128, xmm, k{k} [AVX512BW,AVX512VL]
+// * VPTESTNMB ymm, ymm, k{k} [AVX512BW,AVX512VL]
+// * VPTESTNMB m256, ymm, k{k} [AVX512BW,AVX512VL]
+//
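+// Example (illustrative sketch): the NM ("NAND") variants set a mask bit
+// where the AND result is zero, the complement of VPTESTMB:
+//
+//     p.VPTESTNMB(ZMM0, ZMM1, K1)    // K1[j] = 1 iff byte j of (zmm1 & zmm0) is zero
+//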
+func (self *Program) VPTESTNMB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPTESTNMB", 3, Operands { v0, v1, v2 })
+ // VPTESTNMB zmm, zmm, k{k}
+ if isZMM(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512F | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x26)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPTESTNMB m512, zmm, k{k}
+ if isM512(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512F | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
+ m.emit(0x26)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPTESTNMB xmm, xmm, k{k}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x26)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPTESTNMB m128, xmm, k{k}
+ if isM128(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
+ m.emit(0x26)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPTESTNMB ymm, ymm, k{k}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x26)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPTESTNMB m256, ymm, k{k}
+ if isM256(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
+ m.emit(0x26)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPTESTNMB")
+ }
+ return p
+}
+
+// VPTESTNMD performs "Logical NAND of Packed Doubleword Integer Values and Set Mask".
+//
+// Mnemonic : VPTESTNMD
+// Supported forms : (6 forms)
+//
+// * VPTESTNMD m512/m32bcst, zmm, k{k} [AVX512F]
+// * VPTESTNMD zmm, zmm, k{k} [AVX512F]
+// * VPTESTNMD m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL]
+// * VPTESTNMD xmm, xmm, k{k} [AVX512F,AVX512VL]
+// * VPTESTNMD m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL]
+// * VPTESTNMD ymm, ymm, k{k} [AVX512F,AVX512VL]
+//
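+// Example (illustrative sketch), complement of VPTESTMD:
+//
+//     p.VPTESTNMD(ZMM0, ZMM1, K2)    // K2[j] = 1 iff dword j of (zmm1 & zmm0) is zero
+//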
+func (self *Program) VPTESTNMD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPTESTNMD", 3, Operands { v0, v1, v2 })
+ // VPTESTNMD m512/m32bcst, zmm, k{k}
+ if isM512M32bcst(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
+ m.emit(0x27)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPTESTNMD zmm, zmm, k{k}
+ if isZMM(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x27)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPTESTNMD m128/m32bcst, xmm, k{k}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
+ m.emit(0x27)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPTESTNMD xmm, xmm, k{k}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x27)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPTESTNMD m256/m32bcst, ymm, k{k}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x06, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
+ m.emit(0x27)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPTESTNMD ymm, ymm, k{k}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x27)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPTESTNMD")
+ }
+ return p
+}
+
+// VPTESTNMQ performs "Logical NAND of Packed Quadword Integer Values and Set Mask".
+//
+// Mnemonic : VPTESTNMQ
+// Supported forms : (6 forms)
+//
+// * VPTESTNMQ m512/m64bcst, zmm, k{k} [AVX512F]
+// * VPTESTNMQ zmm, zmm, k{k} [AVX512F]
+// * VPTESTNMQ m128/m64bcst, xmm, k{k} [AVX512F,AVX512VL]
+// * VPTESTNMQ xmm, xmm, k{k} [AVX512F,AVX512VL]
+// * VPTESTNMQ m256/m64bcst, ymm, k{k} [AVX512F,AVX512VL]
+// * VPTESTNMQ ymm, ymm, k{k} [AVX512F,AVX512VL]
+//
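+// Example (illustrative sketch), complement of VPTESTMQ:
+//
+//     p.VPTESTNMQ(ZMM0, ZMM1, K3)    // K3[j] = 1 iff qword j of (zmm1 & zmm0) is zero
+//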
+func (self *Program) VPTESTNMQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPTESTNMQ", 3, Operands { v0, v1, v2 })
+ // VPTESTNMQ m512/m64bcst, zmm, k{k}
+ if isM512M64bcst(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x86, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
+ m.emit(0x27)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPTESTNMQ zmm, zmm, k{k}
+ if isZMM(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfe ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x27)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPTESTNMQ m128/m64bcst, xmm, k{k}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x86, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
+ m.emit(0x27)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPTESTNMQ xmm, xmm, k{k}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfe ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x27)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPTESTNMQ m256/m64bcst, ymm, k{k}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x86, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0]))
+ m.emit(0x27)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPTESTNMQ ymm, ymm, k{k}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfe ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x27)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPTESTNMQ")
+ }
+ return p
+}
+
+// VPTESTNMW performs "Logical NAND of Packed Word Integer Values and Set Mask".
+//
+// Mnemonic : VPTESTNMW
+// Supported forms : (6 forms)
+//
+// * VPTESTNMW zmm, zmm, k{k} [AVX512BW,AVX512F]
+// * VPTESTNMW m512, zmm, k{k} [AVX512BW,AVX512F]
+// * VPTESTNMW xmm, xmm, k{k} [AVX512BW,AVX512VL]
+// * VPTESTNMW m128, xmm, k{k} [AVX512BW,AVX512VL]
+// * VPTESTNMW ymm, ymm, k{k} [AVX512BW,AVX512VL]
+// * VPTESTNMW m256, ymm, k{k} [AVX512BW,AVX512VL]
+//
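+// Example (illustrative sketch), complement of VPTESTMW:
+//
+//     p.VPTESTNMW(YMM0, YMM1, K1)    // K1[j] = 1 iff word j of (ymm1 & ymm0) is zero
+//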
+func (self *Program) VPTESTNMW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPTESTNMW", 3, Operands { v0, v1, v2 })
+ // VPTESTNMW zmm, zmm, k{k}
+ if isZMM(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512F | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfe ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x26)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPTESTNMW m512, zmm, k{k}
+ if isM512(v0) && isZMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512F | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x86, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
+ m.emit(0x26)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPTESTNMW xmm, xmm, k{k}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfe ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x26)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPTESTNMW m128, xmm, k{k}
+ if isM128(v0) && isEVEXXMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x86, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
+ m.emit(0x26)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPTESTNMW ymm, ymm, k{k}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfe ^ (hlcode(v[1]) << 3))
+ m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x26)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPTESTNMW m256, ymm, k{k}
+ if isM256(v0) && isEVEXYMM(v1) && isKk(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x86, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0)
+ m.emit(0x26)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPTESTNMW")
+ }
+ return p
+}
+
+// VPUNPCKHBW performs "Unpack and Interleave High-Order Bytes into Words".
+//
+// Mnemonic : VPUNPCKHBW
+// Supported forms : (10 forms)
+//
+// * VPUNPCKHBW xmm, xmm, xmm [AVX]
+// * VPUNPCKHBW m128, xmm, xmm [AVX]
+// * VPUNPCKHBW ymm, ymm, ymm [AVX2]
+// * VPUNPCKHBW m256, ymm, ymm [AVX2]
+// * VPUNPCKHBW zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPUNPCKHBW m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPUNPCKHBW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPUNPCKHBW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPUNPCKHBW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPUNPCKHBW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
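+// Example (illustrative sketch): the second operand's bytes land in the
+// even result lanes, the first operand's in the odd ones:
+//
+//     p.VPUNPCKHBW(XMM1, XMM2, XMM0)    // xmm0 = interleave of the high 8 bytes of xmm2 and xmm1
+//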
+func (self *Program) VPUNPCKHBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPUNPCKHBW", 3, Operands { v0, v1, v2 })
+ // VPUNPCKHBW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x68)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKHBW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x68)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPUNPCKHBW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x68)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKHBW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x68)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPUNPCKHBW zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x68)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKHBW m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x68)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPUNPCKHBW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x68)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKHBW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x68)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPUNPCKHBW ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x68)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKHBW m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x68)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPUNPCKHBW")
+ }
+ return p
+}
+
+// VPUNPCKHDQ performs "Unpack and Interleave High-Order Doublewords into Quadwords".
+//
+// Mnemonic : VPUNPCKHDQ
+// Supported forms : (10 forms)
+//
+// * VPUNPCKHDQ xmm, xmm, xmm [AVX]
+// * VPUNPCKHDQ m128, xmm, xmm [AVX]
+// * VPUNPCKHDQ ymm, ymm, ymm [AVX2]
+// * VPUNPCKHDQ m256, ymm, ymm [AVX2]
+// * VPUNPCKHDQ m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPUNPCKHDQ zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPUNPCKHDQ m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPUNPCKHDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPUNPCKHDQ m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPUNPCKHDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
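+// Example (illustrative sketch), dword granularity:
+//
+//     p.VPUNPCKHDQ(XMM1, XMM2, XMM0)    // xmm0 dwords, low to high: xmm2[2], xmm1[2], xmm2[3], xmm1[3]
+//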
+func (self *Program) VPUNPCKHDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPUNPCKHDQ", 3, Operands { v0, v1, v2 })
+ // VPUNPCKHDQ xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x6a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKHDQ m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x6a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPUNPCKHDQ ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x6a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKHDQ m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x6a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPUNPCKHDQ m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x6a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPUNPCKHDQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x6a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKHDQ m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x6a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPUNPCKHDQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x6a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKHDQ m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x6a)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPUNPCKHDQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x6a)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPUNPCKHDQ")
+ }
+ return p
+}
+
+// VPUNPCKHQDQ performs "Unpack and Interleave High-Order Quadwords into Double Quadwords".
+//
+// Mnemonic : VPUNPCKHQDQ
+// Supported forms : (10 forms)
+//
+// * VPUNPCKHQDQ xmm, xmm, xmm [AVX]
+// * VPUNPCKHQDQ m128, xmm, xmm [AVX]
+// * VPUNPCKHQDQ ymm, ymm, ymm [AVX2]
+// * VPUNPCKHQDQ m256, ymm, ymm [AVX2]
+// * VPUNPCKHQDQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPUNPCKHQDQ zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPUNPCKHQDQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPUNPCKHQDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPUNPCKHQDQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPUNPCKHQDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
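+// Example (illustrative sketch), qword granularity:
+//
+//     p.VPUNPCKHQDQ(XMM1, XMM2, XMM0)    // xmm0 qwords, low to high: xmm2[1], xmm1[1]
+//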
+func (self *Program) VPUNPCKHQDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPUNPCKHQDQ", 3, Operands { v0, v1, v2 })
+ // VPUNPCKHQDQ xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x6d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKHQDQ m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x6d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPUNPCKHQDQ ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x6d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKHQDQ m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x6d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPUNPCKHQDQ m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x6d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPUNPCKHQDQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x6d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKHQDQ m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x6d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPUNPCKHQDQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x6d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKHQDQ m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x6d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPUNPCKHQDQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x6d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPUNPCKHQDQ")
+ }
+ return p
+}
+
+// VPUNPCKHWD performs "Unpack and Interleave High-Order Words into Doublewords".
+//
+// Mnemonic : VPUNPCKHWD
+// Supported forms : (10 forms)
+//
+// * VPUNPCKHWD xmm, xmm, xmm [AVX]
+// * VPUNPCKHWD m128, xmm, xmm [AVX]
+// * VPUNPCKHWD ymm, ymm, ymm [AVX2]
+// * VPUNPCKHWD m256, ymm, ymm [AVX2]
+// * VPUNPCKHWD zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPUNPCKHWD m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPUNPCKHWD xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPUNPCKHWD m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPUNPCKHWD ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPUNPCKHWD m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
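+// Example (illustrative sketch), word granularity:
+//
+//     p.VPUNPCKHWD(XMM1, XMM2, XMM0)    // xmm0 = interleave of the high 4 words of xmm2 and xmm1
+//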
+func (self *Program) VPUNPCKHWD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPUNPCKHWD", 3, Operands { v0, v1, v2 })
+ // VPUNPCKHWD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x69)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKHWD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x69)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPUNPCKHWD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x69)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKHWD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x69)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPUNPCKHWD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x69)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKHWD m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x69)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPUNPCKHWD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x69)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKHWD m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x69)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPUNPCKHWD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x69)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKHWD m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x69)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPUNPCKHWD")
+ }
+ return p
+}
+
+// VPUNPCKLBW performs "Unpack and Interleave Low-Order Bytes into Words".
+//
+// Mnemonic : VPUNPCKLBW
+// Supported forms : (10 forms)
+//
+// * VPUNPCKLBW xmm, xmm, xmm [AVX]
+// * VPUNPCKLBW m128, xmm, xmm [AVX]
+// * VPUNPCKLBW ymm, ymm, ymm [AVX2]
+// * VPUNPCKLBW m256, ymm, ymm [AVX2]
+// * VPUNPCKLBW zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPUNPCKLBW m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPUNPCKLBW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPUNPCKLBW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPUNPCKLBW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPUNPCKLBW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
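+// Example (illustrative sketch), like VPUNPCKHBW but drawing from the low
+// halves of the sources:
+//
+//     p.VPUNPCKLBW(XMM1, XMM2, XMM0)    // xmm0 = interleave of the low 8 bytes of xmm2 and xmm1
+//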
+func (self *Program) VPUNPCKLBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPUNPCKLBW", 3, Operands { v0, v1, v2 })
+ // VPUNPCKLBW xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x60)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKLBW m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x60)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPUNPCKLBW ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x60)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKLBW m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x60)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPUNPCKLBW zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x60)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKLBW m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x60)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPUNPCKLBW xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x60)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKLBW m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x60)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPUNPCKLBW ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x60)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKLBW m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x60)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPUNPCKLBW")
+ }
+ return p
+}
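+
+// Usage sketch (illustrative only; nothing below is emitted by the generator):
+// interleaving the low bytes of two XMM registers might be requested as
+//
+//	p.VPUNPCKLBW(XMM1, XMM2, XMM0)    // destination comes last, AT&T-style
+//
+// The guards above probe each supported form in order; every match appends an
+// encoder via p.add, and p.len == 0 after all guards means no form matched,
+// hence the trailing panic.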
+
+// VPUNPCKLDQ performs "Unpack and Interleave Low-Order Doublewords into Quadwords".
+//
+// Mnemonic : VPUNPCKLDQ
+// Supported forms : (10 forms)
+//
+// * VPUNPCKLDQ xmm, xmm, xmm [AVX]
+// * VPUNPCKLDQ m128, xmm, xmm [AVX]
+// * VPUNPCKLDQ ymm, ymm, ymm [AVX2]
+// * VPUNPCKLDQ m256, ymm, ymm [AVX2]
+// * VPUNPCKLDQ m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPUNPCKLDQ zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPUNPCKLDQ m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPUNPCKLDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPUNPCKLDQ m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPUNPCKLDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPUNPCKLDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPUNPCKLDQ", 3, Operands { v0, v1, v2 })
+ // VPUNPCKLDQ xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x62)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKLDQ m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x62)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPUNPCKLDQ ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x62)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKLDQ m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x62)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPUNPCKLDQ m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x62)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPUNPCKLDQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x62)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKLDQ m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x62)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPUNPCKLDQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x62)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKLDQ m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x62)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPUNPCKLDQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x62)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPUNPCKLDQ")
+ }
+ return p
+}
+
+// VPUNPCKLQDQ performs "Unpack and Interleave Low-Order Quadwords into Double Quadwords".
+//
+// Mnemonic : VPUNPCKLQDQ
+// Supported forms : (10 forms)
+//
+// * VPUNPCKLQDQ xmm, xmm, xmm [AVX]
+// * VPUNPCKLQDQ m128, xmm, xmm [AVX]
+// * VPUNPCKLQDQ ymm, ymm, ymm [AVX2]
+// * VPUNPCKLQDQ m256, ymm, ymm [AVX2]
+// * VPUNPCKLQDQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPUNPCKLQDQ zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPUNPCKLQDQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPUNPCKLQDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPUNPCKLQDQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPUNPCKLQDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPUNPCKLQDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPUNPCKLQDQ", 3, Operands { v0, v1, v2 })
+ // VPUNPCKLQDQ xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x6c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKLQDQ m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x6c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPUNPCKLQDQ ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x6c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKLQDQ m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x6c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPUNPCKLQDQ m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x6c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPUNPCKLQDQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x6c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKLQDQ m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x6c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPUNPCKLQDQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x6c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKLQDQ m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x6c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPUNPCKLQDQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x6c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPUNPCKLQDQ")
+ }
+ return p
+}
+
+// VPUNPCKLWD performs "Unpack and Interleave Low-Order Words into Doublewords".
+//
+// Mnemonic : VPUNPCKLWD
+// Supported forms : (10 forms)
+//
+// * VPUNPCKLWD xmm, xmm, xmm [AVX]
+// * VPUNPCKLWD m128, xmm, xmm [AVX]
+// * VPUNPCKLWD ymm, ymm, ymm [AVX2]
+// * VPUNPCKLWD m256, ymm, ymm [AVX2]
+// * VPUNPCKLWD zmm, zmm, zmm{k}{z} [AVX512BW]
+// * VPUNPCKLWD m512, zmm, zmm{k}{z} [AVX512BW]
+// * VPUNPCKLWD xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPUNPCKLWD m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL]
+// * VPUNPCKLWD ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+// * VPUNPCKLWD m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL]
+//
+func (self *Program) VPUNPCKLWD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPUNPCKLWD", 3, Operands { v0, v1, v2 })
+ // VPUNPCKLWD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x61)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKLWD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x61)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPUNPCKLWD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x61)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKLWD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x61)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPUNPCKLWD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x61)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKLWD m512, zmm, zmm{k}{z}
+ if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x61)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPUNPCKLWD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x61)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKLWD m128, xmm, xmm{k}{z}
+ if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x61)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPUNPCKLWD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x61)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPUNPCKLWD m256, ymm, ymm{k}{z}
+ if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512BW)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x61)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPUNPCKLWD")
+ }
+ return p
+}
+
+// VPXOR performs "Packed Bitwise Logical Exclusive OR".
+//
+// Mnemonic : VPXOR
+// Supported forms : (4 forms)
+//
+// * VPXOR xmm, xmm, xmm [AVX]
+// * VPXOR m128, xmm, xmm [AVX]
+// * VPXOR ymm, ymm, ymm [AVX2]
+// * VPXOR m256, ymm, ymm [AVX2]
+//
+func (self *Program) VPXOR(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPXOR", 3, Operands { v0, v1, v2 })
+ // VPXOR xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xef)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPXOR m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xef)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VPXOR ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0xef)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPXOR m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX2)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0xef)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPXOR")
+ }
+ return p
+}
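+
+// Usage sketch (illustrative; Ptr is an assumed memory-operand helper, not
+// defined in this file):
+//
+//	p.VPXOR(YMM1, YMM2, YMM0)         // register form, VEX.256, ISA_AVX2
+//	p.VPXOR(Ptr(RDI, 0), XMM2, XMM0)  // m128 form, VEX.128, ISA_AVX
+//
+// Note there is no EVEX form here: under AVX-512, packed-integer xor is
+// spelled VPXORD/VPXORQ, defined below.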
+
+// VPXORD performs "Bitwise Logical Exclusive OR of Packed Doubleword Integers".
+//
+// Mnemonic : VPXORD
+// Supported forms : (6 forms)
+//
+// * VPXORD m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPXORD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPXORD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPXORD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPXORD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPXORD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPXORD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPXORD", 3, Operands { v0, v1, v2 })
+ // VPXORD m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xef)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPXORD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xef)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPXORD m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xef)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPXORD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xef)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPXORD m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xef)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPXORD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xef)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPXORD")
+ }
+ return p
+}
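+
+// Usage sketch (illustrative; the masked-operand spelling is an assumption,
+// the real constructor lives elsewhere in this package):
+//
+//	p.VPXORD(ZMM1, ZMM2, ZMM0)                           // plain zmm, zmm, zmm form
+//	p.VPXORD(Ptr(RSI, 0), ZMM2, masked(ZMM0, K1, true))  // m512/m32bcst with {k1}{z}
+//
+// kcode/zcode extract the mask register and zeroing flag from the destination
+// operand, and bcode(v[0]) sets the 32-bit broadcast bit for the memory form.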
+
+// VPXORQ performs "Bitwise Logical Exclusive OR of Packed Quadword Integers".
+//
+// Mnemonic : VPXORQ
+// Supported forms : (6 forms)
+//
+// * VPXORQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPXORQ zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPXORQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPXORQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPXORQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPXORQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPXORQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VPXORQ", 3, Operands { v0, v1, v2 })
+ // VPXORQ m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xef)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VPXORQ zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xef)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPXORQ m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xef)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VPXORQ xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0xef)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VPXORQ m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0xef)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VPXORQ ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0xef)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VPXORQ")
+ }
+ return p
+}
+
+// VRANGEPD performs "Range Restriction Calculation For Packed Pairs of Double-Precision Floating-Point Values".
+//
+// Mnemonic : VRANGEPD
+// Supported forms : (7 forms)
+//
+// * VRANGEPD imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512DQ]
+// * VRANGEPD imm8, {sae}, zmm, zmm, zmm{k}{z} [AVX512DQ]
+// * VRANGEPD imm8, zmm, zmm, zmm{k}{z} [AVX512DQ]
+// * VRANGEPD imm8, m128/m64bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VRANGEPD imm8, xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VRANGEPD imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VRANGEPD imm8, ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VRANGEPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VRANGEPD", 4, Operands { v0, v1, v2, v3 })
+ case 1 : p = self.alloc("VRANGEPD", 5, Operands { v0, v1, v2, v3, vv[0] })
+ default : panic("instruction VRANGEPD takes 4 or 5 operands")
+ }
+ // VRANGEPD imm8, m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x50)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRANGEPD imm8, {sae}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMM(v3) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[3]) << 3))
+ m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
+ m.emit(0x50)
+ m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRANGEPD imm8, zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x50)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRANGEPD imm8, m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x50)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRANGEPD imm8, xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+ m.emit(0x50)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRANGEPD imm8, m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x50)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRANGEPD imm8, ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x50)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRANGEPD")
+ }
+ return p
+}
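+
+// Usage sketch (illustrative; SAE is assumed to be the exported
+// suppress-all-exceptions marker matched by isSAE):
+//
+//	p.VRANGEPD(2, ZMM1, ZMM2, ZMM0)       // 4-operand form: imm8, zmm, zmm, zmm{k}{z}
+//	p.VRANGEPD(2, SAE, ZMM1, ZMM2, ZMM0)  // 5-operand form: imm8, {sae}, ...
+//
+// The variadic tail vv carries the optional fifth operand; any other arity
+// panics before a form is even probed.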
+
+// VRANGEPS performs "Range Restriction Calculation For Packed Pairs of Single-Precision Floating-Point Values".
+//
+// Mnemonic : VRANGEPS
+// Supported forms : (7 forms)
+//
+// * VRANGEPS imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512DQ]
+// * VRANGEPS imm8, {sae}, zmm, zmm, zmm{k}{z} [AVX512DQ]
+// * VRANGEPS imm8, zmm, zmm, zmm{k}{z} [AVX512DQ]
+// * VRANGEPS imm8, m128/m32bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VRANGEPS imm8, xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VRANGEPS imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VRANGEPS imm8, ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VRANGEPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VRANGEPS", 4, Operands { v0, v1, v2, v3 })
+ case 1 : p = self.alloc("VRANGEPS", 5, Operands { v0, v1, v2, v3, vv[0] })
+ default : panic("instruction VRANGEPS takes 4 or 5 operands")
+ }
+ // VRANGEPS imm8, m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x50)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRANGEPS imm8, {sae}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMM(v3) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[3]) << 3))
+ m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
+ m.emit(0x50)
+ m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRANGEPS imm8, zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x50)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRANGEPS imm8, m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x50)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRANGEPS imm8, xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+ m.emit(0x50)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRANGEPS imm8, m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x50)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRANGEPS imm8, ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x50)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRANGEPS")
+ }
+ return p
+}
+
+// VRANGESD performs "Range Restriction Calculation For a Pair of Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : VRANGESD
+// Supported forms : (3 forms)
+//
+// * VRANGESD imm8, m64, xmm, xmm{k}{z} [AVX512DQ]
+// * VRANGESD imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512DQ]
+// * VRANGESD imm8, xmm, xmm, xmm{k}{z} [AVX512DQ]
+//
+func (self *Program) VRANGESD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VRANGESD", 4, Operands { v0, v1, v2, v3 })
+ case 1 : p = self.alloc("VRANGESD", 5, Operands { v0, v1, v2, v3, vv[0] })
+ default : panic("instruction VRANGESD takes 4 or 5 operands")
+ }
+ // VRANGESD imm8, m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x51)
+ m.mrsd(lcode(v[3]), addr(v[1]), 8)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRANGESD imm8, {sae}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[3]) << 3))
+ m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRANGESD imm8, xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRANGESD")
+ }
+ return p
+}
+
+// VRANGESS performs "Range Restriction Calculation For a Pair of Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VRANGESS
+// Supported forms : (3 forms)
+//
+// * VRANGESS imm8, m32, xmm, xmm{k}{z} [AVX512DQ]
+// * VRANGESS imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512DQ]
+// * VRANGESS imm8, xmm, xmm, xmm{k}{z} [AVX512DQ]
+//
+func (self *Program) VRANGESS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VRANGESS", 4, Operands { v0, v1, v2, v3 })
+ case 1 : p = self.alloc("VRANGESS", 5, Operands { v0, v1, v2, v3, vv[0] })
+ default : panic("instruction VRANGESS takes 4 or 5 operands")
+ }
+ // VRANGESS imm8, m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x51)
+ m.mrsd(lcode(v[3]), addr(v[1]), 4)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRANGESS imm8, {sae}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[3]) << 3))
+ m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRANGESS imm8, xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRANGESS")
+ }
+ return p
+}
+
+// VRCP14PD performs "Compute Approximate Reciprocals of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VRCP14PD
+// Supported forms : (6 forms)
+//
+// * VRCP14PD m512/m64bcst, zmm{k}{z} [AVX512F]
+// * VRCP14PD zmm, zmm{k}{z} [AVX512F]
+// * VRCP14PD m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VRCP14PD m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VRCP14PD xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VRCP14PD ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VRCP14PD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VRCP14PD", 2, Operands { v0, v1 })
+ // VRCP14PD m512/m64bcst, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x4c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VRCP14PD zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x4c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VRCP14PD m128/m64bcst, xmm{k}{z}
+ if isM128M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x4c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VRCP14PD m256/m64bcst, ymm{k}{z}
+ if isM256M64bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x4c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VRCP14PD xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x4c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VRCP14PD ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x4c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRCP14PD")
+ }
+ return p
+}
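+
+// Usage sketch (illustrative; the broadcast-memory wrapper matched by bcode is
+// an assumed spelling):
+//
+//	p.VRCP14PD(ZMM1, ZMM0)                    // zmm, zmm{k}{z}
+//	p.VRCP14PD(broadcast(Ptr(RAX, 0)), ZMM0)  // m512/m64bcst form
+//
+// The relative error of the approximation is bounded by 2^-14, which is where
+// the "14" in the mnemonic comes from.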
+
+// VRCP14PS performs "Compute Approximate Reciprocals of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VRCP14PS
+// Supported forms : (6 forms)
+//
+// * VRCP14PS m512/m32bcst, zmm{k}{z} [AVX512F]
+// * VRCP14PS zmm, zmm{k}{z} [AVX512F]
+// * VRCP14PS m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VRCP14PS m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VRCP14PS xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VRCP14PS ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VRCP14PS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VRCP14PS", 2, Operands { v0, v1 })
+ // VRCP14PS m512/m32bcst, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x4c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VRCP14PS zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x4c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VRCP14PS m128/m32bcst, xmm{k}{z}
+ if isM128M32bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x4c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VRCP14PS m256/m32bcst, ymm{k}{z}
+ if isM256M32bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x4c)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VRCP14PS xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x4c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VRCP14PS ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x4c)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRCP14PS")
+ }
+ return p
+}
+
+// VRCP14SD performs "Compute Approximate Reciprocal of a Scalar Double-Precision Floating-Point Value".
+//
+// Mnemonic : VRCP14SD
+// Supported forms : (2 forms)
+//
+// * VRCP14SD xmm, xmm, xmm{k}{z} [AVX512F]
+// * VRCP14SD m64, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VRCP14SD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VRCP14SD", 3, Operands { v0, v1, v2 })
+ // VRCP14SD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x4d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VRCP14SD m64, xmm, xmm{k}{z}
+ if isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x4d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRCP14SD")
+ }
+ return p
+}
+
+// VRCP14SS performs "Compute Approximate Reciprocal of a Scalar Single-Precision Floating-Point Value".
+//
+// Mnemonic : VRCP14SS
+// Supported forms : (2 forms)
+//
+// * VRCP14SS xmm, xmm, xmm{k}{z} [AVX512F]
+// * VRCP14SS m32, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VRCP14SS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VRCP14SS", 3, Operands { v0, v1, v2 })
+ // VRCP14SS xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x4d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VRCP14SS m32, xmm, xmm{k}{z}
+ if isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x4d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRCP14SS")
+ }
+ return p
+}
+
+// VRCP28PD performs "Approximation to the Reciprocal of Packed Double-Precision Floating-Point Values with Less Than 2^-28 Relative Error".
+//
+// Mnemonic : VRCP28PD
+// Supported forms : (3 forms)
+//
+// * VRCP28PD m512/m64bcst, zmm{k}{z} [AVX512ER]
+// * VRCP28PD {sae}, zmm, zmm{k}{z} [AVX512ER]
+// * VRCP28PD zmm, zmm{k}{z} [AVX512ER]
+//
+func (self *Program) VRCP28PD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VRCP28PD", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VRCP28PD", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VRCP28PD takes 2 or 3 operands")
+ }
+ // VRCP28PD m512/m64bcst, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0xca)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VRCP28PD {sae}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
+ m.emit(0xca)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VRCP28PD zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0xca)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRCP28PD")
+ }
+ return p
+}
+
+// VRCP28PS performs "Approximation to the Reciprocal of Packed Single-Precision Floating-Point Values with Less Than 2^-28 Relative Error".
+//
+// Mnemonic : VRCP28PS
+// Supported forms : (3 forms)
+//
+// * VRCP28PS m512/m32bcst, zmm{k}{z} [AVX512ER]
+// * VRCP28PS {sae}, zmm, zmm{k}{z} [AVX512ER]
+// * VRCP28PS zmm, zmm{k}{z} [AVX512ER]
+//
+func (self *Program) VRCP28PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VRCP28PS", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VRCP28PS", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VRCP28PS takes 2 or 3 operands")
+ }
+ // VRCP28PS m512/m32bcst, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0xca)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VRCP28PS {sae}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
+ m.emit(0xca)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VRCP28PS zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0xca)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRCP28PS")
+ }
+ return p
+}
+
+// VRCP28SD performs "Approximation to the Reciprocal of a Scalar Double-Precision Floating-Point Value with Less Than 2^-28 Relative Error".
+//
+// Mnemonic : VRCP28SD
+// Supported forms : (3 forms)
+//
+// * VRCP28SD m64, xmm, xmm{k}{z} [AVX512ER]
+// * VRCP28SD {sae}, xmm, xmm, xmm{k}{z} [AVX512ER]
+// * VRCP28SD xmm, xmm, xmm{k}{z} [AVX512ER]
+//
+func (self *Program) VRCP28SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VRCP28SD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VRCP28SD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VRCP28SD takes 3 or 4 operands")
+ }
+ // VRCP28SD m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xcb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VRCP28SD {sae}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xcb)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VRCP28SD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xcb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRCP28SD")
+ }
+ return p
+}
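+
+// Usage sketch (illustrative; SAE is the assumed suppress-all-exceptions
+// marker):
+//
+//	p.VRCP28SD(XMM1, XMM2, XMM0)       // xmm, xmm, xmm{k}{z}
+//	p.VRCP28SD(SAE, XMM1, XMM2, XMM0)  // {sae}, xmm, xmm, xmm{k}{z}
+//
+// Only the low double is approximated; the upper bits of the destination are
+// copied from the middle (first-source) operand.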
+
+// VRCP28SS performs "Approximation to the Reciprocal of a Scalar Single-Precision Floating-Point Value with Less Than 2^-28 Relative Error".
+//
+// Mnemonic : VRCP28SS
+// Supported forms : (3 forms)
+//
+// * VRCP28SS m32, xmm, xmm{k}{z} [AVX512ER]
+// * VRCP28SS {sae}, xmm, xmm, xmm{k}{z} [AVX512ER]
+// * VRCP28SS xmm, xmm, xmm{k}{z} [AVX512ER]
+//
+func (self *Program) VRCP28SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VRCP28SS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VRCP28SS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VRCP28SS takes 3 or 4 operands")
+ }
+ // VRCP28SS m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xcb)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VRCP28SS {sae}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xcb)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VRCP28SS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xcb)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRCP28SS")
+ }
+ return p
+}
+
+// VRCPPS performs "Compute Approximate Reciprocals of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VRCPPS
+// Supported forms : (4 forms)
+//
+// * VRCPPS xmm, xmm [AVX]
+// * VRCPPS m128, xmm [AVX]
+// * VRCPPS ymm, ymm [AVX]
+// * VRCPPS m256, ymm [AVX]
+//
+func (self *Program) VRCPPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VRCPPS", 2, Operands { v0, v1 })
+ // VRCPPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[1]), v[0], 0)
+ m.emit(0x53)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VRCPPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x53)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VRCPPS ymm, ymm
+ if isYMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[1]), v[0], 0)
+ m.emit(0x53)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VRCPPS m256, ymm
+ if isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x53)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRCPPS")
+ }
+ return p
+}
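+
+// Usage sketch (illustrative; Ptr is an assumed memory-operand helper):
+//
+//	p.VRCPPS(YMM1, YMM0)         // ymm, ymm — VEX.256, 8 lanes at once
+//	p.VRCPPS(Ptr(RDX, 0), XMM0)  // m128, xmm
+//
+// Unlike the VRCP14/VRCP28 families, this AVX form supports no masking and has
+// a looser error bound (|relative error| <= 1.5 * 2^-12 per the SDM).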
+
+// VRCPSS performs "Compute Approximate Reciprocal of Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VRCPSS
+// Supported forms : (2 forms)
+//
+// * VRCPSS xmm, xmm, xmm [AVX]
+// * VRCPSS m32, xmm, xmm [AVX]
+//
+func (self *Program) VRCPSS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VRCPSS", 3, Operands { v0, v1, v2 })
+ // VRCPSS xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x53)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VRCPSS m32, xmm, xmm
+ if isM32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x53)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRCPSS")
+ }
+ return p
+}
+
+// VREDUCEPD performs "Perform Reduction Transformation on Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VREDUCEPD
+// Supported forms : (6 forms)
+//
+// * VREDUCEPD imm8, m512/m64bcst, zmm{k}{z} [AVX512DQ]
+// * VREDUCEPD imm8, zmm, zmm{k}{z} [AVX512DQ]
+// * VREDUCEPD imm8, m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VREDUCEPD imm8, m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VREDUCEPD imm8, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VREDUCEPD imm8, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VREDUCEPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VREDUCEPD", 3, Operands { v0, v1, v2 })
+ // VREDUCEPD imm8, m512/m64bcst, zmm{k}{z}
+ if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x56)
+ m.mrsd(lcode(v[2]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VREDUCEPD imm8, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+ m.emit(0x56)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VREDUCEPD imm8, m128/m64bcst, xmm{k}{z}
+ if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x56)
+ m.mrsd(lcode(v[2]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VREDUCEPD imm8, m256/m64bcst, ymm{k}{z}
+ if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x56)
+ m.mrsd(lcode(v[2]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VREDUCEPD imm8, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
+ m.emit(0x56)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VREDUCEPD imm8, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
+ m.emit(0x56)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VREDUCEPD")
+ }
+ return p
+}
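+
+// Usage sketch (assuming ZMM register constants and that a plain Go
+// integer is accepted as imm8, as toImmAny suggests). Per the Intel SDM,
+// imm8[7:4] gives the fixed-point length M of the reduction and
+// imm8[1:0] the rounding mode:
+//
+//	p.VREDUCEPD(0x30, ZMM1, ZMM0) // reduce with M=3, round-to-nearest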
+
+// VREDUCEPS performs "Perform Reduction Transformation on Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VREDUCEPS
+// Supported forms : (6 forms)
+//
+// * VREDUCEPS imm8, m512/m32bcst, zmm{k}{z} [AVX512DQ]
+// * VREDUCEPS imm8, zmm, zmm{k}{z} [AVX512DQ]
+// * VREDUCEPS imm8, m128/m32bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VREDUCEPS imm8, m256/m32bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VREDUCEPS imm8, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VREDUCEPS imm8, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VREDUCEPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VREDUCEPS", 3, Operands { v0, v1, v2 })
+ // VREDUCEPS imm8, m512/m32bcst, zmm{k}{z}
+ if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x56)
+ m.mrsd(lcode(v[2]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VREDUCEPS imm8, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+ m.emit(0x56)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VREDUCEPS imm8, m128/m32bcst, xmm{k}{z}
+ if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x56)
+ m.mrsd(lcode(v[2]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VREDUCEPS imm8, m256/m32bcst, ymm{k}{z}
+ if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x56)
+ m.mrsd(lcode(v[2]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VREDUCEPS imm8, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
+ m.emit(0x56)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VREDUCEPS imm8, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
+ m.emit(0x56)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VREDUCEPS")
+ }
+ return p
+}
+
+// VREDUCESD performs "Perform Reduction Transformation on a Scalar Double-Precision Floating-Point Value".
+//
+// Mnemonic : VREDUCESD
+// Supported forms : (2 forms)
+//
+// * VREDUCESD imm8, xmm, xmm, xmm{k}{z} [AVX512DQ]
+// * VREDUCESD imm8, m64, xmm, xmm{k}{z} [AVX512DQ]
+//
+func (self *Program) VREDUCESD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VREDUCESD", 4, Operands { v0, v1, v2, v3 })
+ // VREDUCESD imm8, xmm, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+ m.emit(0x57)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VREDUCESD imm8, m64, xmm, xmm{k}{z}
+ if isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x57)
+ m.mrsd(lcode(v[3]), addr(v[1]), 8)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VREDUCESD")
+ }
+ return p
+}
+
+// VREDUCESS performs "Perform Reduction Transformation on a Scalar Single-Precision Floating-Point Value".
+//
+// Mnemonic : VREDUCESS
+// Supported forms : (2 forms)
+//
+// * VREDUCESS imm8, xmm, xmm, xmm{k}{z} [AVX512DQ]
+// * VREDUCESS imm8, m32, xmm, xmm{k}{z} [AVX512DQ]
+//
+func (self *Program) VREDUCESS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VREDUCESS", 4, Operands { v0, v1, v2, v3 })
+ // VREDUCESS imm8, xmm, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+ m.emit(0x57)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VREDUCESS imm8, m32, xmm, xmm{k}{z}
+ if isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x57)
+ m.mrsd(lcode(v[3]), addr(v[1]), 4)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VREDUCESS")
+ }
+ return p
+}
+
+// VRNDSCALEPD performs "Round Packed Double-Precision Floating-Point Values To Include A Given Number Of Fraction Bits".
+//
+// Mnemonic : VRNDSCALEPD
+// Supported forms : (7 forms)
+//
+// * VRNDSCALEPD imm8, m512/m64bcst, zmm{k}{z} [AVX512F]
+// * VRNDSCALEPD imm8, {sae}, zmm, zmm{k}{z} [AVX512F]
+// * VRNDSCALEPD imm8, zmm, zmm{k}{z} [AVX512F]
+// * VRNDSCALEPD imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VRNDSCALEPD imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VRNDSCALEPD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VRNDSCALEPD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VRNDSCALEPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VRNDSCALEPD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VRNDSCALEPD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VRNDSCALEPD takes 3 or 4 operands")
+ }
+ // VRNDSCALEPD imm8, m512/m64bcst, zmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x09)
+ m.mrsd(lcode(v[2]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRNDSCALEPD imm8, {sae}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[3]) << 7) | kcode(v[3]) | 0x18)
+ m.emit(0x09)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRNDSCALEPD imm8, zmm, zmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+ m.emit(0x09)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRNDSCALEPD imm8, m128/m64bcst, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x09)
+ m.mrsd(lcode(v[2]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRNDSCALEPD imm8, m256/m64bcst, ymm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x09)
+ m.mrsd(lcode(v[2]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRNDSCALEPD imm8, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
+ m.emit(0x09)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRNDSCALEPD imm8, ymm, ymm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
+ m.emit(0x09)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRNDSCALEPD")
+ }
+ return p
+}
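+
+// Usage sketch. VRNDSCALE* rounds to 2^-M precision, i.e. per the Intel
+// SDM dst = 2^-M * round(2^M * src), with M taken from imm8[7:4] and the
+// rounding mode from imm8[1:0]; passing this package's {sae} operand in
+// the second position selects the four-operand form handled above.
+// Assuming ZMM register constants:
+//
+//	p.VRNDSCALEPD(0x10, ZMM1, ZMM0) // keep 1 fraction bit, round-to-nearest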
+
+// VRNDSCALEPS performs "Round Packed Single-Precision Floating-Point Values To Include A Given Number Of Fraction Bits".
+//
+// Mnemonic : VRNDSCALEPS
+// Supported forms : (7 forms)
+//
+// * VRNDSCALEPS imm8, m512/m32bcst, zmm{k}{z} [AVX512F]
+// * VRNDSCALEPS imm8, {sae}, zmm, zmm{k}{z} [AVX512F]
+// * VRNDSCALEPS imm8, zmm, zmm{k}{z} [AVX512F]
+// * VRNDSCALEPS imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VRNDSCALEPS imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VRNDSCALEPS imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VRNDSCALEPS imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VRNDSCALEPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VRNDSCALEPS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VRNDSCALEPS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VRNDSCALEPS takes 3 or 4 operands")
+ }
+ // VRNDSCALEPS imm8, m512/m32bcst, zmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x08)
+ m.mrsd(lcode(v[2]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRNDSCALEPS imm8, {sae}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[3]) << 7) | kcode(v[3]) | 0x18)
+ m.emit(0x08)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRNDSCALEPS imm8, zmm, zmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+ m.emit(0x08)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRNDSCALEPS imm8, m128/m32bcst, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x08)
+ m.mrsd(lcode(v[2]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRNDSCALEPS imm8, m256/m32bcst, ymm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+ m.emit(0x08)
+ m.mrsd(lcode(v[2]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRNDSCALEPS imm8, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
+ m.emit(0x08)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRNDSCALEPS imm8, ymm, ymm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
+ m.emit(0x08)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRNDSCALEPS")
+ }
+ return p
+}
+
+// VRNDSCALESD performs "Round Scalar Double-Precision Floating-Point Value To Include A Given Number Of Fraction Bits".
+//
+// Mnemonic : VRNDSCALESD
+// Supported forms : (3 forms)
+//
+// * VRNDSCALESD imm8, m64, xmm, xmm{k}{z} [AVX512F]
+// * VRNDSCALESD imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VRNDSCALESD imm8, xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VRNDSCALESD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VRNDSCALESD", 4, Operands { v0, v1, v2, v3 })
+ case 1 : p = self.alloc("VRNDSCALESD", 5, Operands { v0, v1, v2, v3, vv[0] })
+ default : panic("instruction VRNDSCALESD takes 4 or 5 operands")
+ }
+ // VRNDSCALESD imm8, m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x0b)
+ m.mrsd(lcode(v[3]), addr(v[1]), 8)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRNDSCALESD imm8, {sae}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[3]) << 3))
+ m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
+ m.emit(0x0b)
+ m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRNDSCALESD imm8, xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x0b)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRNDSCALESD")
+ }
+ return p
+}
+
+// VRNDSCALESS performs "Round Scalar Single-Precision Floating-Point Value To Include A Given Number Of Fraction Bits".
+//
+// Mnemonic : VRNDSCALESS
+// Supported forms : (3 forms)
+//
+// * VRNDSCALESS imm8, m32, xmm, xmm{k}{z} [AVX512F]
+// * VRNDSCALESS imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VRNDSCALESS imm8, xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VRNDSCALESS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VRNDSCALESS", 4, Operands { v0, v1, v2, v3 })
+ case 1 : p = self.alloc("VRNDSCALESS", 5, Operands { v0, v1, v2, v3, vv[0] })
+ default : panic("instruction VRNDSCALESS takes 4 or 5 operands")
+ }
+ // VRNDSCALESS imm8, m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+ m.emit(0x0a)
+ m.mrsd(lcode(v[3]), addr(v[1]), 4)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRNDSCALESS imm8, {sae}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[3]) << 3))
+ m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
+ m.emit(0x0a)
+ m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VRNDSCALESS imm8, xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x0a)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRNDSCALESS")
+ }
+ return p
+}
+
+// VROUNDPD performs "Round Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VROUNDPD
+// Supported forms : (4 forms)
+//
+// * VROUNDPD imm8, xmm, xmm [AVX]
+// * VROUNDPD imm8, m128, xmm [AVX]
+// * VROUNDPD imm8, ymm, ymm [AVX]
+// * VROUNDPD imm8, m256, ymm [AVX]
+//
+func (self *Program) VROUNDPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VROUNDPD", 3, Operands { v0, v1, v2 })
+ // VROUNDPD imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79)
+ m.emit(0x09)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VROUNDPD imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0x09)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VROUNDPD imm8, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d)
+ m.emit(0x09)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VROUNDPD imm8, m256, ymm
+ if isImm8(v0) && isM256(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0x09)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VROUNDPD")
+ }
+ return p
+}
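+
+// Usage sketch. In the legacy AVX encodings above, imm8[1:0] picks the
+// rounding mode (0 nearest, 1 down, 2 up, 3 truncate), setting imm8[2]
+// defers to MXCSR.RC instead, and imm8[3] suppresses the precision
+// exception. Assuming this package's register constants:
+//
+//	p.VROUNDPD(0x3, XMM1, XMM0) // truncate each lane of xmm1 into xmm0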
+
+// VROUNDPS performs "Round Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VROUNDPS
+// Supported forms : (4 forms)
+//
+// * VROUNDPS imm8, xmm, xmm [AVX]
+// * VROUNDPS imm8, m128, xmm [AVX]
+// * VROUNDPS imm8, ymm, ymm [AVX]
+// * VROUNDPS imm8, m256, ymm [AVX]
+//
+func (self *Program) VROUNDPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VROUNDPS", 3, Operands { v0, v1, v2 })
+ // VROUNDPS imm8, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79)
+ m.emit(0x08)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VROUNDPS imm8, m128, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0x08)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VROUNDPS imm8, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x7d)
+ m.emit(0x08)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VROUNDPS imm8, m256, ymm
+ if isImm8(v0) && isM256(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x05, hcode(v[2]), addr(v[1]), 0)
+ m.emit(0x08)
+ m.mrsd(lcode(v[2]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VROUNDPS")
+ }
+ return p
+}
+
+// VROUNDSD performs "Round Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : VROUNDSD
+// Supported forms : (2 forms)
+//
+// * VROUNDSD imm8, xmm, xmm, xmm [AVX]
+// * VROUNDSD imm8, m64, xmm, xmm [AVX]
+//
+func (self *Program) VROUNDSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VROUNDSD", 4, Operands { v0, v1, v2, v3 })
+ // VROUNDSD imm8, xmm, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x0b)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VROUNDSD imm8, m64, xmm, xmm
+ if isImm8(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x0b)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VROUNDSD")
+ }
+ return p
+}
+
+// VROUNDSS performs "Round Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VROUNDSS
+// Supported forms : (2 forms)
+//
+// * VROUNDSS imm8, xmm, xmm, xmm [AVX]
+// * VROUNDSS imm8, m32, xmm, xmm [AVX]
+//
+func (self *Program) VROUNDSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VROUNDSS", 4, Operands { v0, v1, v2, v3 })
+ // VROUNDSS imm8, xmm, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5))
+ m.emit(0x79 ^ (hlcode(v[2]) << 3))
+ m.emit(0x0a)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VROUNDSS imm8, m32, xmm, xmm
+ if isImm8(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0x0a)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VROUNDSS")
+ }
+ return p
+}
+
+// VRSQRT14PD performs "Compute Approximate Reciprocals of Square Roots of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VRSQRT14PD
+// Supported forms : (6 forms)
+//
+// * VRSQRT14PD m512/m64bcst, zmm{k}{z} [AVX512F]
+// * VRSQRT14PD zmm, zmm{k}{z} [AVX512F]
+// * VRSQRT14PD m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VRSQRT14PD m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VRSQRT14PD xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VRSQRT14PD ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VRSQRT14PD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VRSQRT14PD", 2, Operands { v0, v1 })
+ // VRSQRT14PD m512/m64bcst, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x4e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VRSQRT14PD zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x4e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VRSQRT14PD m128/m64bcst, xmm{k}{z}
+ if isM128M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x4e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VRSQRT14PD m256/m64bcst, ymm{k}{z}
+ if isM256M64bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x4e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VRSQRT14PD xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x4e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VRSQRT14PD ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x4e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRSQRT14PD")
+ }
+ return p
+}
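+
+// Usage sketch. The "14" family guarantees a maximum relative error of
+// 2^-14 (the AVX512ER "28" variants below tighten that to 2^-28).
+// Assuming ZMM register constants, and that the zmm{k}{z} destination
+// also accepts a plain, unmasked register:
+//
+//	p.VRSQRT14PD(ZMM1, ZMM0)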
+
+// VRSQRT14PS performs "Compute Approximate Reciprocals of Square Roots of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VRSQRT14PS
+// Supported forms : (6 forms)
+//
+// * VRSQRT14PS m512/m32bcst, zmm{k}{z} [AVX512F]
+// * VRSQRT14PS zmm, zmm{k}{z} [AVX512F]
+// * VRSQRT14PS m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VRSQRT14PS m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VRSQRT14PS xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VRSQRT14PS ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VRSQRT14PS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VRSQRT14PS", 2, Operands { v0, v1 })
+ // VRSQRT14PS m512/m32bcst, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x4e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VRSQRT14PS zmm, zmm{k}{z}
+ if isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x4e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VRSQRT14PS m128/m32bcst, xmm{k}{z}
+ if isM128M32bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x4e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VRSQRT14PS m256/m32bcst, ymm{k}{z}
+ if isM256M32bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x4e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VRSQRT14PS xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x4e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VRSQRT14PS ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x4e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRSQRT14PS")
+ }
+ return p
+}
+
+// VRSQRT14SD performs "Compute Approximate Reciprocal of a Square Root of a Scalar Double-Precision Floating-Point Value".
+//
+// Mnemonic : VRSQRT14SD
+// Supported forms : (2 forms)
+//
+// * VRSQRT14SD xmm, xmm, xmm{k}{z} [AVX512F]
+// * VRSQRT14SD m64, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VRSQRT14SD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VRSQRT14SD", 3, Operands { v0, v1, v2 })
+ // VRSQRT14SD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x4f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VRSQRT14SD m64, xmm, xmm{k}{z}
+ if isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x4f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRSQRT14SD")
+ }
+ return p
+}
+
+// VRSQRT14SS performs "Compute Approximate Reciprocal of a Square Root of a Scalar Single-Precision Floating-Point Value".
+//
+// Mnemonic : VRSQRT14SS
+// Supported forms : (2 forms)
+//
+// * VRSQRT14SS xmm, xmm, xmm{k}{z} [AVX512F]
+// * VRSQRT14SS m32, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VRSQRT14SS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VRSQRT14SS", 3, Operands { v0, v1, v2 })
+ // VRSQRT14SS xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x4f)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VRSQRT14SS m32, xmm, xmm{k}{z}
+ if isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x4f)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRSQRT14SS")
+ }
+ return p
+}
+
+// VRSQRT28PD performs "Approximation to the Reciprocal Square Root of Packed Double-Precision Floating-Point Values with Less Than 2^-28 Relative Error".
+//
+// Mnemonic : VRSQRT28PD
+// Supported forms : (3 forms)
+//
+// * VRSQRT28PD m512/m64bcst, zmm{k}{z} [AVX512ER]
+// * VRSQRT28PD {sae}, zmm, zmm{k}{z} [AVX512ER]
+// * VRSQRT28PD zmm, zmm{k}{z} [AVX512ER]
+//
+func (self *Program) VRSQRT28PD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VRSQRT28PD", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VRSQRT28PD", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VRSQRT28PD takes 2 or 3 operands")
+ }
+ // VRSQRT28PD m512/m64bcst, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0xcc)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VRSQRT28PD {sae}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
+ m.emit(0xcc)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VRSQRT28PD zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0xcc)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRSQRT28PD")
+ }
+ return p
+}
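+
+// Usage sketch. Note that ISA_AVX512ER, required above, has to date only
+// shipped on Xeon Phi parts, so callers should gate on CPUID before
+// emitting it. Assuming ZMM register constants:
+//
+//	p.VRSQRT28PD(ZMM1, ZMM0)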
+
+// VRSQRT28PS performs "Approximation to the Reciprocal Square Root of Packed Single-Precision Floating-Point Values with Less Than 2^-28 Relative Error".
+//
+// Mnemonic : VRSQRT28PS
+// Supported forms : (3 forms)
+//
+// * VRSQRT28PS m512/m32bcst, zmm{k}{z} [AVX512ER]
+// * VRSQRT28PS {sae}, zmm, zmm{k}{z} [AVX512ER]
+// * VRSQRT28PS zmm, zmm{k}{z} [AVX512ER]
+//
+func (self *Program) VRSQRT28PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VRSQRT28PS", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VRSQRT28PS", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VRSQRT28PS takes 2 or 3 operands")
+ }
+ // VRSQRT28PS m512/m32bcst, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0xcc)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VRSQRT28PS {sae}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
+ m.emit(0xcc)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VRSQRT28PS zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7d)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0xcc)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRSQRT28PS")
+ }
+ return p
+}
+
+// VRSQRT28SD performs "Approximation to the Reciprocal Square Root of a Scalar Double-Precision Floating-Point Value with Less Than 2^-28 Relative Error".
+//
+// Mnemonic : VRSQRT28SD
+// Supported forms : (3 forms)
+//
+// * VRSQRT28SD m64, xmm, xmm{k}{z} [AVX512ER]
+// * VRSQRT28SD {sae}, xmm, xmm, xmm{k}{z} [AVX512ER]
+// * VRSQRT28SD xmm, xmm, xmm{k}{z} [AVX512ER]
+//
+func (self *Program) VRSQRT28SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VRSQRT28SD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VRSQRT28SD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VRSQRT28SD takes 3 or 4 operands")
+ }
+ // VRSQRT28SD m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xcd)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VRSQRT28SD {sae}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xcd)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VRSQRT28SD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xcd)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRSQRT28SD")
+ }
+ return p
+}
+
+// VRSQRT28SS performs "Approximation to the Reciprocal Square Root of a Scalar Single-Precision Floating-Point Value with Less Than 2^-28 Relative Error".
+//
+// Mnemonic : VRSQRT28SS
+// Supported forms : (3 forms)
+//
+// * VRSQRT28SS m32, xmm, xmm{k}{z} [AVX512ER]
+// * VRSQRT28SS {sae}, xmm, xmm, xmm{k}{z} [AVX512ER]
+// * VRSQRT28SS xmm, xmm, xmm{k}{z} [AVX512ER]
+//
+func (self *Program) VRSQRT28SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VRSQRT28SS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VRSQRT28SS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VRSQRT28SS takes 3 or 4 operands")
+ }
+ // VRSQRT28SS m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0xcd)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VRSQRT28SS {sae}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0xcd)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VRSQRT28SS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512ER)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0xcd)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRSQRT28SS")
+ }
+ return p
+}
+
+// VRSQRTPS performs "Compute Reciprocals of Square Roots of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VRSQRTPS
+// Supported forms : (4 forms)
+//
+// * VRSQRTPS xmm, xmm [AVX]
+// * VRSQRTPS m128, xmm [AVX]
+// * VRSQRTPS ymm, ymm [AVX]
+// * VRSQRTPS m256, ymm [AVX]
+//
+func (self *Program) VRSQRTPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VRSQRTPS", 2, Operands { v0, v1 })
+ // VRSQRTPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[1]), v[0], 0)
+ m.emit(0x52)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VRSQRTPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x52)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VRSQRTPS ymm, ymm
+ if isYMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[1]), v[0], 0)
+ m.emit(0x52)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VRSQRTPS m256, ymm
+ if isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x52)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRSQRTPS")
+ }
+ return p
+}
+
+// VRSQRTSS performs "Compute Reciprocal of Square Root of Scalar Single-Precision Floating-Point Value".
+//
+// Mnemonic : VRSQRTSS
+// Supported forms : (2 forms)
+//
+// * VRSQRTSS xmm, xmm, xmm [AVX]
+// * VRSQRTSS m32, xmm, xmm [AVX]
+//
+func (self *Program) VRSQRTSS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VRSQRTSS", 3, Operands { v0, v1, v2 })
+ // VRSQRTSS xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x52)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VRSQRTSS m32, xmm, xmm
+ if isM32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x52)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VRSQRTSS")
+ }
+ return p
+}
+
+// VSCALEFPD performs "Scale Packed Double-Precision Floating-Point Values With Double-Precision Floating-Point Values".
+//
+// Mnemonic : VSCALEFPD
+// Supported forms : (7 forms)
+//
+// * VSCALEFPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VSCALEFPD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VSCALEFPD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VSCALEFPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VSCALEFPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VSCALEFPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VSCALEFPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VSCALEFPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VSCALEFPD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VSCALEFPD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VSCALEFPD takes 3 or 4 operands")
+ }
+ // VSCALEFPD m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x2c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VSCALEFPD {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VSCALEFPD zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSCALEFPD m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x2c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VSCALEFPD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSCALEFPD m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x2c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VSCALEFPD ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSCALEFPD")
+ }
+ return p
+}
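+
+// Usage sketch. Per the Intel SDM, VSCALEFPD computes, per lane,
+// dst = src1 * 2^floor(src2), handy for exponent manipulation without an
+// integer round-trip. With the source-first operand order used above:
+//
+//	p.VSCALEFPD(ZMM2, ZMM1, ZMM0) // zmm0 = zmm1 * 2^floor(zmm2)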
+
+// VSCALEFPS performs "Scale Packed Single-Precision Floating-Point Values With Single-Precision Floating-Point Values".
+//
+// Mnemonic : VSCALEFPS
+// Supported forms : (7 forms)
+//
+// * VSCALEFPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VSCALEFPS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VSCALEFPS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VSCALEFPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VSCALEFPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VSCALEFPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VSCALEFPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VSCALEFPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VSCALEFPS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VSCALEFPS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VSCALEFPS takes 3 or 4 operands")
+ }
+ // VSCALEFPS m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x2c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VSCALEFPS {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VSCALEFPS zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSCALEFPS m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x2c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VSCALEFPS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSCALEFPS m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x2c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VSCALEFPS ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x2c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSCALEFPS")
+ }
+ return p
+}
+
+// VSCALEFSD performs "Scale Scalar Double-Precision Floating-Point Value With a Double-Precision Floating-Point Value".
+//
+// Mnemonic : VSCALEFSD
+// Supported forms : (3 forms)
+//
+// * VSCALEFSD m64, xmm, xmm{k}{z} [AVX512F]
+// * VSCALEFSD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VSCALEFSD xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VSCALEFSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VSCALEFSD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VSCALEFSD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VSCALEFSD takes 3 or 4 operands")
+ }
+ // VSCALEFSD m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x2d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VSCALEFSD {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x2d)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VSCALEFSD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x2d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSCALEFSD")
+ }
+ return p
+}
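+
+// vscalefsdExample is a hand-written sketch, not generated. It uses the plain
+// register form; the {er} form above additionally takes a rounding-mode
+// operand in the first position.
+func vscalefsdExample(p *Program) *Instruction {
+ // VSCALEFSD xmm, xmm, xmm{k}{z}: the low double of XMM0 becomes
+ // XMM1 * 2^floor(XMM2); the upper bits are copied from XMM1.
+ return p.VSCALEFSD(XMM2, XMM1, XMM0)
+}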
+
+// VSCALEFSS performs "Scale Scalar Single-Precision Floating-Point Value With a Single-Precision Floating-Point Value".
+//
+// Mnemonic : VSCALEFSS
+// Supported forms : (3 forms)
+//
+// * VSCALEFSS m32, xmm, xmm{k}{z} [AVX512F]
+// * VSCALEFSS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VSCALEFSS xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VSCALEFSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VSCALEFSS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VSCALEFSS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VSCALEFSS takes 3 or 4 operands")
+ }
+ // VSCALEFSS m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x2d)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VSCALEFSS {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x2d)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VSCALEFSS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x2d)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSCALEFSS")
+ }
+ return p
+}
+
+// VSCATTERDPD performs "Scatter Packed Double-Precision Floating-Point Values with Signed Doubleword Indices".
+//
+// Mnemonic : VSCATTERDPD
+// Supported forms : (3 forms)
+//
+// * VSCATTERDPD zmm, vm32y{k} [AVX512F]
+// * VSCATTERDPD xmm, vm32x{k} [AVX512F,AVX512VL]
+// * VSCATTERDPD ymm, vm32x{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VSCATTERDPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VSCATTERDPD", 2, Operands { v0, v1 })
+ // VSCATTERDPD zmm, vm32y{k}
+ if isZMM(v0) && isVMYk(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0xa2)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VSCATTERDPD xmm, vm32x{k}
+ if isEVEXXMM(v0) && isVMXk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0xa2)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VSCATTERDPD ymm, vm32x{k}
+ if isEVEXYMM(v0) && isVMXk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0xa2)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSCATTERDPD")
+ }
+ return p
+}
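+
+// Note (hand-written, not generated): the scatter encoders above take the
+// data register first and the vm32/vm64 vector-memory operand second; that
+// operand normally carries an opmask ({k}), which the EVEX byte picks up via
+// kcode(v[1]). The trailing mrsd argument (8 for VSCATTERDPD) is the disp8
+// compression scale, i.e. the size of one scattered element.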
+
+// VSCATTERDPS performs "Scatter Packed Single-Precision Floating-Point Values with Signed Doubleword Indices".
+//
+// Mnemonic : VSCATTERDPS
+// Supported forms : (3 forms)
+//
+// * VSCATTERDPS zmm, vm32z{k} [AVX512F]
+// * VSCATTERDPS xmm, vm32x{k} [AVX512F,AVX512VL]
+// * VSCATTERDPS ymm, vm32y{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VSCATTERDPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VSCATTERDPS", 2, Operands { v0, v1 })
+ // VSCATTERDPS zmm, vm32z{k}
+ if isZMM(v0) && isVMZk(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0xa2)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ // VSCATTERDPS xmm, vm32x{k}
+ if isEVEXXMM(v0) && isVMXk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0xa2)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ // VSCATTERDPS ymm, vm32y{k}
+ if isEVEXYMM(v0) && isVMYk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0xa2)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSCATTERDPS")
+ }
+ return p
+}
+
+// VSCATTERPF0DPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Doubleword Indices Using T0 Hint with Intent to Write".
+//
+// Mnemonic : VSCATTERPF0DPD
+// Supported forms : (1 form)
+//
+// * VSCATTERPF0DPD vm32y{k} [AVX512PF]
+//
+func (self *Program) VSCATTERPF0DPD(v0 interface{}) *Instruction {
+ p := self.alloc("VSCATTERPF0DPD", 1, Operands { v0 })
+ // VSCATTERPF0DPD vm32y{k}
+ if isVMYk(v0) {
+ self.require(ISA_AVX512PF)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
+ m.emit(0xc6)
+ m.mrsd(5, addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSCATTERPF0DPD")
+ }
+ return p
+}
+
+// VSCATTERPF0DPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Doubleword Indices Using T0 Hint with Intent to Write".
+//
+// Mnemonic : VSCATTERPF0DPS
+// Supported forms : (1 form)
+//
+// * VSCATTERPF0DPS vm32z{k} [AVX512PF]
+//
+func (self *Program) VSCATTERPF0DPS(v0 interface{}) *Instruction {
+ p := self.alloc("VSCATTERPF0DPS", 1, Operands { v0 })
+ // VSCATTERPF0DPS vm32z{k}
+ if isVMZk(v0) {
+ self.require(ISA_AVX512PF)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
+ m.emit(0xc6)
+ m.mrsd(5, addr(v[0]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSCATTERPF0DPS")
+ }
+ return p
+}
+
+// VSCATTERPF0QPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Quadword Indices Using T0 Hint with Intent to Write".
+//
+// Mnemonic : VSCATTERPF0QPD
+// Supported forms : (1 form)
+//
+// * VSCATTERPF0QPD vm64z{k} [AVX512PF]
+//
+func (self *Program) VSCATTERPF0QPD(v0 interface{}) *Instruction {
+ p := self.alloc("VSCATTERPF0QPD", 1, Operands { v0 })
+ // VSCATTERPF0QPD vm64z{k}
+ if isVMZk(v0) {
+ self.require(ISA_AVX512PF)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
+ m.emit(0xc7)
+ m.mrsd(5, addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSCATTERPF0QPD")
+ }
+ return p
+}
+
+// VSCATTERPF0QPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Quadword Indices Using T0 Hint with Intent to Write".
+//
+// Mnemonic : VSCATTERPF0QPS
+// Supported forms : (1 form)
+//
+// * VSCATTERPF0QPS vm64z{k} [AVX512PF]
+//
+func (self *Program) VSCATTERPF0QPS(v0 interface{}) *Instruction {
+ p := self.alloc("VSCATTERPF0QPS", 1, Operands { v0 })
+ // VSCATTERPF0QPS vm64z{k}
+ if isVMZk(v0) {
+ self.require(ISA_AVX512PF)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
+ m.emit(0xc7)
+ m.mrsd(5, addr(v[0]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSCATTERPF0QPS")
+ }
+ return p
+}
+
+// VSCATTERPF1DPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Doubleword Indices Using T1 Hint with Intent to Write".
+//
+// Mnemonic : VSCATTERPF1DPD
+// Supported forms : (1 form)
+//
+// * VSCATTERPF1DPD vm32y{k} [AVX512PF]
+//
+func (self *Program) VSCATTERPF1DPD(v0 interface{}) *Instruction {
+ p := self.alloc("VSCATTERPF1DPD", 1, Operands { v0 })
+ // VSCATTERPF1DPD vm32y{k}
+ if isVMYk(v0) {
+ self.require(ISA_AVX512PF)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
+ m.emit(0xc6)
+ m.mrsd(6, addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSCATTERPF1DPD")
+ }
+ return p
+}
+
+// VSCATTERPF1DPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Doubleword Indices Using T1 Hint with Intent to Write".
+//
+// Mnemonic : VSCATTERPF1DPS
+// Supported forms : (1 form)
+//
+// * VSCATTERPF1DPS vm32z{k} [AVX512PF]
+//
+func (self *Program) VSCATTERPF1DPS(v0 interface{}) *Instruction {
+ p := self.alloc("VSCATTERPF1DPS", 1, Operands { v0 })
+ // VSCATTERPF1DPS vm32z{k}
+ if isVMZk(v0) {
+ self.require(ISA_AVX512PF)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
+ m.emit(0xc6)
+ m.mrsd(6, addr(v[0]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSCATTERPF1DPS")
+ }
+ return p
+}
+
+// VSCATTERPF1QPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Quadword Indices Using T1 Hint with Intent to Write".
+//
+// Mnemonic : VSCATTERPF1QPD
+// Supported forms : (1 form)
+//
+// * VSCATTERPF1QPD vm64z{k} [AVX512PF]
+//
+func (self *Program) VSCATTERPF1QPD(v0 interface{}) *Instruction {
+ p := self.alloc("VSCATTERPF1QPD", 1, Operands { v0 })
+ // VSCATTERPF1QPD vm64z{k}
+ if isVMZk(v0) {
+ self.require(ISA_AVX512PF)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
+ m.emit(0xc7)
+ m.mrsd(6, addr(v[0]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSCATTERPF1QPD")
+ }
+ return p
+}
+
+// VSCATTERPF1QPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Quadword Indices Using T1 Hint with Intent to Write".
+//
+// Mnemonic : VSCATTERPF1QPS
+// Supported forms : (1 form)
+//
+// * VSCATTERPF1QPS vm64z{k} [AVX512PF]
+//
+func (self *Program) VSCATTERPF1QPS(v0 interface{}) *Instruction {
+ p := self.alloc("VSCATTERPF1QPS", 1, Operands { v0 })
+ // VSCATTERPF1QPS vm64z{k}
+ if isVMZk(v0) {
+ self.require(ISA_AVX512PF)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0)
+ m.emit(0xc7)
+ m.mrsd(6, addr(v[0]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSCATTERPF1QPS")
+ }
+ return p
+}
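+
+// Note (hand-written, not generated): the eight VSCATTERPF* encoders above
+// differ only in the opcode byte - 0xc6 for doubleword indices, 0xc7 for
+// quadword indices - and in the ModRM reg field passed to mrsd: 5 selects the
+// T0-hint (PF0) forms, 6 the T1-hint (PF1) forms.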
+
+// VSCATTERQPD performs "Scatter Packed Double-Precision Floating-Point Values with Signed Quadword Indices".
+//
+// Mnemonic : VSCATTERQPD
+// Supported forms : (3 forms)
+//
+// * VSCATTERQPD zmm, vm64z{k} [AVX512F]
+// * VSCATTERQPD xmm, vm64x{k} [AVX512F,AVX512VL]
+// * VSCATTERQPD ymm, vm64y{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VSCATTERQPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VSCATTERQPD", 2, Operands { v0, v1 })
+ // VSCATTERQPD zmm, vm64z{k}
+ if isZMM(v0) && isVMZk(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0xa3)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VSCATTERQPD xmm, vm64x{k}
+ if isEVEXXMM(v0) && isVMXk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0xa3)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ // VSCATTERQPD ymm, vm64y{k}
+ if isEVEXYMM(v0) && isVMYk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0xa3)
+ m.mrsd(lcode(v[0]), addr(v[1]), 8)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSCATTERQPD")
+ }
+ return p
+}
+
+// VSCATTERQPS performs "Scatter Packed Single-Precision Floating-Point Values with Signed Quadword Indices".
+//
+// Mnemonic : VSCATTERQPS
+// Supported forms : (3 forms)
+//
+// * VSCATTERQPS ymm, vm64z{k} [AVX512F]
+// * VSCATTERQPS xmm, vm64x{k} [AVX512F,AVX512VL]
+// * VSCATTERQPS xmm, vm64y{k} [AVX512F,AVX512VL]
+//
+func (self *Program) VSCATTERQPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VSCATTERQPS", 2, Operands { v0, v1 })
+ // VSCATTERQPS ymm, vm64z{k}
+ if isEVEXYMM(v0) && isVMZk(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0xa3)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ // VSCATTERQPS xmm, vm64x{k}
+ if isEVEXXMM(v0) && isVMXk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0xa3)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ // VSCATTERQPS xmm, vm64y{k}
+ if isEVEXXMM(v0) && isVMYk(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b10, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0)
+ m.emit(0xa3)
+ m.mrsd(lcode(v[0]), addr(v[1]), 4)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSCATTERQPS")
+ }
+ return p
+}
+
+// VSHUFF32X4 performs "Shuffle 128-Bit Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VSHUFF32X4
+// Supported forms : (4 forms)
+//
+// * VSHUFF32X4 imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VSHUFF32X4 imm8, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VSHUFF32X4 imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VSHUFF32X4 imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VSHUFF32X4(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VSHUFF32X4", 4, Operands { v0, v1, v2, v3 })
+ // VSHUFF32X4 imm8, m512/m32bcst, zmm, zmm{k}{z}
+ if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x23)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFF32X4 imm8, zmm, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x23)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFF32X4 imm8, m256/m32bcst, ymm, ymm{k}{z}
+ if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x23)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFF32X4 imm8, ymm, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x23)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSHUFF32X4")
+ }
+ return p
+}
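+
+// vshuff32x4Example is a hand-written sketch, not generated. The immediate
+// comes first in this package's operand order; each 2-bit field of imm8
+// selects one 128-bit lane, with the low fields drawing from one source and
+// the high fields from the other (see the Intel SDM for the exact mapping).
+func vshuff32x4Example(p *Program) *Instruction {
+ // VSHUFF32X4 imm8, zmm, zmm, zmm{k}{z}
+ return p.VSHUFF32X4(0x4e, ZMM2, ZMM1, ZMM0)
+}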
+
+// VSHUFF64X2 performs "Shuffle 128-Bit Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VSHUFF64X2
+// Supported forms : (4 forms)
+//
+// * VSHUFF64X2 imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VSHUFF64X2 imm8, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VSHUFF64X2 imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VSHUFF64X2 imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VSHUFF64X2(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VSHUFF64X2", 4, Operands { v0, v1, v2, v3 })
+ // VSHUFF64X2 imm8, m512/m64bcst, zmm, zmm{k}{z}
+ if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x23)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFF64X2 imm8, zmm, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x23)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFF64X2 imm8, m256/m64bcst, ymm, ymm{k}{z}
+ if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x23)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFF64X2 imm8, ymm, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x23)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSHUFF64X2")
+ }
+ return p
+}
+
+// VSHUFI32X4 performs "Shuffle 128-Bit Packed Doubleword Integer Values".
+//
+// Mnemonic : VSHUFI32X4
+// Supported forms : (4 forms)
+//
+// * VSHUFI32X4 imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VSHUFI32X4 imm8, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VSHUFI32X4 imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VSHUFI32X4 imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VSHUFI32X4(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VSHUFI32X4", 4, Operands { v0, v1, v2, v3 })
+ // VSHUFI32X4 imm8, m512/m32bcst, zmm, zmm{k}{z}
+ if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x43)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFI32X4 imm8, zmm, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x43)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFI32X4 imm8, m256/m32bcst, ymm, ymm{k}{z}
+ if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x43)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFI32X4 imm8, ymm, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7d ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x43)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSHUFI32X4")
+ }
+ return p
+}
+
+// VSHUFI64X2 performs "Shuffle 128-Bit Packed Quadword Integer Values".
+//
+// Mnemonic : VSHUFI64X2
+// Supported forms : (4 forms)
+//
+// * VSHUFI64X2 imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VSHUFI64X2 imm8, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VSHUFI64X2 imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VSHUFI64X2 imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VSHUFI64X2(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VSHUFI64X2", 4, Operands { v0, v1, v2, v3 })
+ // VSHUFI64X2 imm8, m512/m64bcst, zmm, zmm{k}{z}
+ if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x43)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFI64X2 imm8, zmm, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0x43)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFI64X2 imm8, m256/m64bcst, ymm, ymm{k}{z}
+ if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0x43)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFI64X2 imm8, ymm, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0x43)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSHUFI64X2")
+ }
+ return p
+}
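+
+// vshufi64x2Example is a hand-written sketch, not generated. It is the
+// quadword-integer counterpart of VSHUFF64X2 (note the shared opcode 0x43 and
+// the W1 prefix byte 0x85 in the EVEX forms above) and takes the same
+// imm8-first operand order.
+func vshufi64x2Example(p *Program) *Instruction {
+ // VSHUFI64X2 imm8, ymm, ymm, ymm{k}{z}: for the ymm form only the low two
+ // bits of imm8 are significant, one per selected 128-bit lane.
+ return p.VSHUFI64X2(0x01, YMM2, YMM1, YMM0)
+}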
+
+// VSHUFPD performs "Shuffle Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VSHUFPD
+// Supported forms : (10 forms)
+//
+// * VSHUFPD imm8, xmm, xmm, xmm [AVX]
+// * VSHUFPD imm8, m128, xmm, xmm [AVX]
+// * VSHUFPD imm8, ymm, ymm, ymm [AVX]
+// * VSHUFPD imm8, m256, ymm, ymm [AVX]
+// * VSHUFPD imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VSHUFPD imm8, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VSHUFPD imm8, m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VSHUFPD imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VSHUFPD imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VSHUFPD imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VSHUFPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VSHUFPD", 4, Operands { v0, v1, v2, v3 })
+ // VSHUFPD imm8, xmm, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[3]), v[1], hlcode(v[2]))
+ m.emit(0xc6)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFPD imm8, m128, xmm, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0xc6)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFPD imm8, ymm, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[3]), v[1], hlcode(v[2]))
+ m.emit(0xc6)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFPD imm8, m256, ymm, ymm
+ if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0xc6)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFPD imm8, m512/m64bcst, zmm, zmm{k}{z}
+ if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0xc6)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFPD imm8, zmm, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0xc6)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFPD imm8, m128/m64bcst, xmm, xmm{k}{z}
+ if isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0xc6)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFPD imm8, xmm, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+ m.emit(0xc6)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFPD imm8, m256/m64bcst, ymm, ymm{k}{z}
+ if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0xc6)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFPD imm8, ymm, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0xc6)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSHUFPD")
+ }
+ return p
+}
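+
+// vshufpdExample is a hand-written sketch, not generated, using the plain AVX
+// (VEX-encoded) xmm form. Bit 0 of imm8 selects the element taken from one
+// source for the low lane and bit 1 the element from the other source for the
+// high lane; higher imm8 bits are ignored in the xmm form.
+func vshufpdExample(p *Program) *Instruction {
+ // VSHUFPD imm8, xmm, xmm, xmm
+ return p.VSHUFPD(0x01, XMM2, XMM1, XMM0)
+}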
+
+// VSHUFPS performs "Shuffle Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VSHUFPS
+// Supported forms : (10 forms)
+//
+// * VSHUFPS imm8, xmm, xmm, xmm [AVX]
+// * VSHUFPS imm8, m128, xmm, xmm [AVX]
+// * VSHUFPS imm8, ymm, ymm, ymm [AVX]
+// * VSHUFPS imm8, m256, ymm, ymm [AVX]
+// * VSHUFPS imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VSHUFPS imm8, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VSHUFPS imm8, m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VSHUFPS imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VSHUFPS imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VSHUFPS imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VSHUFPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+ p := self.alloc("VSHUFPS", 4, Operands { v0, v1, v2, v3 })
+ // VSHUFPS imm8, xmm, xmm, xmm
+ if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[3]), v[1], hlcode(v[2]))
+ m.emit(0xc6)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFPS imm8, m128, xmm, xmm
+ if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0xc6)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFPS imm8, ymm, ymm, ymm
+ if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[3]), v[1], hlcode(v[2]))
+ m.emit(0xc6)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFPS imm8, m256, ymm, ymm
+ if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[3]), addr(v[1]), hlcode(v[2]))
+ m.emit(0xc6)
+ m.mrsd(lcode(v[3]), addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFPS imm8, m512/m32bcst, zmm, zmm{k}{z}
+ if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0xc6)
+ m.mrsd(lcode(v[3]), addr(v[1]), 64)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFPS imm8, zmm, zmm, zmm{k}{z}
+ if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+ m.emit(0xc6)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFPS imm8, m128/m32bcst, xmm, xmm{k}{z}
+ if isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0xc6)
+ m.mrsd(lcode(v[3]), addr(v[1]), 16)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFPS imm8, xmm, xmm, xmm{k}{z}
+ if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+ m.emit(0xc6)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFPS imm8, m256/m32bcst, ymm, ymm{k}{z}
+ if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+ m.emit(0xc6)
+ m.mrsd(lcode(v[3]), addr(v[1]), 32)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // VSHUFPS imm8, ymm, ymm, ymm{k}{z}
+ if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+ m.emit(0xc6)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSHUFPS")
+ }
+ return p
+}
+
+// VSQRTPD performs "Compute Square Roots of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VSQRTPD
+// Supported forms : (11 forms)
+//
+// * VSQRTPD xmm, xmm [AVX]
+// * VSQRTPD m128, xmm [AVX]
+// * VSQRTPD ymm, ymm [AVX]
+// * VSQRTPD m256, ymm [AVX]
+// * VSQRTPD m512/m64bcst, zmm{k}{z} [AVX512F]
+// * VSQRTPD {er}, zmm, zmm{k}{z} [AVX512F]
+// * VSQRTPD zmm, zmm{k}{z} [AVX512F]
+// * VSQRTPD m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VSQRTPD m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VSQRTPD xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VSQRTPD ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VSQRTPD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VSQRTPD", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VSQRTPD", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VSQRTPD takes 2 or 3 operands")
+ }
+ // VSQRTPD xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), v[0], 0)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSQRTPD m128, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x51)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VSQRTPD ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[1]), v[0], 0)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSQRTPD m256, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x51)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VSQRTPD m512/m64bcst, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x51)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VSQRTPD {er}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VSQRTPD zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSQRTPD m128/m64bcst, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x51)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VSQRTPD m256/m64bcst, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x51)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VSQRTPD xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSQRTPD ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSQRTPD")
+ }
+ return p
+}
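+
+// vsqrtpdExample is a hand-written sketch, not generated. Unlike the
+// three-operand arithmetic encoders, VSQRTPD is source-then-destination; the
+// VEX ymm form used here requires only ISA_AVX.
+func vsqrtpdExample(p *Program) *Instruction {
+ // VSQRTPD ymm, ymm: per-element square roots of YMM1, written to YMM0.
+ return p.VSQRTPD(YMM1, YMM0)
+}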
+
+// VSQRTPS performs "Compute Square Roots of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VSQRTPS
+// Supported forms : (11 forms)
+//
+// * VSQRTPS xmm, xmm [AVX]
+// * VSQRTPS m128, xmm [AVX]
+// * VSQRTPS ymm, ymm [AVX]
+// * VSQRTPS m256, ymm [AVX]
+// * VSQRTPS m512/m32bcst, zmm{k}{z} [AVX512F]
+// * VSQRTPS {er}, zmm, zmm{k}{z} [AVX512F]
+// * VSQRTPS zmm, zmm{k}{z} [AVX512F]
+// * VSQRTPS m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VSQRTPS m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VSQRTPS xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VSQRTPS ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VSQRTPS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VSQRTPS", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VSQRTPS", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VSQRTPS takes 2 or 3 operands")
+ }
+ // VSQRTPS xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[1]), v[0], 0)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSQRTPS m128, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x51)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VSQRTPS ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[1]), v[0], 0)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSQRTPS m256, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x51)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VSQRTPS m512/m32bcst, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x51)
+ m.mrsd(lcode(v[1]), addr(v[0]), 64)
+ })
+ }
+ // VSQRTPS {er}, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VSQRTPS zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSQRTPS m128/m32bcst, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x51)
+ m.mrsd(lcode(v[1]), addr(v[0]), 16)
+ })
+ }
+ // VSQRTPS m256/m32bcst, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+ m.emit(0x51)
+ m.mrsd(lcode(v[1]), addr(v[0]), 32)
+ })
+ }
+ // VSQRTPS xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSQRTPS ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7c)
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSQRTPS")
+ }
+ return p
+}
+
+// VSQRTSD performs "Compute Square Root of Scalar Double-Precision Floating-Point Value".
+//
+// Mnemonic : VSQRTSD
+// Supported forms : (5 forms)
+//
+// * VSQRTSD xmm, xmm, xmm [AVX]
+// * VSQRTSD m64, xmm, xmm [AVX]
+// * VSQRTSD m64, xmm, xmm{k}{z} [AVX512F]
+// * VSQRTSD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VSQRTSD xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VSQRTSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VSQRTSD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VSQRTSD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VSQRTSD takes 3 or 4 operands")
+ }
+ // VSQRTSD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSQRTSD m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x51)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VSQRTSD m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x51)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VSQRTSD {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xff ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VSQRTSD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xff ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSQRTSD")
+ }
+ return p
+}
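+
+// vsqrtsdExample is a hand-written sketch, not generated. The scalar form
+// keeps three operands because the upper bits of the destination are copied
+// from the middle operand.
+func vsqrtsdExample(p *Program) *Instruction {
+ // VSQRTSD xmm, xmm, xmm: low double of XMM0 = sqrt(low double of XMM2);
+ // bits 127:64 are copied from XMM1.
+ return p.VSQRTSD(XMM2, XMM1, XMM0)
+}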
+
+// VSQRTSS performs "Compute Square Root of Scalar Single-Precision Floating-Point Value".
+//
+// Mnemonic : VSQRTSS
+// Supported forms : (5 forms)
+//
+// * VSQRTSS xmm, xmm, xmm [AVX]
+// * VSQRTSS m32, xmm, xmm [AVX]
+// * VSQRTSS m32, xmm, xmm{k}{z} [AVX512F]
+// * VSQRTSS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VSQRTSS xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VSQRTSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VSQRTSS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VSQRTSS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VSQRTSS takes 3 or 4 operands")
+ }
+ // VSQRTSS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSQRTSS m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x51)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VSQRTSS m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x51)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VSQRTSS {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VSQRTSS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x51)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSQRTSS")
+ }
+ return p
+}
+
+// VSTMXCSR performs "Store MXCSR Register State".
+//
+// Mnemonic : VSTMXCSR
+// Supported forms : (1 form)
+//
+// * VSTMXCSR m32 [AVX]
+//
+func (self *Program) VSTMXCSR(v0 interface{}) *Instruction {
+ p := self.alloc("VSTMXCSR", 1, Operands { v0 })
+ // VSTMXCSR m32
+ if isM32(v0) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, 0, addr(v[0]), 0)
+ m.emit(0xae)
+ m.mrsd(3, addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSTMXCSR")
+ }
+ return p
+}
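+
+// Note (hand-written, not generated): VSTMXCSR accepts only an m32 operand,
+// so any register argument falls through to the "invalid operands" panic
+// above; the ModRM reg field is fixed to 3, matching the VEX encoding
+// 0F AE /3.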
+
+// VSUBPD performs "Subtract Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VSUBPD
+// Supported forms : (11 forms)
+//
+// * VSUBPD xmm, xmm, xmm [AVX]
+// * VSUBPD m128, xmm, xmm [AVX]
+// * VSUBPD ymm, ymm, ymm [AVX]
+// * VSUBPD m256, ymm, ymm [AVX]
+// * VSUBPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VSUBPD {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VSUBPD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VSUBPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VSUBPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VSUBPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VSUBPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VSUBPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VSUBPD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VSUBPD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VSUBPD takes 3 or 4 operands")
+ }
+ // VSUBPD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSUBPD m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VSUBPD ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSUBPD m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VSUBPD m512/m64bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x5c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VSUBPD {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VSUBPD zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSUBPD m128/m64bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x5c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VSUBPD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSUBPD m256/m64bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x5c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VSUBPD ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSUBPD")
+ }
+ return p
+}
+
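+// Illustrative usage sketch (not generated code): operand order throughout
+// this package is source-first, destination-last, so VSUBPD(a, b, c) encodes
+// c := b - a. Assuming p is a *Program from this package and XMM0/YMM0 are
+// its exported register constants:
+//
+//     p.VSUBPD(XMM0, XMM1, XMM2)    // xmm2 := xmm1 - xmm0 (AVX, 128-bit)
+//     p.VSUBPD(YMM0, YMM1, YMM2)    // ymm2 := ymm1 - ymm0 (AVX, 256-bit)
+//
+// The four-operand form takes the {er} rounding marker as its first argument
+// with the destination last; each matching form above contributes a candidate
+// encoding, which is why the operand kinds are probed one by one.
+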
+// VSUBPS performs "Subtract Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VSUBPS
+// Supported forms : (11 forms)
+//
+// * VSUBPS xmm, xmm, xmm [AVX]
+// * VSUBPS m128, xmm, xmm [AVX]
+// * VSUBPS ymm, ymm, ymm [AVX]
+// * VSUBPS m256, ymm, ymm [AVX]
+// * VSUBPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VSUBPS {er}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VSUBPS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VSUBPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VSUBPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VSUBPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VSUBPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VSUBPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VSUBPS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VSUBPS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VSUBPS takes 3 or 4 operands")
+ }
+ // VSUBPS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSUBPS m128, xmm, xmm
+ if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VSUBPS ymm, ymm, ymm
+ if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSUBPS m256, ymm, ymm
+ if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VSUBPS m512/m32bcst, zmm, zmm{k}{z}
+ if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x5c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VSUBPS {er}, zmm, zmm, zmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VSUBPS zmm, zmm, zmm{k}{z}
+ if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSUBPS m128/m32bcst, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x5c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VSUBPS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSUBPS m256/m32bcst, ymm, ymm{k}{z}
+ if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x5c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VSUBPS ymm, ymm, ymm{k}{z}
+ if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSUBPS")
+ }
+ return p
+}
+
+// VSUBSD performs "Subtract Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : VSUBSD
+// Supported forms : (5 forms)
+//
+// * VSUBSD xmm, xmm, xmm [AVX]
+// * VSUBSD m64, xmm, xmm [AVX]
+// * VSUBSD m64, xmm, xmm{k}{z} [AVX512F]
+// * VSUBSD {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VSUBSD xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VSUBSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VSUBSD", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VSUBSD", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VSUBSD takes 3 or 4 operands")
+ }
+ // VSUBSD xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSUBSD m64, xmm, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VSUBSD m64, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x5c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 8)
+ })
+ }
+ // VSUBSD {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0xff ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VSUBSD xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xff ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSUBSD")
+ }
+ return p
+}
+
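+// Illustrative scalar sketch (p is an existing *Program): the scalar forms
+// operate on the low 64-bit lane only, with bits 127:64 of the destination
+// copied from the second argument (the first source):
+//
+//     p.VSUBSD(XMM0, XMM1, XMM2)    // xmm2[63:0] := xmm1[63:0] - xmm0[63:0]
+//
+// The AVX-512 forms listed above are selected by operand kind: a masked
+// destination or a leading {er} rounding marker routes the call to the EVEX
+// encodings.
+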
+// VSUBSS performs "Subtract Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VSUBSS
+// Supported forms : (5 forms)
+//
+// * VSUBSS xmm, xmm, xmm [AVX]
+// * VSUBSS m32, xmm, xmm [AVX]
+// * VSUBSS m32, xmm, xmm{k}{z} [AVX512F]
+// * VSUBSS {er}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VSUBSS xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VSUBSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VSUBSS", 3, Operands { v0, v1, v2 })
+ case 1 : p = self.alloc("VSUBSS", 4, Operands { v0, v1, v2, vv[0] })
+ default : panic("instruction VSUBSS takes 3 or 4 operands")
+ }
+ // VSUBSS xmm, xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VSUBSS m32, xmm, xmm
+ if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x5c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VSUBSS m32, xmm, xmm{k}{z}
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+ m.emit(0x5c)
+ m.mrsd(lcode(v[2]), addr(v[0]), 4)
+ })
+ }
+ // VSUBSS {er}, xmm, xmm, xmm{k}{z}
+ if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[2]) << 3))
+ m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+ })
+ }
+ // VSUBSS xmm, xmm, xmm{k}{z}
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7e ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x5c)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VSUBSS")
+ }
+ return p
+}
+
+// VTESTPD performs "Packed Double-Precision Floating-Point Bit Test".
+//
+// Mnemonic : VTESTPD
+// Supported forms : (4 forms)
+//
+// * VTESTPD xmm, xmm [AVX]
+// * VTESTPD m128, xmm [AVX]
+// * VTESTPD ymm, ymm [AVX]
+// * VTESTPD m256, ymm [AVX]
+//
+func (self *Program) VTESTPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VTESTPD", 2, Operands { v0, v1 })
+ // VTESTPD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VTESTPD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VTESTPD ymm, ymm
+ if isYMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d)
+ m.emit(0x0f)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VTESTPD m256, ymm
+ if isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x0f)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VTESTPD")
+ }
+ return p
+}
+
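+// Illustrative note (p is an existing *Program): VTESTPD moves no data; it
+// only derives ZF and CF from the AND and ANDN of the two operands' sign
+// bits (see the Intel SDM for the exact operand mapping), so it is typically
+// followed by a conditional branch:
+//
+//     p.VTESTPD(XMM1, XMM0)    // set ZF/CF from the sign bits of xmm0, xmm1
+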
+// VTESTPS performs "Packed Single-Precision Floating-Point Bit Test".
+//
+// Mnemonic : VTESTPS
+// Supported forms : (4 forms)
+//
+// * VTESTPS xmm, xmm [AVX]
+// * VTESTPS m128, xmm [AVX]
+// * VTESTPS ymm, ymm [AVX]
+// * VTESTPS m256, ymm [AVX]
+//
+func (self *Program) VTESTPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("VTESTPS", 2, Operands { v0, v1 })
+ // VTESTPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x79)
+ m.emit(0x0e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VTESTPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x0e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VTESTPS ymm, ymm
+ if isYMM(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xc4)
+ m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5))
+ m.emit(0x7d)
+ m.emit(0x0e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VTESTPS m256, ymm
+ if isM256(v0) && isYMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x0e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VTESTPS")
+ }
+ return p
+}
+
+// VUCOMISD performs "Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS".
+//
+// Mnemonic : VUCOMISD
+// Supported forms : (5 forms)
+//
+// * VUCOMISD xmm, xmm [AVX]
+// * VUCOMISD m64, xmm [AVX]
+// * VUCOMISD m64, xmm [AVX512F]
+// * VUCOMISD {sae}, xmm, xmm [AVX512F]
+// * VUCOMISD xmm, xmm [AVX512F]
+//
+func (self *Program) VUCOMISD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VUCOMISD", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VUCOMISD", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VUCOMISD takes 2 or 3 operands")
+ }
+ // VUCOMISD xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), v[0], 0)
+ m.emit(0x2e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VUCOMISD m64, xmm
+ if len(vv) == 0 && isM64(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x2e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VUCOMISD m64, xmm
+ if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x2e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 8)
+ })
+ }
+ // VUCOMISD {sae}, xmm, xmm
+ if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd)
+ m.emit(0x18)
+ m.emit(0x2e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VUCOMISD xmm, xmm
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0xfd)
+ m.emit(0x48)
+ m.emit(0x2e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VUCOMISD")
+ }
+ return p
+}
+
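+// Illustrative sketch (p is an existing *Program): VUCOMISD compares the low
+// doubles and writes ZF/PF/CF, with PF signalling an unordered result (a NaN
+// operand):
+//
+//     p.VUCOMISD(XMM1, XMM0)    // compare xmm0 against xmm1, set EFLAGS
+//
+// Passing the {sae} marker as a leading extra operand (three operands in
+// total) selects the suppress-all-exceptions AVX-512 form listed above.
+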
+// VUCOMISS performs "Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS".
+//
+// Mnemonic : VUCOMISS
+// Supported forms : (5 forms)
+//
+// * VUCOMISS xmm, xmm [AVX]
+// * VUCOMISS m32, xmm [AVX]
+// * VUCOMISS m32, xmm [AVX512F]
+// * VUCOMISS {sae}, xmm, xmm [AVX512F]
+// * VUCOMISS xmm, xmm [AVX512F]
+//
+func (self *Program) VUCOMISS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+ var p *Instruction
+ switch len(vv) {
+ case 0 : p = self.alloc("VUCOMISS", 2, Operands { v0, v1 })
+ case 1 : p = self.alloc("VUCOMISS", 3, Operands { v0, v1, vv[0] })
+ default : panic("instruction VUCOMISS takes 2 or 3 operands")
+ }
+ // VUCOMISS xmm, xmm
+ if len(vv) == 0 && isXMM(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[1]), v[0], 0)
+ m.emit(0x2e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // VUCOMISS m32, xmm
+ if len(vv) == 0 && isM32(v0) && isXMM(v1) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[1]), addr(v[0]), 0)
+ m.emit(0x2e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // VUCOMISS m32, xmm
+ if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0)
+ m.emit(0x2e)
+ m.mrsd(lcode(v[1]), addr(v[0]), 4)
+ })
+ }
+ // VUCOMISS {sae}, xmm, xmm
+ if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c)
+ m.emit(0x18)
+ m.emit(0x2e)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+ })
+ }
+ // VUCOMISS xmm, xmm
+ if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+ m.emit(0x7c)
+ m.emit(0x48)
+ m.emit(0x2e)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VUCOMISS")
+ }
+ return p
+}
+
+// VUNPCKHPD performs "Unpack and Interleave High Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VUNPCKHPD
+// Supported forms : (10 forms)
+//
+// * VUNPCKHPD xmm, xmm, xmm [AVX]
+// * VUNPCKHPD m128, xmm, xmm [AVX]
+// * VUNPCKHPD ymm, ymm, ymm [AVX]
+// * VUNPCKHPD m256, ymm, ymm [AVX]
+// * VUNPCKHPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VUNPCKHPD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VUNPCKHPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VUNPCKHPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VUNPCKHPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VUNPCKHPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VUNPCKHPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VUNPCKHPD", 3, Operands { v0, v1, v2 })
+ // VUNPCKHPD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x15)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VUNPCKHPD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x15)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VUNPCKHPD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x15)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VUNPCKHPD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x15)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VUNPCKHPD m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x15)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VUNPCKHPD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x15)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VUNPCKHPD m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x15)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VUNPCKHPD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x15)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VUNPCKHPD m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x15)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VUNPCKHPD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x15)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VUNPCKHPD")
+ }
+ return p
+}
+
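+// Illustrative sketch (p is an existing *Program): the 128-bit form
+// interleaves the high 64-bit lanes of its two sources,
+//
+//     // xmm2[63:0] := xmm1[127:64], xmm2[127:64] := xmm0[127:64]
+//     p.VUNPCKHPD(XMM0, XMM1, XMM2)
+//
+// while VUNPCKLPD below does the same with the low lanes. The 256- and
+// 512-bit forms repeat the pattern independently within each 128-bit lane.
+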
+// VUNPCKHPS performs "Unpack and Interleave High Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VUNPCKHPS
+// Supported forms : (10 forms)
+//
+// * VUNPCKHPS xmm, xmm, xmm [AVX]
+// * VUNPCKHPS m128, xmm, xmm [AVX]
+// * VUNPCKHPS ymm, ymm, ymm [AVX]
+// * VUNPCKHPS m256, ymm, ymm [AVX]
+// * VUNPCKHPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VUNPCKHPS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VUNPCKHPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VUNPCKHPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VUNPCKHPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VUNPCKHPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VUNPCKHPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VUNPCKHPS", 3, Operands { v0, v1, v2 })
+ // VUNPCKHPS xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x15)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VUNPCKHPS m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x15)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VUNPCKHPS ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x15)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VUNPCKHPS m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x15)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VUNPCKHPS m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x15)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VUNPCKHPS zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x15)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VUNPCKHPS m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x15)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VUNPCKHPS xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x15)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VUNPCKHPS m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x15)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VUNPCKHPS ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x15)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VUNPCKHPS")
+ }
+ return p
+}
+
+// VUNPCKLPD performs "Unpack and Interleave Low Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VUNPCKLPD
+// Supported forms : (10 forms)
+//
+// * VUNPCKLPD xmm, xmm, xmm [AVX]
+// * VUNPCKLPD m128, xmm, xmm [AVX]
+// * VUNPCKLPD ymm, ymm, ymm [AVX]
+// * VUNPCKLPD m256, ymm, ymm [AVX]
+// * VUNPCKLPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VUNPCKLPD zmm, zmm, zmm{k}{z} [AVX512F]
+// * VUNPCKLPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VUNPCKLPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VUNPCKLPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VUNPCKLPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VUNPCKLPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VUNPCKLPD", 3, Operands { v0, v1, v2 })
+ // VUNPCKLPD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x14)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VUNPCKLPD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x14)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VUNPCKLPD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x14)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VUNPCKLPD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x14)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VUNPCKLPD m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x14)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VUNPCKLPD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x14)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VUNPCKLPD m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x14)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VUNPCKLPD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x14)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VUNPCKLPD m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x14)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VUNPCKLPD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x14)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VUNPCKLPD")
+ }
+ return p
+}
+
+// VUNPCKLPS performs "Unpack and Interleave Low Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VUNPCKLPS
+// Supported forms : (10 forms)
+//
+// * VUNPCKLPS xmm, xmm, xmm [AVX]
+// * VUNPCKLPS m128, xmm, xmm [AVX]
+// * VUNPCKLPS ymm, ymm, ymm [AVX]
+// * VUNPCKLPS m256, ymm, ymm [AVX]
+// * VUNPCKLPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VUNPCKLPS zmm, zmm, zmm{k}{z} [AVX512F]
+// * VUNPCKLPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VUNPCKLPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VUNPCKLPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VUNPCKLPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VUNPCKLPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VUNPCKLPS", 3, Operands { v0, v1, v2 })
+ // VUNPCKLPS xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x14)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VUNPCKLPS m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x14)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VUNPCKLPS ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x14)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VUNPCKLPS m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x14)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VUNPCKLPS m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x14)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VUNPCKLPS zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x14)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VUNPCKLPS m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x14)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VUNPCKLPS xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x14)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VUNPCKLPS m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x14)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VUNPCKLPS ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512F)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x14)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VUNPCKLPS")
+ }
+ return p
+}
+
+// VXORPD performs "Bitwise Logical XOR for Double-Precision Floating-Point Values".
+//
+// Mnemonic : VXORPD
+// Supported forms : (10 forms)
+//
+// * VXORPD xmm, xmm, xmm [AVX]
+// * VXORPD m128, xmm, xmm [AVX]
+// * VXORPD ymm, ymm, ymm [AVX]
+// * VXORPD m256, ymm, ymm [AVX]
+// * VXORPD m512/m64bcst, zmm, zmm{k}{z} [AVX512DQ]
+// * VXORPD zmm, zmm, zmm{k}{z} [AVX512DQ]
+// * VXORPD m128/m64bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VXORPD xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VXORPD m256/m64bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VXORPD ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VXORPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VXORPD", 3, Operands { v0, v1, v2 })
+ // VXORPD xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x57)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VXORPD m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x57)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VXORPD ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x57)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VXORPD m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x57)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VXORPD m512/m64bcst, zmm, zmm{k}{z}
+ if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x57)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VXORPD zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x57)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VXORPD m128/m64bcst, xmm, xmm{k}{z}
+ if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x57)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VXORPD xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x57)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VXORPD m256/m64bcst, ymm, ymm{k}{z}
+ if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x57)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VXORPD ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0xfd ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x57)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VXORPD")
+ }
+ return p
+}
+
+// VXORPS performs "Bitwise Logical XOR for Single-Precision Floating-Point Values".
+//
+// Mnemonic : VXORPS
+// Supported forms : (10 forms)
+//
+// * VXORPS xmm, xmm, xmm [AVX]
+// * VXORPS m128, xmm, xmm [AVX]
+// * VXORPS ymm, ymm, ymm [AVX]
+// * VXORPS m256, ymm, ymm [AVX]
+// * VXORPS m512/m32bcst, zmm, zmm{k}{z} [AVX512DQ]
+// * VXORPS zmm, zmm, zmm{k}{z} [AVX512DQ]
+// * VXORPS m128/m32bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VXORPS xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VXORPS m256/m32bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VXORPS ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VXORPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+ p := self.alloc("VXORPS", 3, Operands { v0, v1, v2 })
+ // VXORPS xmm, xmm, xmm
+ if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x57)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VXORPS m128, xmm, xmm
+ if isM128(v0) && isXMM(v1) && isXMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x57)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VXORPS ymm, ymm, ymm
+ if isYMM(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[2]), v[0], hlcode(v[1]))
+ m.emit(0x57)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VXORPS m256, ymm, ymm
+ if isM256(v0) && isYMM(v1) && isYMM(v2) {
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+ m.emit(0x57)
+ m.mrsd(lcode(v[2]), addr(v[0]), 1)
+ })
+ }
+ // VXORPS m512/m32bcst, zmm, zmm{k}{z}
+ if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x57)
+ m.mrsd(lcode(v[2]), addr(v[0]), 64)
+ })
+ }
+ // VXORPS zmm, zmm, zmm{k}{z}
+ if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+ self.require(ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+ m.emit(0x57)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VXORPS m128/m32bcst, xmm, xmm{k}{z}
+ if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x57)
+ m.mrsd(lcode(v[2]), addr(v[0]), 16)
+ })
+ }
+ // VXORPS xmm, xmm, xmm{k}{z}
+ if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+ m.emit(0x57)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ // VXORPS m256/m32bcst, ymm, ymm{k}{z}
+ if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+ m.emit(0x57)
+ m.mrsd(lcode(v[2]), addr(v[0]), 32)
+ })
+ }
+ // VXORPS ymm, ymm, ymm{k}{z}
+ if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+ self.require(ISA_AVX512VL | ISA_AVX512DQ)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x62)
+ m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+ m.emit(0x7c ^ (hlcode(v[1]) << 3))
+ m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+ m.emit(0x57)
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for VXORPS")
+ }
+ return p
+}
+
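+// Illustrative idiom (p is an existing *Program): XORing a register with
+// itself is the standard dependency-breaking way to zero it, for VXORPD
+// above just as for VXORPS:
+//
+//     p.VXORPS(XMM0, XMM0, XMM0)    // xmm0 := 0
+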
+// VZEROALL performs "Zero All YMM Registers".
+//
+// Mnemonic : VZEROALL
+// Supported forms : (1 form)
+//
+// * VZEROALL [AVX]
+//
+func (self *Program) VZEROALL() *Instruction {
+ p := self.alloc("VZEROALL", 0, Operands { })
+ // VZEROALL
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(4, 0, nil, 0)
+ m.emit(0x77)
+ })
+ return p
+}
+
+// VZEROUPPER performs "Zero Upper Bits of YMM Registers".
+//
+// Mnemonic : VZEROUPPER
+// Supported forms : (1 form)
+//
+// * VZEROUPPER [AVX]
+//
+func (self *Program) VZEROUPPER() *Instruction {
+ p := self.alloc("VZEROUPPER", 0, Operands { })
+ // VZEROUPPER
+ self.require(ISA_AVX)
+ p.domain = DomainAVX
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.vex2(0, 0, nil, 0)
+ m.emit(0x77)
+ })
+ return p
+}
+
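+// Illustrative note (p is an existing *Program): emit VZEROUPPER after
+// 256-bit AVX code and before calling legacy SSE code to avoid the AVX-SSE
+// transition penalty; unlike VZEROALL it clears only the bits above bit 127,
+// leaving the XMM halves intact:
+//
+//     p.VZEROUPPER()
+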
+// XADDB performs "Exchange and Add".
+//
+// Mnemonic : XADD
+// Supported forms : (2 forms)
+//
+// * XADDB r8, r8
+// * XADDB r8, m8
+//
+func (self *Program) XADDB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("XADDB", 2, Operands { v0, v1 })
+ // XADDB r8, r8
+ if isReg8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
+ m.emit(0x0f)
+ m.emit(0xc0)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // XADDB r8, m8
+ if isReg8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
+ m.emit(0x0f)
+ m.emit(0xc0)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for XADDB")
+ }
+ return p
+}
+
+// XADDL performs "Exchange and Add".
+//
+// Mnemonic : XADD
+// Supported forms : (2 forms)
+//
+// * XADDL r32, r32
+// * XADDL r32, m32
+//
+func (self *Program) XADDL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("XADDL", 2, Operands { v0, v1 })
+ // XADDL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0xc1)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // XADDL r32, m32
+ if isReg32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xc1)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for XADDL")
+ }
+ return p
+}
+
+// XADDQ performs "Exchange and Add".
+//
+// Mnemonic : XADD
+// Supported forms : (2 forms)
+//
+// * XADDQ r64, r64
+// * XADDQ r64, m64
+//
+func (self *Program) XADDQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("XADDQ", 2, Operands { v0, v1 })
+ // XADDQ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
+ m.emit(0x0f)
+ m.emit(0xc1)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // XADDQ r64, m64
+ if isReg64(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[0]), addr(v[1]))
+ m.emit(0x0f)
+ m.emit(0xc1)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for XADDQ")
+ }
+ return p
+}
+
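+// Illustrative sketch (p is an existing *Program): XADD stores the sum in
+// its destination (the second operand) and the old destination value in the
+// source register, the primitive behind fetch-and-add; atomic use against
+// memory additionally needs a LOCK prefix:
+//
+//     p.XADDQ(RAX, RBX)    // rax := old rbx; rbx := old rbx + old rax
+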
+// XADDW performs "Exchange and Add".
+//
+// Mnemonic : XADD
+// Supported forms : (2 forms)
+//
+// * XADDW r16, r16
+// * XADDW r16, m16
+//
+func (self *Program) XADDW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("XADDW", 2, Operands { v0, v1 })
+ // XADDW r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x0f)
+ m.emit(0xc1)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ }
+ // XADDW r16, m16
+ if isReg16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x0f)
+ m.emit(0xc1)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for XADDW")
+ }
+ return p
+}
+
+// XCHGB performs "Exchange Register/Memory with Register".
+//
+// Mnemonic : XCHG
+// Supported forms : (3 forms)
+//
+// * XCHGB r8, r8
+// * XCHGB m8, r8
+// * XCHGB r8, m8
+//
+func (self *Program) XCHGB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("XCHGB", 2, Operands { v0, v1 })
+ // XCHGB r8, r8
+ if isReg8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
+ m.emit(0x86)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1]))
+ m.emit(0x86)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // XCHGB m8, r8
+ if isM8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1]))
+ m.emit(0x86)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // XCHGB r8, m8
+ if isReg8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
+ m.emit(0x86)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for XCHGB")
+ }
+ return p
+}
+
+// XCHGL performs "Exchange Register/Memory with Register".
+//
+// Mnemonic : XCHG
+// Supported forms : (5 forms)
+//
+// * XCHGL r32, eax
+// * XCHGL eax, r32
+// * XCHGL r32, r32
+// * XCHGL m32, r32
+// * XCHGL r32, m32
+//
+func (self *Program) XCHGL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("XCHGL", 2, Operands { v0, v1 })
+ // XCHGL r32, eax
+ if isReg32(v0) && v1 == EAX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[0], false)
+ m.emit(0x90 | lcode(v[0]))
+ })
+ }
+ // XCHGL eax, r32
+ if v0 == EAX && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x90 | lcode(v[1]))
+ })
+ }
+ // XCHGL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x87)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x87)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // XCHGL m32, r32
+ if isM32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x87)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // XCHGL r32, m32
+ if isReg32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x87)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for XCHGL")
+ }
+ return p
+}
+
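+// Illustrative notes (p is an existing *Program): the eax forms use the
+// short 0x90+r encoding (the same byte the architecture reserves for NOP),
+// and when one operand is memory XCHG locks the bus implicitly, so atomic
+// swaps need no LOCK prefix:
+//
+//     p.XCHGL(ECX, EAX)    // swap ecx and eax via the one-byte form
+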
+// XCHGQ performs "Exchange Register/Memory with Register".
+//
+// Mnemonic : XCHG
+// Supported forms : (5 forms)
+//
+// * XCHGQ r64, rax
+// * XCHGQ rax, r64
+// * XCHGQ r64, r64
+// * XCHGQ m64, r64
+// * XCHGQ r64, m64
+//
+func (self *Program) XCHGQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("XCHGQ", 2, Operands { v0, v1 })
+ // XCHGQ r64, rax
+ if isReg64(v0) && v1 == RAX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]))
+ m.emit(0x90 | lcode(v[0]))
+ })
+ }
+ // XCHGQ rax, r64
+ if v0 == RAX && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0x90 | lcode(v[1]))
+ })
+ }
+ // XCHGQ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
+ m.emit(0x87)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x87)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // XCHGQ m64, r64
+ if isM64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x87)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // XCHGQ r64, m64
+ if isReg64(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[0]), addr(v[1]))
+ m.emit(0x87)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for XCHGQ")
+ }
+ return p
+}
+
+// XCHGW performs "Exchange Register/Memory with Register".
+//
+// Mnemonic : XCHG
+// Supported forms : (5 forms)
+//
+// * XCHGW r16, ax
+// * XCHGW ax, r16
+// * XCHGW r16, r16
+// * XCHGW m16, r16
+// * XCHGW r16, m16
+//
+func (self *Program) XCHGW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("XCHGW", 2, Operands { v0, v1 })
+ // XCHGW r16, ax
+ if isReg16(v0) && v1 == AX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[0], false)
+ m.emit(0x90 | lcode(v[0]))
+ })
+ }
+ // XCHGW ax, r16
+ if v0 == AX && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x90 | lcode(v[1]))
+ })
+ }
+ // XCHGW r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x87)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x87)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // XCHGW m16, r16
+ if isM16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x87)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // XCHGW r16, m16
+ if isReg16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x87)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for XCHGW")
+ }
+ return p
+}
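+
+// A worked encoding sketch (illustrative only): XCHGW(BX, AX) matches the
+// "XCHGW r16, ax" form above; the 0x66 operand-size prefix selects 16-bit
+// operation, no REX byte is needed, and 0x90 | lcode(BX) gives 0x93, so the
+// encoder emits the bytes 66 93 ("xchg ax, bx").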
+
+// XGETBV performs "Get Value of Extended Control Register".
+//
+// Mnemonic : XGETBV
+// Supported forms : (1 form)
+//
+// * XGETBV
+//
+func (self *Program) XGETBV() *Instruction {
+ p := self.alloc("XGETBV", 0, Operands { })
+ // XGETBV
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x0f)
+ m.emit(0x01)
+ m.emit(0xd0)
+ })
+ return p
+}
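+
+// Usage sketch (an assumption for illustration: p is a *Program built with
+// this package, and ECX is the 32-bit register constant defined alongside
+// EAX): XGETBV reads the extended control register selected by ECX into
+// EDX:EAX, so querying XCR0 would look like
+//
+//    p.XORL(ECX, ECX)    // ECX = 0 selects XCR0
+//    p.XGETBV()          // EDX:EAX = XCR0 (enabled x87/SSE/AVX state bits)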
+
+// XLATB performs "Table Look-up Translation".
+//
+// Mnemonic : XLATB
+// Supported forms : (2 forms)
+//
+// * XLATB    (bare opcode, d7)
+// * XLATB    (REX.W-prefixed, 48 d7)
+//
+func (self *Program) XLATB() *Instruction {
+ p := self.alloc("XLATB", 0, Operands { })
+ // XLATB
+ p.domain = DomainMisc
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xd7)
+ })
+ // XLATB
+ p.domain = DomainMisc
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48)
+ m.emit(0xd7)
+ })
+ return p
+}
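+
+// Semantics note (illustrative only): in either encoding above, XLAT replaces
+// AL with the byte at [rBX + unsigned AL]; the REX.W-prefixed alternative
+// differs only in its extra prefix byte.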
+
+// XORB performs "Logical Exclusive OR".
+//
+// Mnemonic : XOR
+// Supported forms : (6 forms)
+//
+// * XORB imm8, al
+// * XORB imm8, r8
+// * XORB r8, r8
+// * XORB m8, r8
+// * XORB imm8, m8
+// * XORB r8, m8
+//
+func (self *Program) XORB(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("XORB", 2, Operands { v0, v1 })
+ // XORB imm8, al
+ if isImm8(v0) && v1 == AL {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x34)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // XORB imm8, r8
+ if isImm8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], isReg8REX(v[1]))
+ m.emit(0x80)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // XORB r8, r8
+ if isReg8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1]))
+ m.emit(0x30)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1]))
+ m.emit(0x32)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // XORB m8, r8
+ if isM8(v0) && isReg8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1]))
+ m.emit(0x32)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // XORB imm8, m8
+ if isImm8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x80)
+ m.mrsd(6, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // XORB r8, m8
+ if isReg8(v0) && isM8(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0]))
+ m.emit(0x30)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for XORB")
+ }
+ return p
+}
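+
+// A worked encoding sketch (illustrative only): XORB(CL, AL) selects the
+// register-register form above; no REX byte is needed, so the two
+// alternatives emit
+//
+//    0x30, 0xc0 | lcode(CL)<<3 | lcode(AL)    // 30 c8
+//    0x32, 0xc0 | lcode(AL)<<3 | lcode(CL)    // 32 c1
+//
+// both of which disassemble as "xor al, cl"; they are direction-bit twins of
+// the same operation.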
+
+// XORL performs "Logical Exclusive OR".
+//
+// Mnemonic : XOR
+// Supported forms : (8 forms)
+//
+// * XORL imm32, eax
+// * XORL imm8, r32
+// * XORL imm32, r32
+// * XORL r32, r32
+// * XORL m32, r32
+// * XORL imm8, m32
+// * XORL imm32, m32
+// * XORL r32, m32
+//
+func (self *Program) XORL(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("XORL", 2, Operands { v0, v1 })
+ // XORL imm32, eax
+ if isImm32(v0) && v1 == EAX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x35)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // XORL imm8, r32
+ if isImm8Ext(v0, 4) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x83)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // XORL imm32, r32
+ if isImm32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, v[1], false)
+ m.emit(0x81)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // XORL r32, r32
+ if isReg32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x31)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x33)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // XORL m32, r32
+ if isM32(v0) && isReg32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x33)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // XORL imm8, m32
+ if isImm8Ext(v0, 4) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x83)
+ m.mrsd(6, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // XORL imm32, m32
+ if isImm32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x81)
+ m.mrsd(6, addr(v[1]), 1)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // XORL r32, m32
+ if isReg32(v0) && isM32(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x31)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for XORL")
+ }
+ return p
+}
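+
+// A worked encoding sketch (illustrative only): the classic zeroing idiom
+// XORL(EAX, EAX) selects the register-register form above and emits
+//
+//    0x31, 0xc0    // xor eax, eax
+//
+// Because 32-bit writes zero-extend into the full 64-bit register, these two
+// bytes clear all of RAX, one byte shorter than the REX.W-prefixed XORQ
+// equivalent.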
+
+// XORPD performs "Bitwise Logical XOR for Double-Precision Floating-Point Values".
+//
+// Mnemonic : XORPD
+// Supported forms : (2 forms)
+//
+// * XORPD xmm, xmm [SSE2]
+// * XORPD m128, xmm [SSE2]
+//
+func (self *Program) XORPD(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("XORPD", 2, Operands { v0, v1 })
+ // XORPD xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x57)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // XORPD m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x57)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for XORPD")
+ }
+ return p
+}
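+
+// A worked encoding sketch (illustrative only): XORPD(XMM1, XMM0) selects the
+// register-register form above and emits
+//
+//    0x66, 0x0f, 0x57, 0xc0 | lcode(XMM0)<<3 | lcode(XMM1)    // 66 0f 57 c1
+//
+// i.e. "xorpd xmm0, xmm1"; note that the destination (the second operand
+// here) lands in the ModRM reg field.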
+
+// XORPS performs "Bitwise Logical XOR for Single-Precision Floating-Point Values".
+//
+// Mnemonic : XORPS
+// Supported forms : (2 forms)
+//
+// * XORPS xmm, xmm [SSE]
+// * XORPS m128, xmm [SSE]
+//
+func (self *Program) XORPS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("XORPS", 2, Operands { v0, v1 })
+ // XORPS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x57)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // XORPS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x57)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for XORPS")
+ }
+ return p
+}
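+
+// A worked encoding sketch (illustrative only): XORPS(XMM0, XMM0) is the
+// usual way to zero an XMM register and emits
+//
+//    0x0f, 0x57, 0xc0    // xorps xmm0, xmm0
+//
+// one byte shorter than the XORPD equivalent, since XORPS needs no 0x66
+// prefix; the two are bitwise-identical in effect.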
+
+// XORQ performs "Logical Exclusive OR".
+//
+// Mnemonic : XOR
+// Supported forms : (8 forms)
+//
+// * XORQ imm32, rax
+// * XORQ imm8, r64
+// * XORQ imm32, r64
+// * XORQ r64, r64
+// * XORQ m64, r64
+// * XORQ imm8, m64
+// * XORQ imm32, m64
+// * XORQ r64, m64
+//
+func (self *Program) XORQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("XORQ", 2, Operands { v0, v1 })
+ // XORQ imm32, rax
+ if isImm32(v0) && v1 == RAX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48)
+ m.emit(0x35)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // XORQ imm8, r64
+ if isImm8Ext(v0, 8) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0x83)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // XORQ imm32, r64
+ if isImm32Ext(v0, 8) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]))
+ m.emit(0x81)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // XORQ r64, r64
+ if isReg64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1]))
+ m.emit(0x31)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0]))
+ m.emit(0x33)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // XORQ m64, r64
+ if isM64(v0) && isReg64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[1]), addr(v[0]))
+ m.emit(0x33)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // XORQ imm8, m64
+ if isImm8Ext(v0, 8) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0x83)
+ m.mrsd(6, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // XORQ imm32, m64
+ if isImm32Ext(v0, 8) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, 0, addr(v[1]))
+ m.emit(0x81)
+ m.mrsd(6, addr(v[1]), 1)
+ m.imm4(toImmAny(v[0]))
+ })
+ }
+ // XORQ r64, m64
+ if isReg64(v0) && isM64(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexm(1, hcode(v[0]), addr(v[1]))
+ m.emit(0x31)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for XORQ")
+ }
+ return p
+}
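+
+// A worked encoding sketch (illustrative only): XORQ(R9, R9) selects the
+// register-register form above and emits
+//
+//    0x48 | hcode(R9)<<2 | hcode(R9)    // REX.WRB = 0x4d
+//    0x31                               // opcode
+//    0xc0 | lcode(R9)<<3 | lcode(R9)    // ModRM = 0xc9
+//
+// i.e. 4d 31 c9 ("xor r9, r9"). For the low eight registers the 32-bit XORL
+// idiom is preferred for zeroing, since it is shorter and zero-extends.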
+
+// XORW performs "Logical Exclusive OR".
+//
+// Mnemonic : XOR
+// Supported forms : (8 forms)
+//
+// * XORW imm16, ax
+// * XORW imm8, r16
+// * XORW imm16, r16
+// * XORW r16, r16
+// * XORW m16, r16
+// * XORW imm8, m16
+// * XORW imm16, m16
+// * XORW r16, m16
+//
+func (self *Program) XORW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("XORW", 2, Operands { v0, v1 })
+ // XORW imm16, ax
+ if isImm16(v0) && v1 == AX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.emit(0x35)
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // XORW imm8, r16
+ if isImm8Ext(v0, 2) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x83)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // XORW imm16, r16
+ if isImm16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x81)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // XORW r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x31)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x33)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // XORW m16, r16
+ if isM16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x33)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // XORW imm8, m16
+ if isImm8Ext(v0, 2) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x83)
+ m.mrsd(6, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // XORW imm16, m16
+ if isImm16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x81)
+ m.mrsd(6, addr(v[1]), 1)
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // XORW r16, m16
+ if isReg16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x31)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for XORW")
+ }
+ return p
+}
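+
+// A worked encoding sketch (illustrative only): XORW(CX, AX) selects the
+// register-register form above; the 0x66 prefix selects 16-bit operand size,
+// and the first alternative emits
+//
+//    0x66, 0x31, 0xc0 | lcode(CX)<<3 | lcode(AX)    // 66 31 c8
+//
+// i.e. "xor ax, cx"; without the prefix the same bytes would encode the
+// 32-bit XORL form.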