| field | value |
|---|---|
| author | 2024-05-06 08:50:47 +0000 |
| committer | 2024-05-06 08:50:47 +0000 |
| commit | a5f28fe0c923984c263592e82bbce99b0032b794 (patch) |
| tree | 403544ad5305eb171a85d2b4c59559f83abd87a7 /vendor/github.com/cloudwego/iasm/x86_64/instructions.go |
| parent | [chore]: Bump golang.org/x/image from 0.15.0 to 0.16.0 (#2898) (diff) |
| download | gotosocial-a5f28fe0c923984c263592e82bbce99b0032b794.tar.xz |
[chore]: Bump github.com/gin-contrib/gzip from 1.0.0 to 1.0.1 (#2899)
Bumps [github.com/gin-contrib/gzip](https://github.com/gin-contrib/gzip) from 1.0.0 to 1.0.1.
- [Release notes](https://github.com/gin-contrib/gzip/releases)
- [Changelog](https://github.com/gin-contrib/gzip/blob/master/.goreleaser.yaml)
- [Commits](https://github.com/gin-contrib/gzip/compare/v1.0.0...v1.0.1)
---
updated-dependencies:
- dependency-name: github.com/gin-contrib/gzip
dependency-type: direct:production
update-type: version-update:semver-patch
...
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Diffstat (limited to 'vendor/github.com/cloudwego/iasm/x86_64/instructions.go')
| -rw-r--r-- | vendor/github.com/cloudwego/iasm/x86_64/instructions.go | 97210 |
1 file changed, 97210 insertions, 0 deletions
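The vendored file below is the generated x86-64 instruction table of the iasm assembler: one `Program` method per mnemonic, each validating its operand combination and registering the matching encoder. A minimal sketch of how that API might be driven (the `buildAdd` helper and the `ECX` constant are illustrative assumptions; only the `ADDQ`/`ADCL` methods and registers like `RAX`, `EAX` are attested by the hunk itself):

```go
package asmdemo

import "github.com/cloudwego/iasm/x86_64"

// buildAdd is a hypothetical helper: it assumes a *x86_64.Program has
// already been obtained from the library's own constructors, which are
// not part of this diff.
func buildAdd(p *x86_64.Program) {
	// ADDQ imm32, rax form: encoded as 0x48 0x05 imm32 per the hunk below.
	p.ADDQ(1, x86_64.RAX)
	// ADCL r32, r32 form: adds EAX into ECX together with the carry flag
	// left by the previous ADDQ. ECX is assumed by analogy with EAX.
	p.ADCL(x86_64.EAX, x86_64.ECX)
}
```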
diff --git a/vendor/github.com/cloudwego/iasm/x86_64/instructions.go b/vendor/github.com/cloudwego/iasm/x86_64/instructions.go new file mode 100644 index 000000000..d9c069035 --- /dev/null +++ b/vendor/github.com/cloudwego/iasm/x86_64/instructions.go @@ -0,0 +1,97210 @@ +// +// Copyright 2024 CloudWeGo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// Code generated by "mkasm_amd64.py", DO NOT EDIT. + +package x86_64 + +// ADCB performs "Add with Carry". +// +// Mnemonic : ADC +// Supported forms : (6 forms) +// +// * ADCB imm8, al +// * ADCB imm8, r8 +// * ADCB r8, r8 +// * ADCB m8, r8 +// * ADCB imm8, m8 +// * ADCB r8, m8 +// +func (self *Program) ADCB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADCB", 2, Operands { v0, v1 }) + // ADCB imm8, al + if isImm8(v0) && v1 == AL { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x14) + m.imm1(toImmAny(v[0])) + }) + } + // ADCB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0x80) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ADCB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x10) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADCB m8, r8 + if isM8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1])) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ADCB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x80) + m.mrsd(2, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ADCB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x10) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADCB") + } + return p +} + +// ADCL performs "Add with Carry". 
+// +// Mnemonic : ADC +// Supported forms : (8 forms) +// +// * ADCL imm32, eax +// * ADCL imm8, r32 +// * ADCL imm32, r32 +// * ADCL r32, r32 +// * ADCL m32, r32 +// * ADCL imm8, m32 +// * ADCL imm32, m32 +// * ADCL r32, m32 +// +func (self *Program) ADCL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADCL", 2, Operands { v0, v1 }) + // ADCL imm32, eax + if isImm32(v0) && v1 == EAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x15) + m.imm4(toImmAny(v[0])) + }) + } + // ADCL imm8, r32 + if isImm8Ext(v0, 4) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ADCL imm32, r32 + if isImm32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xd0 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // ADCL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x13) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADCL m32, r32 + if isM32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x13) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ADCL imm8, m32 + if isImm8Ext(v0, 4) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(2, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ADCL imm32, m32 + if isImm32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(2, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // ADCL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADCL") + } + return p +} + +// ADCQ performs "Add with Carry". 
+// +// Mnemonic : ADC +// Supported forms : (8 forms) +// +// * ADCQ imm32, rax +// * ADCQ imm8, r64 +// * ADCQ imm32, r64 +// * ADCQ r64, r64 +// * ADCQ m64, r64 +// * ADCQ imm8, m64 +// * ADCQ imm32, m64 +// * ADCQ r64, m64 +// +func (self *Program) ADCQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADCQ", 2, Operands { v0, v1 }) + // ADCQ imm32, rax + if isImm32(v0) && v1 == RAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48) + m.emit(0x15) + m.imm4(toImmAny(v[0])) + }) + } + // ADCQ imm8, r64 + if isImm8Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x83) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ADCQ imm32, r64 + if isImm32Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x81) + m.emit(0xd0 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // ADCQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x13) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADCQ m64, r64 + if isM64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x13) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ADCQ imm8, m64 + if isImm8Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x83) + m.mrsd(2, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ADCQ imm32, m64 + if isImm32Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x81) + m.mrsd(2, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // ADCQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADCQ") + } + return p +} + +// ADCW performs "Add with Carry". 
+// +// Mnemonic : ADC +// Supported forms : (8 forms) +// +// * ADCW imm16, ax +// * ADCW imm8, r16 +// * ADCW imm16, r16 +// * ADCW r16, r16 +// * ADCW m16, r16 +// * ADCW imm8, m16 +// * ADCW imm16, m16 +// * ADCW r16, m16 +// +func (self *Program) ADCW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADCW", 2, Operands { v0, v1 }) + // ADCW imm16, ax + if isImm16(v0) && v1 == AX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x15) + m.imm2(toImmAny(v[0])) + }) + } + // ADCW imm8, r16 + if isImm8Ext(v0, 2) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ADCW imm16, r16 + if isImm16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xd0 | lcode(v[1])) + m.imm2(toImmAny(v[0])) + }) + } + // ADCW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x13) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADCW m16, r16 + if isM16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x13) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ADCW imm8, m16 + if isImm8Ext(v0, 2) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(2, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ADCW imm16, m16 + if isImm16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(2, addr(v[1]), 1) + m.imm2(toImmAny(v[0])) + }) + } + // ADCW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADCW") + } + return p +} + +// ADCXL performs "Unsigned Integer Addition of Two Operands with Carry Flag". 
+// +// Mnemonic : ADCX +// Supported forms : (2 forms) +// +// * ADCXL r32, r32 [ADX] +// * ADCXL m32, r32 [ADX] +// +func (self *Program) ADCXL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADCXL", 2, Operands { v0, v1 }) + // ADCXL r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_ADX) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADCXL m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_ADX) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADCXL") + } + return p +} + +// ADCXQ performs "Unsigned Integer Addition of Two Operands with Carry Flag". +// +// Mnemonic : ADCX +// Supported forms : (2 forms) +// +// * ADCXQ r64, r64 [ADX] +// * ADCXQ m64, r64 [ADX] +// +func (self *Program) ADCXQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADCXQ", 2, Operands { v0, v1 }) + // ADCXQ r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_ADX) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADCXQ m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_ADX) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADCXQ") + } + return p +} + +// ADDB performs "Add". 
+// +// Mnemonic : ADD +// Supported forms : (6 forms) +// +// * ADDB imm8, al +// * ADDB imm8, r8 +// * ADDB r8, r8 +// * ADDB m8, r8 +// * ADDB imm8, m8 +// * ADDB r8, m8 +// +func (self *Program) ADDB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADDB", 2, Operands { v0, v1 }) + // ADDB imm8, al + if isImm8(v0) && v1 == AL { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x04) + m.imm1(toImmAny(v[0])) + }) + } + // ADDB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0x80) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ADDB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x00) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x02) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADDB m8, r8 + if isM8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1])) + m.emit(0x02) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ADDB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x80) + m.mrsd(0, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ADDB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x00) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADDB") + } + return p +} + +// ADDL performs "Add". 
+// +// Mnemonic : ADD +// Supported forms : (8 forms) +// +// * ADDL imm32, eax +// * ADDL imm8, r32 +// * ADDL imm32, r32 +// * ADDL r32, r32 +// * ADDL m32, r32 +// * ADDL imm8, m32 +// * ADDL imm32, m32 +// * ADDL r32, m32 +// +func (self *Program) ADDL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADDL", 2, Operands { v0, v1 }) + // ADDL imm32, eax + if isImm32(v0) && v1 == EAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x05) + m.imm4(toImmAny(v[0])) + }) + } + // ADDL imm8, r32 + if isImm8Ext(v0, 4) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ADDL imm32, r32 + if isImm32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xc0 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // ADDL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x01) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x03) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADDL m32, r32 + if isM32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x03) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ADDL imm8, m32 + if isImm8Ext(v0, 4) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(0, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ADDL imm32, m32 + if isImm32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(0, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // ADDL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x01) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADDL") + } + return p +} + +// ADDPD performs "Add Packed Double-Precision Floating-Point Values". +// +// Mnemonic : ADDPD +// Supported forms : (2 forms) +// +// * ADDPD xmm, xmm [SSE2] +// * ADDPD m128, xmm [SSE2] +// +func (self *Program) ADDPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADDPD", 2, Operands { v0, v1 }) + // ADDPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x58) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADDPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x58) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADDPD") + } + return p +} + +// ADDPS performs "Add Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : ADDPS +// Supported forms : (2 forms) +// +// * ADDPS xmm, xmm [SSE] +// * ADDPS m128, xmm [SSE] +// +func (self *Program) ADDPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADDPS", 2, Operands { v0, v1 }) + // ADDPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x58) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADDPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x58) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADDPS") + } + return p +} + +// ADDQ performs "Add". +// +// Mnemonic : ADD +// Supported forms : (8 forms) +// +// * ADDQ imm32, rax +// * ADDQ imm8, r64 +// * ADDQ imm32, r64 +// * ADDQ r64, r64 +// * ADDQ m64, r64 +// * ADDQ imm8, m64 +// * ADDQ imm32, m64 +// * ADDQ r64, m64 +// +func (self *Program) ADDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADDQ", 2, Operands { v0, v1 }) + // ADDQ imm32, rax + if isImm32(v0) && v1 == RAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48) + m.emit(0x05) + m.imm4(toImmAny(v[0])) + }) + } + // ADDQ imm8, r64 + if isImm8Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x83) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ADDQ imm32, r64 + if isImm32Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x81) + m.emit(0xc0 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // ADDQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x01) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x03) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADDQ m64, r64 + if isM64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x03) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ADDQ imm8, m64 + if isImm8Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x83) + m.mrsd(0, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ADDQ imm32, m64 + if isImm32Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x81) + m.mrsd(0, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // ADDQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x01) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADDQ") + } + return p +} + +// ADDSD performs "Add Scalar Double-Precision Floating-Point Values". 
+// +// Mnemonic : ADDSD +// Supported forms : (2 forms) +// +// * ADDSD xmm, xmm [SSE2] +// * ADDSD m64, xmm [SSE2] +// +func (self *Program) ADDSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADDSD", 2, Operands { v0, v1 }) + // ADDSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x58) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADDSD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x58) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADDSD") + } + return p +} + +// ADDSS performs "Add Scalar Single-Precision Floating-Point Values". +// +// Mnemonic : ADDSS +// Supported forms : (2 forms) +// +// * ADDSS xmm, xmm [SSE] +// * ADDSS m32, xmm [SSE] +// +func (self *Program) ADDSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADDSS", 2, Operands { v0, v1 }) + // ADDSS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x58) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADDSS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x58) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADDSS") + } + return p +} + +// ADDSUBPD performs "Packed Double-FP Add/Subtract". +// +// Mnemonic : ADDSUBPD +// Supported forms : (2 forms) +// +// * ADDSUBPD xmm, xmm [SSE3] +// * ADDSUBPD m128, xmm [SSE3] +// +func (self *Program) ADDSUBPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADDSUBPD", 2, Operands { v0, v1 }) + // ADDSUBPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd0) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADDSUBPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd0) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADDSUBPD") + } + return p +} + +// ADDSUBPS performs "Packed Single-FP Add/Subtract". 
+// +// Mnemonic : ADDSUBPS +// Supported forms : (2 forms) +// +// * ADDSUBPS xmm, xmm [SSE3] +// * ADDSUBPS m128, xmm [SSE3] +// +func (self *Program) ADDSUBPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADDSUBPS", 2, Operands { v0, v1 }) + // ADDSUBPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd0) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADDSUBPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd0) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADDSUBPS") + } + return p +} + +// ADDW performs "Add". +// +// Mnemonic : ADD +// Supported forms : (8 forms) +// +// * ADDW imm16, ax +// * ADDW imm8, r16 +// * ADDW imm16, r16 +// * ADDW r16, r16 +// * ADDW m16, r16 +// * ADDW imm8, m16 +// * ADDW imm16, m16 +// * ADDW r16, m16 +// +func (self *Program) ADDW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADDW", 2, Operands { v0, v1 }) + // ADDW imm16, ax + if isImm16(v0) && v1 == AX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x05) + m.imm2(toImmAny(v[0])) + }) + } + // ADDW imm8, r16 + if isImm8Ext(v0, 2) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ADDW imm16, r16 + if isImm16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xc0 | lcode(v[1])) + m.imm2(toImmAny(v[0])) + }) + } + // ADDW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x01) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x03) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADDW m16, r16 + if isM16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x03) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ADDW imm8, m16 + if isImm8Ext(v0, 2) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(0, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ADDW imm16, m16 + if isImm16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(0, addr(v[1]), 1) + m.imm2(toImmAny(v[0])) + }) + } + // ADDW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x01) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADDW") + } + return p +} + +// ADOXL performs "Unsigned Integer Addition of Two Operands with Overflow 
Flag". +// +// Mnemonic : ADOX +// Supported forms : (2 forms) +// +// * ADOXL r32, r32 [ADX] +// * ADOXL m32, r32 [ADX] +// +func (self *Program) ADOXL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADOXL", 2, Operands { v0, v1 }) + // ADOXL r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_ADX) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADOXL m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_ADX) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADOXL") + } + return p +} + +// ADOXQ performs "Unsigned Integer Addition of Two Operands with Overflow Flag". +// +// Mnemonic : ADOX +// Supported forms : (2 forms) +// +// * ADOXQ r64, r64 [ADX] +// * ADOXQ m64, r64 [ADX] +// +func (self *Program) ADOXQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ADOXQ", 2, Operands { v0, v1 }) + // ADOXQ r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_ADX) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ADOXQ m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_ADX) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ADOXQ") + } + return p +} + +// AESDEC performs "Perform One Round of an AES Decryption Flow". +// +// Mnemonic : AESDEC +// Supported forms : (2 forms) +// +// * AESDEC xmm, xmm [AES] +// * AESDEC m128, xmm [AES] +// +func (self *Program) AESDEC(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("AESDEC", 2, Operands { v0, v1 }) + // AESDEC xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xde) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // AESDEC m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xde) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for AESDEC") + } + return p +} + +// AESDECLAST performs "Perform Last Round of an AES Decryption Flow". 
+// +// Mnemonic : AESDECLAST +// Supported forms : (2 forms) +// +// * AESDECLAST xmm, xmm [AES] +// * AESDECLAST m128, xmm [AES] +// +func (self *Program) AESDECLAST(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("AESDECLAST", 2, Operands { v0, v1 }) + // AESDECLAST xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // AESDECLAST m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xdf) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for AESDECLAST") + } + return p +} + +// AESENC performs "Perform One Round of an AES Encryption Flow". +// +// Mnemonic : AESENC +// Supported forms : (2 forms) +// +// * AESENC xmm, xmm [AES] +// * AESENC m128, xmm [AES] +// +func (self *Program) AESENC(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("AESENC", 2, Operands { v0, v1 }) + // AESENC xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xdc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // AESENC m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xdc) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for AESENC") + } + return p +} + +// AESENCLAST performs "Perform Last Round of an AES Encryption Flow". +// +// Mnemonic : AESENCLAST +// Supported forms : (2 forms) +// +// * AESENCLAST xmm, xmm [AES] +// * AESENCLAST m128, xmm [AES] +// +func (self *Program) AESENCLAST(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("AESENCLAST", 2, Operands { v0, v1 }) + // AESENCLAST xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xdd) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // AESENCLAST m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xdd) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for AESENCLAST") + } + return p +} + +// AESIMC performs "Perform the AES InvMixColumn Transformation". 
+// +// Mnemonic : AESIMC +// Supported forms : (2 forms) +// +// * AESIMC xmm, xmm [AES] +// * AESIMC m128, xmm [AES] +// +func (self *Program) AESIMC(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("AESIMC", 2, Operands { v0, v1 }) + // AESIMC xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // AESIMC m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xdb) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for AESIMC") + } + return p +} + +// AESKEYGENASSIST performs "AES Round Key Generation Assist". +// +// Mnemonic : AESKEYGENASSIST +// Supported forms : (2 forms) +// +// * AESKEYGENASSIST imm8, xmm, xmm [AES] +// * AESKEYGENASSIST imm8, m128, xmm [AES] +// +func (self *Program) AESKEYGENASSIST(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("AESKEYGENASSIST", 3, Operands { v0, v1, v2 }) + // AESKEYGENASSIST imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // AESKEYGENASSIST imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0xdf) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for AESKEYGENASSIST") + } + return p +} + +// ANDB performs "Logical AND". 
+// +// Mnemonic : AND +// Supported forms : (6 forms) +// +// * ANDB imm8, al +// * ANDB imm8, r8 +// * ANDB r8, r8 +// * ANDB m8, r8 +// * ANDB imm8, m8 +// * ANDB r8, m8 +// +func (self *Program) ANDB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ANDB", 2, Operands { v0, v1 }) + // ANDB imm8, al + if isImm8(v0) && v1 == AL { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x24) + m.imm1(toImmAny(v[0])) + }) + } + // ANDB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0x80) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ANDB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x20) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x22) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ANDB m8, r8 + if isM8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1])) + m.emit(0x22) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ANDB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x80) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ANDB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x20) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ANDB") + } + return p +} + +// ANDL performs "Logical AND". 
+// +// Mnemonic : AND +// Supported forms : (8 forms) +// +// * ANDL imm32, eax +// * ANDL imm8, r32 +// * ANDL imm32, r32 +// * ANDL r32, r32 +// * ANDL m32, r32 +// * ANDL imm8, m32 +// * ANDL imm32, m32 +// * ANDL r32, m32 +// +func (self *Program) ANDL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ANDL", 2, Operands { v0, v1 }) + // ANDL imm32, eax + if isImm32(v0) && v1 == EAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x25) + m.imm4(toImmAny(v[0])) + }) + } + // ANDL imm8, r32 + if isImm8Ext(v0, 4) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ANDL imm32, r32 + if isImm32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xe0 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // ANDL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x21) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x23) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ANDL m32, r32 + if isM32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x23) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ANDL imm8, m32 + if isImm8Ext(v0, 4) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ANDL imm32, m32 + if isImm32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(4, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // ANDL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x21) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ANDL") + } + return p +} + +// ANDNL performs "Logical AND NOT". +// +// Mnemonic : ANDN +// Supported forms : (2 forms) +// +// * ANDNL r32, r32, r32 [BMI] +// * ANDNL m32, r32, r32 [BMI] +// +func (self *Program) ANDNL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("ANDNL", 3, Operands { v0, v1, v2 }) + // ANDNL r32, r32, r32 + if isReg32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78 ^ (hlcode(v[1]) << 3)) + m.emit(0xf2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // ANDNL m32, r32, r32 + if isM32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x00, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf2) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ANDNL") + } + return p +} + +// ANDNPD performs "Bitwise Logical AND NOT of Packed Double-Precision Floating-Point Values". 
+// +// Mnemonic : ANDNPD +// Supported forms : (2 forms) +// +// * ANDNPD xmm, xmm [SSE2] +// * ANDNPD m128, xmm [SSE2] +// +func (self *Program) ANDNPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ANDNPD", 2, Operands { v0, v1 }) + // ANDNPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x55) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ANDNPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x55) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ANDNPD") + } + return p +} + +// ANDNPS performs "Bitwise Logical AND NOT of Packed Single-Precision Floating-Point Values". +// +// Mnemonic : ANDNPS +// Supported forms : (2 forms) +// +// * ANDNPS xmm, xmm [SSE] +// * ANDNPS m128, xmm [SSE] +// +func (self *Program) ANDNPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ANDNPS", 2, Operands { v0, v1 }) + // ANDNPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x55) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ANDNPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x55) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ANDNPS") + } + return p +} + +// ANDNQ performs "Logical AND NOT". +// +// Mnemonic : ANDN +// Supported forms : (2 forms) +// +// * ANDNQ r64, r64, r64 [BMI] +// * ANDNQ m64, r64, r64 [BMI] +// +func (self *Program) ANDNQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("ANDNQ", 3, Operands { v0, v1, v2 }) + // ANDNQ r64, r64, r64 + if isReg64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0xf2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // ANDNQ m64, r64, r64 + if isM64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf2) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ANDNQ") + } + return p +} + +// ANDPD performs "Bitwise Logical AND of Packed Double-Precision Floating-Point Values". 
+// +// Mnemonic : ANDPD +// Supported forms : (2 forms) +// +// * ANDPD xmm, xmm [SSE2] +// * ANDPD m128, xmm [SSE2] +// +func (self *Program) ANDPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ANDPD", 2, Operands { v0, v1 }) + // ANDPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x54) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ANDPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x54) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ANDPD") + } + return p +} + +// ANDPS performs "Bitwise Logical AND of Packed Single-Precision Floating-Point Values". +// +// Mnemonic : ANDPS +// Supported forms : (2 forms) +// +// * ANDPS xmm, xmm [SSE] +// * ANDPS m128, xmm [SSE] +// +func (self *Program) ANDPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ANDPS", 2, Operands { v0, v1 }) + // ANDPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x54) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ANDPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x54) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ANDPS") + } + return p +} + +// ANDQ performs "Logical AND". 
+// +// Mnemonic : AND +// Supported forms : (8 forms) +// +// * ANDQ imm32, rax +// * ANDQ imm8, r64 +// * ANDQ imm32, r64 +// * ANDQ r64, r64 +// * ANDQ m64, r64 +// * ANDQ imm8, m64 +// * ANDQ imm32, m64 +// * ANDQ r64, m64 +// +func (self *Program) ANDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ANDQ", 2, Operands { v0, v1 }) + // ANDQ imm32, rax + if isImm32(v0) && v1 == RAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48) + m.emit(0x25) + m.imm4(toImmAny(v[0])) + }) + } + // ANDQ imm8, r64 + if isImm8Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x83) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ANDQ imm32, r64 + if isImm32Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x81) + m.emit(0xe0 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // ANDQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x21) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x23) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ANDQ m64, r64 + if isM64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x23) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ANDQ imm8, m64 + if isImm8Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x83) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ANDQ imm32, m64 + if isImm32Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x81) + m.mrsd(4, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // ANDQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x21) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ANDQ") + } + return p +} + +// ANDW performs "Logical AND". 
+// +// Mnemonic : AND +// Supported forms : (8 forms) +// +// * ANDW imm16, ax +// * ANDW imm8, r16 +// * ANDW imm16, r16 +// * ANDW r16, r16 +// * ANDW m16, r16 +// * ANDW imm8, m16 +// * ANDW imm16, m16 +// * ANDW r16, m16 +// +func (self *Program) ANDW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ANDW", 2, Operands { v0, v1 }) + // ANDW imm16, ax + if isImm16(v0) && v1 == AX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x25) + m.imm2(toImmAny(v[0])) + }) + } + // ANDW imm8, r16 + if isImm8Ext(v0, 2) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ANDW imm16, r16 + if isImm16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xe0 | lcode(v[1])) + m.imm2(toImmAny(v[0])) + }) + } + // ANDW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x21) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x23) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ANDW m16, r16 + if isM16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x23) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ANDW imm8, m16 + if isImm8Ext(v0, 2) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ANDW imm16, m16 + if isImm16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(4, addr(v[1]), 1) + m.imm2(toImmAny(v[0])) + }) + } + // ANDW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x21) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ANDW") + } + return p +} + +// BEXTR performs "Bit Field Extract". 
+// +// Mnemonic : BEXTR +// Supported forms : (8 forms) +// +// * BEXTR imm32, r32, r32 [TBM] +// * BEXTR imm32, m32, r32 [TBM] +// * BEXTR imm32, r64, r64 [TBM] +// * BEXTR imm32, m64, r64 [TBM] +// * BEXTR r32, r32, r32 [BMI] +// * BEXTR r32, m32, r32 [BMI] +// * BEXTR r64, r64, r64 [BMI] +// * BEXTR r64, m64, r64 [BMI] +// +func (self *Program) BEXTR(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("BEXTR", 3, Operands { v0, v1, v2 }) + // BEXTR imm32, r32, r32 + if isImm32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xea ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78) + m.emit(0x10) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // BEXTR imm32, m32, r32 + if isImm32(v0) && isM32(v1) && isReg32(v2) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1010, 0x00, hcode(v[2]), addr(v[1]), 0) + m.emit(0x10) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // BEXTR imm32, r64, r64 + if isImm32(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xea ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xf8) + m.emit(0x10) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // BEXTR imm32, m64, r64 + if isImm32(v0) && isM64(v1) && isReg64(v2) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1010, 0x80, hcode(v[2]), addr(v[1]), 0) + m.emit(0x10) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // BEXTR r32, r32, r32 + if isReg32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // BEXTR r32, m32, r32 + if isReg32(v0) && isM32(v1) && isReg32(v2) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0xf7) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // BEXTR r64, r64, r64 + if isReg64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xf8 ^ (hlcode(v[0]) << 3)) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // BEXTR r64, m64, r64 + if isReg64(v0) && isM64(v1) && isReg64(v2) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x80, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0xf7) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BEXTR") + } + return p +} + +// BLCFILL performs "Fill From Lowest Clear Bit". 
+// +// Mnemonic : BLCFILL +// Supported forms : (4 forms) +// +// * BLCFILL r32, r32 [TBM] +// * BLCFILL m32, r32 [TBM] +// * BLCFILL r64, r64 [TBM] +// * BLCFILL m64, r64 [TBM] +// +func (self *Program) BLCFILL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BLCFILL", 2, Operands { v0, v1 }) + // BLCFILL r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0x78 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xc8 | lcode(v[0])) + }) + } + // BLCFILL m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(1, addr(v[0]), 1) + }) + } + // BLCFILL r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xc8 | lcode(v[0])) + }) + } + // BLCFILL m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(1, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BLCFILL") + } + return p +} + +// BLCI performs "Isolate Lowest Clear Bit". +// +// Mnemonic : BLCI +// Supported forms : (4 forms) +// +// * BLCI r32, r32 [TBM] +// * BLCI m32, r32 [TBM] +// * BLCI r64, r64 [TBM] +// * BLCI m64, r64 [TBM] +// +func (self *Program) BLCI(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BLCI", 2, Operands { v0, v1 }) + // BLCI r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0x78 ^ (hlcode(v[1]) << 3)) + m.emit(0x02) + m.emit(0xf0 | lcode(v[0])) + }) + } + // BLCI m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x02) + m.mrsd(6, addr(v[0]), 1) + }) + } + // BLCI r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x02) + m.emit(0xf0 | lcode(v[0])) + }) + } + // BLCI m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x02) + m.mrsd(6, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BLCI") + } + return p +} + +// BLCIC performs "Isolate Lowest Set Bit and Complement". 
+// +// Mnemonic : BLCIC +// Supported forms : (4 forms) +// +// * BLCIC r32, r32 [TBM] +// * BLCIC m32, r32 [TBM] +// * BLCIC r64, r64 [TBM] +// * BLCIC m64, r64 [TBM] +// +func (self *Program) BLCIC(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BLCIC", 2, Operands { v0, v1 }) + // BLCIC r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0x78 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xe8 | lcode(v[0])) + }) + } + // BLCIC m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(5, addr(v[0]), 1) + }) + } + // BLCIC r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xe8 | lcode(v[0])) + }) + } + // BLCIC m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(5, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BLCIC") + } + return p +} + +// BLCMSK performs "Mask From Lowest Clear Bit". +// +// Mnemonic : BLCMSK +// Supported forms : (4 forms) +// +// * BLCMSK r32, r32 [TBM] +// * BLCMSK m32, r32 [TBM] +// * BLCMSK r64, r64 [TBM] +// * BLCMSK m64, r64 [TBM] +// +func (self *Program) BLCMSK(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BLCMSK", 2, Operands { v0, v1 }) + // BLCMSK r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0x78 ^ (hlcode(v[1]) << 3)) + m.emit(0x02) + m.emit(0xc8 | lcode(v[0])) + }) + } + // BLCMSK m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x02) + m.mrsd(1, addr(v[0]), 1) + }) + } + // BLCMSK r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x02) + m.emit(0xc8 | lcode(v[0])) + }) + } + // BLCMSK m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x02) + m.mrsd(1, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BLCMSK") + } + return p +} + +// BLCS performs "Set Lowest Clear Bit". 
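+//
+// Usage sketch (same assumptions as above):
+//
+//     p.BLCS(ECX, EAX)    // EAX = ECX | (ECX+1): sets the lowest clear bit of ECX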
+//
+// Mnemonic : BLCS
+// Supported forms : (4 forms)
+//
+// * BLCS r32, r32 [TBM]
+// * BLCS m32, r32 [TBM]
+// * BLCS r64, r64 [TBM]
+// * BLCS m64, r64 [TBM]
+//
+func (self *Program) BLCS(v0 interface{}, v1 interface{}) *Instruction {
+    p := self.alloc("BLCS", 2, Operands { v0, v1 })
+    // BLCS r32, r32
+    if isReg32(v0) && isReg32(v1) {
+        self.require(ISA_TBM)
+        p.domain = DomainGeneric
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x8f)
+            m.emit(0xe9 ^ (hcode(v[0]) << 5))
+            m.emit(0x78 ^ (hlcode(v[1]) << 3))
+            m.emit(0x01)
+            m.emit(0xd8 | lcode(v[0]))
+        })
+    }
+    // BLCS m32, r32
+    if isM32(v0) && isReg32(v1) {
+        self.require(ISA_TBM)
+        p.domain = DomainGeneric
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1]))
+            m.emit(0x01)
+            m.mrsd(3, addr(v[0]), 1)
+        })
+    }
+    // BLCS r64, r64
+    if isReg64(v0) && isReg64(v1) {
+        self.require(ISA_TBM)
+        p.domain = DomainGeneric
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x8f)
+            m.emit(0xe9 ^ (hcode(v[0]) << 5))
+            m.emit(0xf8 ^ (hlcode(v[1]) << 3))
+            m.emit(0x01)
+            m.emit(0xd8 | lcode(v[0]))
+        })
+    }
+    // BLCS m64, r64
+    if isM64(v0) && isReg64(v1) {
+        self.require(ISA_TBM)
+        p.domain = DomainGeneric
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1]))
+            m.emit(0x01)
+            m.mrsd(3, addr(v[0]), 1)
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for BLCS")
+    }
+    return p
+}
+
+// BLENDPD performs "Blend Packed Double Precision Floating-Point Values".
+//
+// Mnemonic : BLENDPD
+// Supported forms : (2 forms)
+//
+// * BLENDPD imm8, xmm, xmm [SSE4.1]
+// * BLENDPD imm8, m128, xmm [SSE4.1]
+//
+func (self *Program) BLENDPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+    p := self.alloc("BLENDPD", 3, Operands { v0, v1, v2 })
+    // BLENDPD imm8, xmm, xmm
+    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+        self.require(ISA_SSE4_1)
+        p.domain = DomainMMXSSE
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x66)
+            m.rexo(hcode(v[2]), v[1], false)
+            m.emit(0x0f)
+            m.emit(0x3a)
+            m.emit(0x0d)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // BLENDPD imm8, m128, xmm
+    if isImm8(v0) && isM128(v1) && isXMM(v2) {
+        self.require(ISA_SSE4_1)
+        p.domain = DomainMMXSSE
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x66)
+            m.rexo(hcode(v[2]), addr(v[1]), false)
+            m.emit(0x0f)
+            m.emit(0x3a)
+            m.emit(0x0d)
+            m.mrsd(lcode(v[2]), addr(v[1]), 1)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for BLENDPD")
+    }
+    return p
+}
+
+// BLENDPS performs "Blend Packed Single Precision Floating-Point Values".
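+//
+// Usage sketch (assuming a *Program p and this package's XMM register
+// constants): bit i of the immediate picks 32-bit lane i from the source
+// rather than keeping the destination's lane.
+//
+//     p.BLENDPS(0b0011, XMM1, XMM2)    // lanes 0 and 1 of XMM2 are replaced by those of XMM1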
+//
+// Mnemonic : BLENDPS
+// Supported forms : (2 forms)
+//
+// * BLENDPS imm8, xmm, xmm [SSE4.1]
+// * BLENDPS imm8, m128, xmm [SSE4.1]
+//
+func (self *Program) BLENDPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+    p := self.alloc("BLENDPS", 3, Operands { v0, v1, v2 })
+    // BLENDPS imm8, xmm, xmm
+    if isImm8(v0) && isXMM(v1) && isXMM(v2) {
+        self.require(ISA_SSE4_1)
+        p.domain = DomainMMXSSE
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x66)
+            m.rexo(hcode(v[2]), v[1], false)
+            m.emit(0x0f)
+            m.emit(0x3a)
+            m.emit(0x0c)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // BLENDPS imm8, m128, xmm
+    if isImm8(v0) && isM128(v1) && isXMM(v2) {
+        self.require(ISA_SSE4_1)
+        p.domain = DomainMMXSSE
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x66)
+            m.rexo(hcode(v[2]), addr(v[1]), false)
+            m.emit(0x0f)
+            m.emit(0x3a)
+            m.emit(0x0c)
+            m.mrsd(lcode(v[2]), addr(v[1]), 1)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for BLENDPS")
+    }
+    return p
+}
+
+// BLENDVPD performs "Variable Blend Packed Double Precision Floating-Point Values".
+//
+// Mnemonic : BLENDVPD
+// Supported forms : (2 forms)
+//
+// * BLENDVPD xmm0, xmm, xmm [SSE4.1]
+// * BLENDVPD xmm0, m128, xmm [SSE4.1]
+//
+func (self *Program) BLENDVPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+    p := self.alloc("BLENDVPD", 3, Operands { v0, v1, v2 })
+    // BLENDVPD xmm0, xmm, xmm
+    if v0 == XMM0 && isXMM(v1) && isXMM(v2) {
+        self.require(ISA_SSE4_1)
+        p.domain = DomainMMXSSE
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x66)
+            m.rexo(hcode(v[2]), v[1], false)
+            m.emit(0x0f)
+            m.emit(0x38)
+            m.emit(0x15)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+        })
+    }
+    // BLENDVPD xmm0, m128, xmm
+    if v0 == XMM0 && isM128(v1) && isXMM(v2) {
+        self.require(ISA_SSE4_1)
+        p.domain = DomainMMXSSE
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x66)
+            m.rexo(hcode(v[2]), addr(v[1]), false)
+            m.emit(0x0f)
+            m.emit(0x38)
+            m.emit(0x15)
+            m.mrsd(lcode(v[2]), addr(v[1]), 1)
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for BLENDVPD")
+    }
+    return p
+}
+
+// BLENDVPS performs "Variable Blend Packed Single Precision Floating-Point Values".
+//
+// Mnemonic : BLENDVPS
+// Supported forms : (2 forms)
+//
+// * BLENDVPS xmm0, xmm, xmm [SSE4.1]
+// * BLENDVPS xmm0, m128, xmm [SSE4.1]
+//
+func (self *Program) BLENDVPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+    p := self.alloc("BLENDVPS", 3, Operands { v0, v1, v2 })
+    // BLENDVPS xmm0, xmm, xmm
+    if v0 == XMM0 && isXMM(v1) && isXMM(v2) {
+        self.require(ISA_SSE4_1)
+        p.domain = DomainMMXSSE
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x66)
+            m.rexo(hcode(v[2]), v[1], false)
+            m.emit(0x0f)
+            m.emit(0x38)
+            m.emit(0x14)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+        })
+    }
+    // BLENDVPS xmm0, m128, xmm
+    if v0 == XMM0 && isM128(v1) && isXMM(v2) {
+        self.require(ISA_SSE4_1)
+        p.domain = DomainMMXSSE
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x66)
+            m.rexo(hcode(v[2]), addr(v[1]), false)
+            m.emit(0x0f)
+            m.emit(0x38)
+            m.emit(0x14)
+            m.mrsd(lcode(v[2]), addr(v[1]), 1)
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for BLENDVPS")
+    }
+    return p
+}
+
+// BLSFILL performs "Fill From Lowest Set Bit".
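+//
+// Usage sketch (assuming a *Program p and this package's register constants):
+//
+//     p.BLSFILL(ECX, EAX)    // EAX = ECX | (ECX-1): sets every bit below the lowest set bit of ECX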
+// +// Mnemonic : BLSFILL +// Supported forms : (4 forms) +// +// * BLSFILL r32, r32 [TBM] +// * BLSFILL m32, r32 [TBM] +// * BLSFILL r64, r64 [TBM] +// * BLSFILL m64, r64 [TBM] +// +func (self *Program) BLSFILL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BLSFILL", 2, Operands { v0, v1 }) + // BLSFILL r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0x78 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xd0 | lcode(v[0])) + }) + } + // BLSFILL m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(2, addr(v[0]), 1) + }) + } + // BLSFILL r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xd0 | lcode(v[0])) + }) + } + // BLSFILL m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(2, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BLSFILL") + } + return p +} + +// BLSI performs "Isolate Lowest Set Bit". +// +// Mnemonic : BLSI +// Supported forms : (4 forms) +// +// * BLSI r32, r32 [BMI] +// * BLSI m32, r32 [BMI] +// * BLSI r64, r64 [BMI] +// * BLSI m64, r64 [BMI] +// +func (self *Program) BLSI(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BLSI", 2, Operands { v0, v1 }) + // BLSI r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[0]) << 5)) + m.emit(0x78 ^ (hlcode(v[1]) << 3)) + m.emit(0xf3) + m.emit(0xd8 | lcode(v[0])) + }) + } + // BLSI m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x00, 0, addr(v[0]), hlcode(v[1])) + m.emit(0xf3) + m.mrsd(3, addr(v[0]), 1) + }) + } + // BLSI r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0xf3) + m.emit(0xd8 | lcode(v[0])) + }) + } + // BLSI m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x80, 0, addr(v[0]), hlcode(v[1])) + m.emit(0xf3) + m.mrsd(3, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BLSI") + } + return p +} + +// BLSIC performs "Isolate Lowest Set Bit and Complement". 
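+//
+// Usage sketch (same assumptions as above):
+//
+//     p.BLSIC(ECX, EAX)    // EAX = ^ECX | (ECX-1): all bits set except the lowest set bit of ECX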
+// +// Mnemonic : BLSIC +// Supported forms : (4 forms) +// +// * BLSIC r32, r32 [TBM] +// * BLSIC m32, r32 [TBM] +// * BLSIC r64, r64 [TBM] +// * BLSIC m64, r64 [TBM] +// +func (self *Program) BLSIC(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BLSIC", 2, Operands { v0, v1 }) + // BLSIC r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0x78 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xf0 | lcode(v[0])) + }) + } + // BLSIC m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(6, addr(v[0]), 1) + }) + } + // BLSIC r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xf0 | lcode(v[0])) + }) + } + // BLSIC m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(6, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BLSIC") + } + return p +} + +// BLSMSK performs "Mask From Lowest Set Bit". +// +// Mnemonic : BLSMSK +// Supported forms : (4 forms) +// +// * BLSMSK r32, r32 [BMI] +// * BLSMSK m32, r32 [BMI] +// * BLSMSK r64, r64 [BMI] +// * BLSMSK m64, r64 [BMI] +// +func (self *Program) BLSMSK(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BLSMSK", 2, Operands { v0, v1 }) + // BLSMSK r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[0]) << 5)) + m.emit(0x78 ^ (hlcode(v[1]) << 3)) + m.emit(0xf3) + m.emit(0xd0 | lcode(v[0])) + }) + } + // BLSMSK m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x00, 0, addr(v[0]), hlcode(v[1])) + m.emit(0xf3) + m.mrsd(2, addr(v[0]), 1) + }) + } + // BLSMSK r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0xf3) + m.emit(0xd0 | lcode(v[0])) + }) + } + // BLSMSK m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x80, 0, addr(v[0]), hlcode(v[1])) + m.emit(0xf3) + m.mrsd(2, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BLSMSK") + } + return p +} + +// BLSR performs "Reset Lowest Set Bit". 
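+//
+// Usage sketch (assuming a *Program p and this package's register constants);
+// this is the classic "strip the lowest set bit" step of a bit-iteration loop:
+//
+//     p.BLSR(ECX, ECX)    // ECX = ECX & (ECX-1)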
+// +// Mnemonic : BLSR +// Supported forms : (4 forms) +// +// * BLSR r32, r32 [BMI] +// * BLSR m32, r32 [BMI] +// * BLSR r64, r64 [BMI] +// * BLSR m64, r64 [BMI] +// +func (self *Program) BLSR(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BLSR", 2, Operands { v0, v1 }) + // BLSR r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[0]) << 5)) + m.emit(0x78 ^ (hlcode(v[1]) << 3)) + m.emit(0xf3) + m.emit(0xc8 | lcode(v[0])) + }) + } + // BLSR m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x00, 0, addr(v[0]), hlcode(v[1])) + m.emit(0xf3) + m.mrsd(1, addr(v[0]), 1) + }) + } + // BLSR r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0xf3) + m.emit(0xc8 | lcode(v[0])) + }) + } + // BLSR m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x80, 0, addr(v[0]), hlcode(v[1])) + m.emit(0xf3) + m.mrsd(1, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BLSR") + } + return p +} + +// BSFL performs "Bit Scan Forward". +// +// Mnemonic : BSF +// Supported forms : (2 forms) +// +// * BSFL r32, r32 +// * BSFL m32, r32 +// +func (self *Program) BSFL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BSFL", 2, Operands { v0, v1 }) + // BSFL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // BSFL m32, r32 + if isM32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xbc) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BSFL") + } + return p +} + +// BSFQ performs "Bit Scan Forward". +// +// Mnemonic : BSF +// Supported forms : (2 forms) +// +// * BSFQ r64, r64 +// * BSFQ m64, r64 +// +func (self *Program) BSFQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BSFQ", 2, Operands { v0, v1 }) + // BSFQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // BSFQ m64, r64 + if isM64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0xbc) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BSFQ") + } + return p +} + +// BSFW performs "Bit Scan Forward". 
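+//
+// Usage sketch (assuming a *Program p and this package's 16-bit register
+// constants); note that a zero source sets ZF and leaves the destination
+// undefined:
+//
+//     p.BSFW(CX, AX)    // AX = index of the least-significant set bit of CX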
+// +// Mnemonic : BSF +// Supported forms : (2 forms) +// +// * BSFW r16, r16 +// * BSFW m16, r16 +// +func (self *Program) BSFW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BSFW", 2, Operands { v0, v1 }) + // BSFW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // BSFW m16, r16 + if isM16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xbc) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BSFW") + } + return p +} + +// BSRL performs "Bit Scan Reverse". +// +// Mnemonic : BSR +// Supported forms : (2 forms) +// +// * BSRL r32, r32 +// * BSRL m32, r32 +// +func (self *Program) BSRL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BSRL", 2, Operands { v0, v1 }) + // BSRL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xbd) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // BSRL m32, r32 + if isM32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xbd) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BSRL") + } + return p +} + +// BSRQ performs "Bit Scan Reverse". +// +// Mnemonic : BSR +// Supported forms : (2 forms) +// +// * BSRQ r64, r64 +// * BSRQ m64, r64 +// +func (self *Program) BSRQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BSRQ", 2, Operands { v0, v1 }) + // BSRQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xbd) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // BSRQ m64, r64 + if isM64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0xbd) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BSRQ") + } + return p +} + +// BSRW performs "Bit Scan Reverse". +// +// Mnemonic : BSR +// Supported forms : (2 forms) +// +// * BSRW r16, r16 +// * BSRW m16, r16 +// +func (self *Program) BSRW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BSRW", 2, Operands { v0, v1 }) + // BSRW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xbd) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // BSRW m16, r16 + if isM16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xbd) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BSRW") + } + return p +} + +// BSWAPL performs "Byte Swap". 
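+//
+// Usage sketch (assuming a *Program p and this package's register constants);
+// BSWAP takes a single register operand and swaps its byte order in place:
+//
+//     p.BSWAPL(EAX)    // reverse the four bytes of EAX (little- to big-endian and back)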
+// +// Mnemonic : BSWAP +// Supported forms : (1 form) +// +// * BSWAPL r32 +// +func (self *Program) BSWAPL(v0 interface{}) *Instruction { + p := self.alloc("BSWAPL", 1, Operands { v0 }) + // BSWAPL r32 + if isReg32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0x0f) + m.emit(0xc8 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for BSWAPL") + } + return p +} + +// BSWAPQ performs "Byte Swap". +// +// Mnemonic : BSWAP +// Supported forms : (1 form) +// +// * BSWAPQ r64 +// +func (self *Program) BSWAPQ(v0 interface{}) *Instruction { + p := self.alloc("BSWAPQ", 1, Operands { v0 }) + // BSWAPQ r64 + if isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xc8 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for BSWAPQ") + } + return p +} + +// BTCL performs "Bit Test and Complement". +// +// Mnemonic : BTC +// Supported forms : (4 forms) +// +// * BTCL imm8, r32 +// * BTCL r32, r32 +// * BTCL imm8, m32 +// * BTCL r32, m32 +// +func (self *Program) BTCL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BTCL", 2, Operands { v0, v1 }) + // BTCL imm8, r32 + if isImm8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0xba) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BTCL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xbb) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // BTCL imm8, m32 + if isImm8(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x0f) + m.emit(0xba) + m.mrsd(7, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // BTCL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xbb) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BTCL") + } + return p +} + +// BTCQ performs "Bit Test and Complement". 
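+//
+// Usage sketch (assuming a *Program p and this package's register constants);
+// the previous bit value is captured in CF before it is complemented:
+//
+//     p.BTCQ(3, RAX)    // CF = old bit 3 of RAX, then toggle that bit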
+// +// Mnemonic : BTC +// Supported forms : (4 forms) +// +// * BTCQ imm8, r64 +// * BTCQ r64, r64 +// * BTCQ imm8, m64 +// * BTCQ r64, m64 +// +func (self *Program) BTCQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BTCQ", 2, Operands { v0, v1 }) + // BTCQ imm8, r64 + if isImm8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x0f) + m.emit(0xba) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BTCQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x0f) + m.emit(0xbb) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // BTCQ imm8, m64 + if isImm8(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x0f) + m.emit(0xba) + m.mrsd(7, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // BTCQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x0f) + m.emit(0xbb) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BTCQ") + } + return p +} + +// BTCW performs "Bit Test and Complement". +// +// Mnemonic : BTC +// Supported forms : (4 forms) +// +// * BTCW imm8, r16 +// * BTCW r16, r16 +// * BTCW imm8, m16 +// * BTCW r16, m16 +// +func (self *Program) BTCW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BTCW", 2, Operands { v0, v1 }) + // BTCW imm8, r16 + if isImm8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0xba) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BTCW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xbb) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // BTCW imm8, m16 + if isImm8(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x0f) + m.emit(0xba) + m.mrsd(7, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // BTCW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xbb) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BTCW") + } + return p +} + +// BTL performs "Bit Test". 
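+//
+// Usage sketch (assuming a *Program p and this package's register constants);
+// BT only samples the bit into CF and never modifies its operand:
+//
+//     p.BTL(5, EAX)    // CF = bit 5 of EAX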
+// +// Mnemonic : BT +// Supported forms : (4 forms) +// +// * BTL imm8, r32 +// * BTL r32, r32 +// * BTL imm8, m32 +// * BTL r32, m32 +// +func (self *Program) BTL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BTL", 2, Operands { v0, v1 }) + // BTL imm8, r32 + if isImm8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0xba) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BTL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xa3) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // BTL imm8, m32 + if isImm8(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x0f) + m.emit(0xba) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // BTL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xa3) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BTL") + } + return p +} + +// BTQ performs "Bit Test". +// +// Mnemonic : BT +// Supported forms : (4 forms) +// +// * BTQ imm8, r64 +// * BTQ r64, r64 +// * BTQ imm8, m64 +// * BTQ r64, m64 +// +func (self *Program) BTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BTQ", 2, Operands { v0, v1 }) + // BTQ imm8, r64 + if isImm8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x0f) + m.emit(0xba) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BTQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x0f) + m.emit(0xa3) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // BTQ imm8, m64 + if isImm8(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x0f) + m.emit(0xba) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // BTQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x0f) + m.emit(0xa3) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BTQ") + } + return p +} + +// BTRL performs "Bit Test and Reset". 
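+//
+// Usage sketch (assuming a *Program p and this package's register constants);
+// for register destinations the bit index is taken modulo the operand width:
+//
+//     p.BTRL(ECX, EAX)    // CF = old bit (ECX mod 32) of EAX, then clear that bit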
+// +// Mnemonic : BTR +// Supported forms : (4 forms) +// +// * BTRL imm8, r32 +// * BTRL r32, r32 +// * BTRL imm8, m32 +// * BTRL r32, m32 +// +func (self *Program) BTRL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BTRL", 2, Operands { v0, v1 }) + // BTRL imm8, r32 + if isImm8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0xba) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BTRL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xb3) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // BTRL imm8, m32 + if isImm8(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x0f) + m.emit(0xba) + m.mrsd(6, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // BTRL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xb3) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BTRL") + } + return p +} + +// BTRQ performs "Bit Test and Reset". +// +// Mnemonic : BTR +// Supported forms : (4 forms) +// +// * BTRQ imm8, r64 +// * BTRQ r64, r64 +// * BTRQ imm8, m64 +// * BTRQ r64, m64 +// +func (self *Program) BTRQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BTRQ", 2, Operands { v0, v1 }) + // BTRQ imm8, r64 + if isImm8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x0f) + m.emit(0xba) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BTRQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x0f) + m.emit(0xb3) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // BTRQ imm8, m64 + if isImm8(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x0f) + m.emit(0xba) + m.mrsd(6, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // BTRQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x0f) + m.emit(0xb3) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BTRQ") + } + return p +} + +// BTRW performs "Bit Test and Reset". 
+// +// Mnemonic : BTR +// Supported forms : (4 forms) +// +// * BTRW imm8, r16 +// * BTRW r16, r16 +// * BTRW imm8, m16 +// * BTRW r16, m16 +// +func (self *Program) BTRW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BTRW", 2, Operands { v0, v1 }) + // BTRW imm8, r16 + if isImm8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0xba) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BTRW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xb3) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // BTRW imm8, m16 + if isImm8(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x0f) + m.emit(0xba) + m.mrsd(6, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // BTRW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xb3) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BTRW") + } + return p +} + +// BTSL performs "Bit Test and Set". +// +// Mnemonic : BTS +// Supported forms : (4 forms) +// +// * BTSL imm8, r32 +// * BTSL r32, r32 +// * BTSL imm8, m32 +// * BTSL r32, m32 +// +func (self *Program) BTSL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BTSL", 2, Operands { v0, v1 }) + // BTSL imm8, r32 + if isImm8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0xba) + m.emit(0xe8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BTSL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xab) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // BTSL imm8, m32 + if isImm8(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x0f) + m.emit(0xba) + m.mrsd(5, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // BTSL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xab) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BTSL") + } + return p +} + +// BTSQ performs "Bit Test and Set". 
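+//
+// Usage sketch (assuming a *Program p and this package's register constants):
+//
+//     p.BTSQ(63, RDI)    // CF = old sign bit of RDI, then set bit 63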
+// +// Mnemonic : BTS +// Supported forms : (4 forms) +// +// * BTSQ imm8, r64 +// * BTSQ r64, r64 +// * BTSQ imm8, m64 +// * BTSQ r64, m64 +// +func (self *Program) BTSQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BTSQ", 2, Operands { v0, v1 }) + // BTSQ imm8, r64 + if isImm8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x0f) + m.emit(0xba) + m.emit(0xe8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BTSQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x0f) + m.emit(0xab) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // BTSQ imm8, m64 + if isImm8(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x0f) + m.emit(0xba) + m.mrsd(5, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // BTSQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x0f) + m.emit(0xab) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BTSQ") + } + return p +} + +// BTSW performs "Bit Test and Set". +// +// Mnemonic : BTS +// Supported forms : (4 forms) +// +// * BTSW imm8, r16 +// * BTSW r16, r16 +// * BTSW imm8, m16 +// * BTSW r16, m16 +// +func (self *Program) BTSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BTSW", 2, Operands { v0, v1 }) + // BTSW imm8, r16 + if isImm8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0xba) + m.emit(0xe8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BTSW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xab) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // BTSW imm8, m16 + if isImm8(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x0f) + m.emit(0xba) + m.mrsd(5, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // BTSW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xab) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BTSW") + } + return p +} + +// BTW performs "Bit Test". 
+// +// Mnemonic : BT +// Supported forms : (4 forms) +// +// * BTW imm8, r16 +// * BTW r16, r16 +// * BTW imm8, m16 +// * BTW r16, m16 +// +func (self *Program) BTW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("BTW", 2, Operands { v0, v1 }) + // BTW imm8, r16 + if isImm8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0xba) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // BTW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xa3) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // BTW imm8, m16 + if isImm8(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x0f) + m.emit(0xba) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // BTW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xa3) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BTW") + } + return p +} + +// BZHI performs "Zero High Bits Starting with Specified Bit Position". +// +// Mnemonic : BZHI +// Supported forms : (4 forms) +// +// * BZHI r32, r32, r32 [BMI2] +// * BZHI r32, m32, r32 [BMI2] +// * BZHI r64, r64, r64 [BMI2] +// * BZHI r64, m64, r64 [BMI2] +// +func (self *Program) BZHI(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("BZHI", 3, Operands { v0, v1, v2 }) + // BZHI r32, r32, r32 + if isReg32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // BZHI r32, m32, r32 + if isReg32(v0) && isM32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0xf5) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // BZHI r64, r64, r64 + if isReg64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xf8 ^ (hlcode(v[0]) << 3)) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // BZHI r64, m64, r64 + if isReg64(v0) && isM64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x80, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0xf5) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for BZHI") + } + return p +} + +// CALL performs "Call Procedure". 
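+//
+// Usage sketch (assuming a *Program p and a label value "to" obtained from
+// this package's label facility): the label form defers the 4-byte relative
+// displacement until the label is resolved at assembly time.
+//
+//     p.CALL(to)    // emits 0xE8 followed by rel32 to "to"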
+// +// Mnemonic : CALL +// Supported forms : (1 form) +// +// * CALL rel32 +// +func (self *Program) CALL(v0 interface{}) *Instruction { + p := self.alloc("CALL", 1, Operands { v0 }) + // CALL rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xe8) + m.imm4(relv(v[0])) + }) + } + // CALL label + if isLabel(v0) { + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0xe8) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for CALL") + } + return p +} + +// CALLQ performs "Call Procedure". +// +// Mnemonic : CALL +// Supported forms : (2 forms) +// +// * CALLQ r64 +// * CALLQ m64 +// +func (self *Program) CALLQ(v0 interface{}) *Instruction { + p := self.alloc("CALLQ", 1, Operands { v0 }) + // CALLQ r64 + if isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0xff) + m.emit(0xd0 | lcode(v[0])) + }) + } + // CALLQ m64 + if isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xff) + m.mrsd(2, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CALLQ") + } + return p +} + +// CBTW performs "Convert Byte to Word". +// +// Mnemonic : CBW +// Supported forms : (1 form) +// +// * CBTW +// +func (self *Program) CBTW() *Instruction { + p := self.alloc("CBTW", 0, Operands { }) + // CBTW + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x98) + }) + return p +} + +// CLC performs "Clear Carry Flag". +// +// Mnemonic : CLC +// Supported forms : (1 form) +// +// * CLC +// +func (self *Program) CLC() *Instruction { + p := self.alloc("CLC", 0, Operands { }) + // CLC + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf8) + }) + return p +} + +// CLD performs "Clear Direction Flag". +// +// Mnemonic : CLD +// Supported forms : (1 form) +// +// * CLD +// +func (self *Program) CLD() *Instruction { + p := self.alloc("CLD", 0, Operands { }) + // CLD + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xfc) + }) + return p +} + +// CLFLUSH performs "Flush Cache Line". +// +// Mnemonic : CLFLUSH +// Supported forms : (1 form) +// +// * CLFLUSH m8 [CLFLUSH] +// +func (self *Program) CLFLUSH(v0 interface{}) *Instruction { + p := self.alloc("CLFLUSH", 1, Operands { v0 }) + // CLFLUSH m8 + if isM8(v0) { + self.require(ISA_CLFLUSH) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0xae) + m.mrsd(7, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CLFLUSH") + } + return p +} + +// CLFLUSHOPT performs "Flush Cache Line Optimized". +// +// Mnemonic : CLFLUSHOPT +// Supported forms : (1 form) +// +// * CLFLUSHOPT m8 [CLFLUSHOPT] +// +func (self *Program) CLFLUSHOPT(v0 interface{}) *Instruction { + p := self.alloc("CLFLUSHOPT", 1, Operands { v0 }) + // CLFLUSHOPT m8 + if isM8(v0) { + self.require(ISA_CLFLUSHOPT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0xae) + m.mrsd(7, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CLFLUSHOPT") + } + return p +} + +// CLTD performs "Convert Doubleword to Quadword". 
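+//
+// Usage sketch (assuming a *Program p); CLTD is the AT&T spelling of CDQ and
+// takes no operands:
+//
+//     p.CLTD()    // sign-extend EAX into EDX:EAX, e.g. ahead of a signed 32-bit divide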
+// +// Mnemonic : CDQ +// Supported forms : (1 form) +// +// * CLTD +// +func (self *Program) CLTD() *Instruction { + p := self.alloc("CLTD", 0, Operands { }) + // CLTD + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x99) + }) + return p +} + +// CLTQ performs "Convert Doubleword to Quadword". +// +// Mnemonic : CDQE +// Supported forms : (1 form) +// +// * CLTQ +// +func (self *Program) CLTQ() *Instruction { + p := self.alloc("CLTQ", 0, Operands { }) + // CLTQ + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48) + m.emit(0x98) + }) + return p +} + +// CLWB performs "Cache Line Write Back". +// +// Mnemonic : CLWB +// Supported forms : (1 form) +// +// * CLWB m8 [CLWB] +// +func (self *Program) CLWB(v0 interface{}) *Instruction { + p := self.alloc("CLWB", 1, Operands { v0 }) + // CLWB m8 + if isM8(v0) { + self.require(ISA_CLWB) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0xae) + m.mrsd(6, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CLWB") + } + return p +} + +// CLZERO performs "Zero-out 64-bit Cache Line". +// +// Mnemonic : CLZERO +// Supported forms : (1 form) +// +// * CLZERO [CLZERO] +// +func (self *Program) CLZERO() *Instruction { + p := self.alloc("CLZERO", 0, Operands { }) + // CLZERO + self.require(ISA_CLZERO) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x01) + m.emit(0xfc) + }) + return p +} + +// CMC performs "Complement Carry Flag". +// +// Mnemonic : CMC +// Supported forms : (1 form) +// +// * CMC +// +func (self *Program) CMC() *Instruction { + p := self.alloc("CMC", 0, Operands { }) + // CMC + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf5) + }) + return p +} + +// CMOVA performs "Move if above (CF == 0 and ZF == 0)". 
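+//
+// Usage sketch, which stands in for the whole CMOVcc family that follows
+// (assuming a *Program p and this package's register constants; operands are
+// source first, destination last):
+//
+//     p.CMOVA(ECX, EAX)    // EAX = ECX if the last comparison was "above" (CF == 0 and ZF == 0)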
+// +// Mnemonic : CMOVA +// Supported forms : (6 forms) +// +// * CMOVA r16, r16 [CMOV] +// * CMOVA m16, r16 [CMOV] +// * CMOVA r32, r32 [CMOV] +// * CMOVA m32, r32 [CMOV] +// * CMOVA r64, r64 [CMOV] +// * CMOVA m64, r64 [CMOV] +// +func (self *Program) CMOVA(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVA", 2, Operands { v0, v1 }) + // CMOVA r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x47) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVA m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x47) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVA r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x47) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVA m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x47) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVA r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x47) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVA m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x47) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVA") + } + return p +} + +// CMOVAE performs "Move if above or equal (CF == 0)". 
+// +// Mnemonic : CMOVAE +// Supported forms : (6 forms) +// +// * CMOVAE r16, r16 [CMOV] +// * CMOVAE m16, r16 [CMOV] +// * CMOVAE r32, r32 [CMOV] +// * CMOVAE m32, r32 [CMOV] +// * CMOVAE r64, r64 [CMOV] +// * CMOVAE m64, r64 [CMOV] +// +func (self *Program) CMOVAE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVAE", 2, Operands { v0, v1 }) + // CMOVAE r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x43) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVAE m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x43) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVAE r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x43) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVAE m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x43) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVAE r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x43) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVAE m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x43) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVAE") + } + return p +} + +// CMOVB performs "Move if below (CF == 1)". 
+// +// Mnemonic : CMOVB +// Supported forms : (6 forms) +// +// * CMOVB r16, r16 [CMOV] +// * CMOVB m16, r16 [CMOV] +// * CMOVB r32, r32 [CMOV] +// * CMOVB m32, r32 [CMOV] +// * CMOVB r64, r64 [CMOV] +// * CMOVB m64, r64 [CMOV] +// +func (self *Program) CMOVB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVB", 2, Operands { v0, v1 }) + // CMOVB r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVB m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVB r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVB m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVB r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVB m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVB") + } + return p +} + +// CMOVBE performs "Move if below or equal (CF == 1 or ZF == 1)". 
+// +// Mnemonic : CMOVBE +// Supported forms : (6 forms) +// +// * CMOVBE r16, r16 [CMOV] +// * CMOVBE m16, r16 [CMOV] +// * CMOVBE r32, r32 [CMOV] +// * CMOVBE m32, r32 [CMOV] +// * CMOVBE r64, r64 [CMOV] +// * CMOVBE m64, r64 [CMOV] +// +func (self *Program) CMOVBE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVBE", 2, Operands { v0, v1 }) + // CMOVBE r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x46) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVBE m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x46) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVBE r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x46) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVBE m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x46) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVBE r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x46) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVBE m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x46) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVBE") + } + return p +} + +// CMOVC performs "Move if carry (CF == 1)". 
+// +// Mnemonic : CMOVC +// Supported forms : (6 forms) +// +// * CMOVC r16, r16 [CMOV] +// * CMOVC m16, r16 [CMOV] +// * CMOVC r32, r32 [CMOV] +// * CMOVC m32, r32 [CMOV] +// * CMOVC r64, r64 [CMOV] +// * CMOVC m64, r64 [CMOV] +// +func (self *Program) CMOVC(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVC", 2, Operands { v0, v1 }) + // CMOVC r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVC m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVC r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVC m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVC r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVC m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVC") + } + return p +} + +// CMOVE performs "Move if equal (ZF == 1)". 
+// +// Mnemonic : CMOVE +// Supported forms : (6 forms) +// +// * CMOVE r16, r16 [CMOV] +// * CMOVE m16, r16 [CMOV] +// * CMOVE r32, r32 [CMOV] +// * CMOVE m32, r32 [CMOV] +// * CMOVE r64, r64 [CMOV] +// * CMOVE m64, r64 [CMOV] +// +func (self *Program) CMOVE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVE", 2, Operands { v0, v1 }) + // CMOVE r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVE m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x44) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVE r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVE m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x44) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVE r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVE m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x44) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVE") + } + return p +} + +// CMOVG performs "Move if greater (ZF == 0 and SF == OF)". 
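+//
+// For low registers the r32, r32 form below needs no REX prefix, so
+// CMOVG EBX, EAX assembles to exactly three bytes: 0F 4F C3
+// (opcode 0F 4F, then ModRM 0xC0 | EAX<<3 | EBX).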
+// +// Mnemonic : CMOVG +// Supported forms : (6 forms) +// +// * CMOVG r16, r16 [CMOV] +// * CMOVG m16, r16 [CMOV] +// * CMOVG r32, r32 [CMOV] +// * CMOVG m32, r32 [CMOV] +// * CMOVG r64, r64 [CMOV] +// * CMOVG m64, r64 [CMOV] +// +func (self *Program) CMOVG(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVG", 2, Operands { v0, v1 }) + // CMOVG r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVG m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVG r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVG m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVG r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x4f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVG m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x4f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVG") + } + return p +} + +// CMOVGE performs "Move if greater or equal (SF == OF)". 
+// +// Mnemonic : CMOVGE +// Supported forms : (6 forms) +// +// * CMOVGE r16, r16 [CMOV] +// * CMOVGE m16, r16 [CMOV] +// * CMOVGE r32, r32 [CMOV] +// * CMOVGE m32, r32 [CMOV] +// * CMOVGE r64, r64 [CMOV] +// * CMOVGE m64, r64 [CMOV] +// +func (self *Program) CMOVGE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVGE", 2, Operands { v0, v1 }) + // CMOVGE r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVGE m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVGE r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVGE m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVGE r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x4d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVGE m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x4d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVGE") + } + return p +} + +// CMOVL performs "Move if less (SF != OF)". 
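+//
+// The 16-bit forms differ from the 32-bit ones only by the 0x66 operand-size
+// prefix: CMOVL DX, CX assembles to 66 0F 4C CA, while CMOVL EDX, ECX drops
+// the prefix and yields 0F 4C CA.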
+// +// Mnemonic : CMOVL +// Supported forms : (6 forms) +// +// * CMOVL r16, r16 [CMOV] +// * CMOVL m16, r16 [CMOV] +// * CMOVL r32, r32 [CMOV] +// * CMOVL m32, r32 [CMOV] +// * CMOVL r64, r64 [CMOV] +// * CMOVL m64, r64 [CMOV] +// +func (self *Program) CMOVL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVL", 2, Operands { v0, v1 }) + // CMOVL r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVL m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVL r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVL m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVL r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x4c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVL m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x4c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVL") + } + return p +} + +// CMOVLE performs "Move if less or equal (ZF == 1 or SF != OF)". 
+// +// Mnemonic : CMOVLE +// Supported forms : (6 forms) +// +// * CMOVLE r16, r16 [CMOV] +// * CMOVLE m16, r16 [CMOV] +// * CMOVLE r32, r32 [CMOV] +// * CMOVLE m32, r32 [CMOV] +// * CMOVLE r64, r64 [CMOV] +// * CMOVLE m64, r64 [CMOV] +// +func (self *Program) CMOVLE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVLE", 2, Operands { v0, v1 }) + // CMOVLE r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVLE m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVLE r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVLE m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVLE r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x4e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVLE m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x4e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVLE") + } + return p +} + +// CMOVNA performs "Move if not above (CF == 1 or ZF == 1)". 
+// +// Mnemonic : CMOVNA +// Supported forms : (6 forms) +// +// * CMOVNA r16, r16 [CMOV] +// * CMOVNA m16, r16 [CMOV] +// * CMOVNA r32, r32 [CMOV] +// * CMOVNA m32, r32 [CMOV] +// * CMOVNA r64, r64 [CMOV] +// * CMOVNA m64, r64 [CMOV] +// +func (self *Program) CMOVNA(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNA", 2, Operands { v0, v1 }) + // CMOVNA r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x46) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNA m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x46) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNA r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x46) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNA m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x46) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNA r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x46) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNA m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x46) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNA") + } + return p +} + +// CMOVNAE performs "Move if not above or equal (CF == 1)". 
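+//
+// CMOVNAE is an alias of CMOVC: both use opcode 0F 42, so for the same
+// operands the two methods emit byte-identical machine code; only the
+// mnemonic recorded on the resulting *Instruction differs.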
+// +// Mnemonic : CMOVNAE +// Supported forms : (6 forms) +// +// * CMOVNAE r16, r16 [CMOV] +// * CMOVNAE m16, r16 [CMOV] +// * CMOVNAE r32, r32 [CMOV] +// * CMOVNAE m32, r32 [CMOV] +// * CMOVNAE r64, r64 [CMOV] +// * CMOVNAE m64, r64 [CMOV] +// +func (self *Program) CMOVNAE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNAE", 2, Operands { v0, v1 }) + // CMOVNAE r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNAE m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNAE r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNAE m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNAE r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNAE m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNAE") + } + return p +} + +// CMOVNB performs "Move if not below (CF == 0)". 
+// +// Mnemonic : CMOVNB +// Supported forms : (6 forms) +// +// * CMOVNB r16, r16 [CMOV] +// * CMOVNB m16, r16 [CMOV] +// * CMOVNB r32, r32 [CMOV] +// * CMOVNB m32, r32 [CMOV] +// * CMOVNB r64, r64 [CMOV] +// * CMOVNB m64, r64 [CMOV] +// +func (self *Program) CMOVNB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNB", 2, Operands { v0, v1 }) + // CMOVNB r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x43) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNB m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x43) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNB r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x43) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNB m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x43) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNB r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x43) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNB m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x43) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNB") + } + return p +} + +// CMOVNBE performs "Move if not below or equal (CF == 0 and ZF == 0)". 
+// +// Mnemonic : CMOVNBE +// Supported forms : (6 forms) +// +// * CMOVNBE r16, r16 [CMOV] +// * CMOVNBE m16, r16 [CMOV] +// * CMOVNBE r32, r32 [CMOV] +// * CMOVNBE m32, r32 [CMOV] +// * CMOVNBE r64, r64 [CMOV] +// * CMOVNBE m64, r64 [CMOV] +// +func (self *Program) CMOVNBE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNBE", 2, Operands { v0, v1 }) + // CMOVNBE r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x47) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNBE m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x47) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNBE r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x47) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNBE m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x47) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNBE r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x47) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNBE m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x47) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNBE") + } + return p +} + +// CMOVNC performs "Move if not carry (CF == 0)". 
+// +// Mnemonic : CMOVNC +// Supported forms : (6 forms) +// +// * CMOVNC r16, r16 [CMOV] +// * CMOVNC m16, r16 [CMOV] +// * CMOVNC r32, r32 [CMOV] +// * CMOVNC m32, r32 [CMOV] +// * CMOVNC r64, r64 [CMOV] +// * CMOVNC m64, r64 [CMOV] +// +func (self *Program) CMOVNC(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNC", 2, Operands { v0, v1 }) + // CMOVNC r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x43) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNC m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x43) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNC r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x43) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNC m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x43) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNC r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x43) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNC m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x43) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNC") + } + return p +} + +// CMOVNE performs "Move if not equal (ZF == 0)". 
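+//
+// A sketch of the m64, r64 form (assumption: a Ptr-style helper that builds
+// this package's memory operands; substitute the actual constructor if it is
+// named differently):
+//
+//     p.CMOVNE(Ptr(RSI, 8), RAX)   // RAX = *(RSI + 8) only when ZF == 0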
+// +// Mnemonic : CMOVNE +// Supported forms : (6 forms) +// +// * CMOVNE r16, r16 [CMOV] +// * CMOVNE m16, r16 [CMOV] +// * CMOVNE r32, r32 [CMOV] +// * CMOVNE m32, r32 [CMOV] +// * CMOVNE r64, r64 [CMOV] +// * CMOVNE m64, r64 [CMOV] +// +func (self *Program) CMOVNE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNE", 2, Operands { v0, v1 }) + // CMOVNE r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x45) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNE m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x45) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNE r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x45) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNE m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x45) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNE r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x45) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNE m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x45) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNE") + } + return p +} + +// CMOVNG performs "Move if not greater (ZF == 1 or SF != OF)". 
+// +// Mnemonic : CMOVNG +// Supported forms : (6 forms) +// +// * CMOVNG r16, r16 [CMOV] +// * CMOVNG m16, r16 [CMOV] +// * CMOVNG r32, r32 [CMOV] +// * CMOVNG m32, r32 [CMOV] +// * CMOVNG r64, r64 [CMOV] +// * CMOVNG m64, r64 [CMOV] +// +func (self *Program) CMOVNG(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNG", 2, Operands { v0, v1 }) + // CMOVNG r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNG m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNG r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNG m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNG r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x4e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNG m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x4e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNG") + } + return p +} + +// CMOVNGE performs "Move if not greater or equal (SF != OF)". 
+// +// Mnemonic : CMOVNGE +// Supported forms : (6 forms) +// +// * CMOVNGE r16, r16 [CMOV] +// * CMOVNGE m16, r16 [CMOV] +// * CMOVNGE r32, r32 [CMOV] +// * CMOVNGE m32, r32 [CMOV] +// * CMOVNGE r64, r64 [CMOV] +// * CMOVNGE m64, r64 [CMOV] +// +func (self *Program) CMOVNGE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNGE", 2, Operands { v0, v1 }) + // CMOVNGE r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNGE m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNGE r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNGE m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNGE r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x4c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNGE m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x4c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNGE") + } + return p +} + +// CMOVNL performs "Move if not less (SF == OF)". 
+// +// Mnemonic : CMOVNL +// Supported forms : (6 forms) +// +// * CMOVNL r16, r16 [CMOV] +// * CMOVNL m16, r16 [CMOV] +// * CMOVNL r32, r32 [CMOV] +// * CMOVNL m32, r32 [CMOV] +// * CMOVNL r64, r64 [CMOV] +// * CMOVNL m64, r64 [CMOV] +// +func (self *Program) CMOVNL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNL", 2, Operands { v0, v1 }) + // CMOVNL r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNL m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNL r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNL m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNL r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x4d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNL m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x4d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNL") + } + return p +} + +// CMOVNLE performs "Move if not less or equal (ZF == 0 and SF == OF)". 
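+//
+// CMOVNLE is the signed "greater than" move (it inspects ZF, SF and OF); the
+// unsigned counterpart is CMOVNBE, which inspects CF and ZF. Pick whichever
+// matches the preceding comparison:
+//
+//     p.CMPQ(RBX, RAX)        // sets flags from RAX - RBX
+//     p.CMOVNLE(RCX, RDX)     // signed:   RDX = RCX if RAX > RBX
+//     p.CMOVNBE(RCX, RDX)     // unsigned: RDX = RCX if RAX > RBX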
+// +// Mnemonic : CMOVNLE +// Supported forms : (6 forms) +// +// * CMOVNLE r16, r16 [CMOV] +// * CMOVNLE m16, r16 [CMOV] +// * CMOVNLE r32, r32 [CMOV] +// * CMOVNLE m32, r32 [CMOV] +// * CMOVNLE r64, r64 [CMOV] +// * CMOVNLE m64, r64 [CMOV] +// +func (self *Program) CMOVNLE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNLE", 2, Operands { v0, v1 }) + // CMOVNLE r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNLE m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNLE r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNLE m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNLE r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x4f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNLE m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x4f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNLE") + } + return p +} + +// CMOVNO performs "Move if not overflow (OF == 0)". 
+// +// Mnemonic : CMOVNO +// Supported forms : (6 forms) +// +// * CMOVNO r16, r16 [CMOV] +// * CMOVNO m16, r16 [CMOV] +// * CMOVNO r32, r32 [CMOV] +// * CMOVNO m32, r32 [CMOV] +// * CMOVNO r64, r64 [CMOV] +// * CMOVNO m64, r64 [CMOV] +// +func (self *Program) CMOVNO(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNO", 2, Operands { v0, v1 }) + // CMOVNO r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x41) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNO m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x41) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNO r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x41) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNO m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x41) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNO r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x41) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNO m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x41) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNO") + } + return p +} + +// CMOVNP performs "Move if not parity (PF == 0)". 
+// +// Mnemonic : CMOVNP +// Supported forms : (6 forms) +// +// * CMOVNP r16, r16 [CMOV] +// * CMOVNP m16, r16 [CMOV] +// * CMOVNP r32, r32 [CMOV] +// * CMOVNP m32, r32 [CMOV] +// * CMOVNP r64, r64 [CMOV] +// * CMOVNP m64, r64 [CMOV] +// +func (self *Program) CMOVNP(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNP", 2, Operands { v0, v1 }) + // CMOVNP r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNP m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNP r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNP m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNP r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x4b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNP m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x4b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNP") + } + return p +} + +// CMOVNS performs "Move if not sign (SF == 0)". 
+// +// Mnemonic : CMOVNS +// Supported forms : (6 forms) +// +// * CMOVNS r16, r16 [CMOV] +// * CMOVNS m16, r16 [CMOV] +// * CMOVNS r32, r32 [CMOV] +// * CMOVNS m32, r32 [CMOV] +// * CMOVNS r64, r64 [CMOV] +// * CMOVNS m64, r64 [CMOV] +// +func (self *Program) CMOVNS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNS", 2, Operands { v0, v1 }) + // CMOVNS r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x49) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNS m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x49) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNS r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x49) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNS m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x49) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNS r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x49) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNS m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x49) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNS") + } + return p +} + +// CMOVNZ performs "Move if not zero (ZF == 0)". 
+// +// Mnemonic : CMOVNZ +// Supported forms : (6 forms) +// +// * CMOVNZ r16, r16 [CMOV] +// * CMOVNZ m16, r16 [CMOV] +// * CMOVNZ r32, r32 [CMOV] +// * CMOVNZ m32, r32 [CMOV] +// * CMOVNZ r64, r64 [CMOV] +// * CMOVNZ m64, r64 [CMOV] +// +func (self *Program) CMOVNZ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVNZ", 2, Operands { v0, v1 }) + // CMOVNZ r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x45) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNZ m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x45) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNZ r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x45) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNZ m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x45) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVNZ r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x45) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVNZ m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x45) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVNZ") + } + return p +} + +// CMOVO performs "Move if overflow (OF == 1)". 
+// +// Mnemonic : CMOVO +// Supported forms : (6 forms) +// +// * CMOVO r16, r16 [CMOV] +// * CMOVO m16, r16 [CMOV] +// * CMOVO r32, r32 [CMOV] +// * CMOVO m32, r32 [CMOV] +// * CMOVO r64, r64 [CMOV] +// * CMOVO m64, r64 [CMOV] +// +func (self *Program) CMOVO(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVO", 2, Operands { v0, v1 }) + // CMOVO r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x40) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVO m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x40) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVO r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x40) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVO m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x40) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVO r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x40) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVO m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x40) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVO") + } + return p +} + +// CMOVP performs "Move if parity (PF == 1)". 
+// +// Mnemonic : CMOVP +// Supported forms : (6 forms) +// +// * CMOVP r16, r16 [CMOV] +// * CMOVP m16, r16 [CMOV] +// * CMOVP r32, r32 [CMOV] +// * CMOVP m32, r32 [CMOV] +// * CMOVP r64, r64 [CMOV] +// * CMOVP m64, r64 [CMOV] +// +func (self *Program) CMOVP(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVP", 2, Operands { v0, v1 }) + // CMOVP r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVP m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVP r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVP m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVP r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x4a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVP m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x4a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVP") + } + return p +} + +// CMOVPE performs "Move if parity even (PF == 1)". 
+// +// Mnemonic : CMOVPE +// Supported forms : (6 forms) +// +// * CMOVPE r16, r16 [CMOV] +// * CMOVPE m16, r16 [CMOV] +// * CMOVPE r32, r32 [CMOV] +// * CMOVPE m32, r32 [CMOV] +// * CMOVPE r64, r64 [CMOV] +// * CMOVPE m64, r64 [CMOV] +// +func (self *Program) CMOVPE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVPE", 2, Operands { v0, v1 }) + // CMOVPE r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVPE m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVPE r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVPE m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVPE r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x4a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVPE m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x4a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVPE") + } + return p +} + +// CMOVPO performs "Move if parity odd (PF == 0)". 
+// +// Mnemonic : CMOVPO +// Supported forms : (6 forms) +// +// * CMOVPO r16, r16 [CMOV] +// * CMOVPO m16, r16 [CMOV] +// * CMOVPO r32, r32 [CMOV] +// * CMOVPO m32, r32 [CMOV] +// * CMOVPO r64, r64 [CMOV] +// * CMOVPO m64, r64 [CMOV] +// +func (self *Program) CMOVPO(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVPO", 2, Operands { v0, v1 }) + // CMOVPO r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVPO m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVPO r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x4b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVPO m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x4b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVPO r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x4b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVPO m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x4b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVPO") + } + return p +} + +// CMOVS performs "Move if sign (SF == 1)". 
+// +// Mnemonic : CMOVS +// Supported forms : (6 forms) +// +// * CMOVS r16, r16 [CMOV] +// * CMOVS m16, r16 [CMOV] +// * CMOVS r32, r32 [CMOV] +// * CMOVS m32, r32 [CMOV] +// * CMOVS r64, r64 [CMOV] +// * CMOVS m64, r64 [CMOV] +// +func (self *Program) CMOVS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVS", 2, Operands { v0, v1 }) + // CMOVS r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x48) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVS m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x48) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVS r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x48) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVS m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x48) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVS r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x48) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVS m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x48) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVS") + } + return p +} + +// CMOVZ performs "Move if zero (ZF == 1)". 
+// +// Mnemonic : CMOVZ +// Supported forms : (6 forms) +// +// * CMOVZ r16, r16 [CMOV] +// * CMOVZ m16, r16 [CMOV] +// * CMOVZ r32, r32 [CMOV] +// * CMOVZ m32, r32 [CMOV] +// * CMOVZ r64, r64 [CMOV] +// * CMOVZ m64, r64 [CMOV] +// +func (self *Program) CMOVZ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMOVZ", 2, Operands { v0, v1 }) + // CMOVZ r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVZ m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x44) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVZ r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVZ m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x44) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMOVZ r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMOVZ m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_CMOV) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x44) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMOVZ") + } + return p +} + +// CMPB performs "Compare Two Operands". 
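+//
+// The AL special case uses the short 0x3C opcode, so -- assuming plain Go
+// integers are accepted wherever isImm8 matches, as toImmAny suggests --
+// comparing AL against a small constant costs just two bytes:
+//
+//     p.CMPB(7, AL)   // emits 3C 07, i.e. set flags from AL - 7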
+// +// Mnemonic : CMP +// Supported forms : (6 forms) +// +// * CMPB imm8, al +// * CMPB imm8, r8 +// * CMPB r8, r8 +// * CMPB m8, r8 +// * CMPB imm8, m8 +// * CMPB r8, m8 +// +func (self *Program) CMPB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMPB", 2, Operands { v0, v1 }) + // CMPB imm8, al + if isImm8(v0) && v1 == AL { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x3c) + m.imm1(toImmAny(v[0])) + }) + } + // CMPB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0x80) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // CMPB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x38) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x3a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMPB m8, r8 + if isM8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1])) + m.emit(0x3a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMPB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x80) + m.mrsd(7, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // CMPB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x38) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMPB") + } + return p +} + +// CMPL performs "Compare Two Operands". 
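+//
+// An immediate that fits in a signed byte matches both the imm8 (83 /7 ib)
+// and the imm32 (81 /7 id) branches above, so both encodings are registered
+// on the instruction and the shorter candidate can be chosen when the
+// program is assembled:
+//
+//     p.CMPL(1, ECX)   // candidates: 83 F9 01 and 81 F9 01 00 00 00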
+// +// Mnemonic : CMP +// Supported forms : (8 forms) +// +// * CMPL imm32, eax +// * CMPL imm8, r32 +// * CMPL imm32, r32 +// * CMPL r32, r32 +// * CMPL m32, r32 +// * CMPL imm8, m32 +// * CMPL imm32, m32 +// * CMPL r32, m32 +// +func (self *Program) CMPL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMPL", 2, Operands { v0, v1 }) + // CMPL imm32, eax + if isImm32(v0) && v1 == EAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x3d) + m.imm4(toImmAny(v[0])) + }) + } + // CMPL imm8, r32 + if isImm8Ext(v0, 4) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // CMPL imm32, r32 + if isImm32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xf8 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // CMPL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x39) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMPL m32, r32 + if isM32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x3b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMPL imm8, m32 + if isImm8Ext(v0, 4) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(7, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // CMPL imm32, m32 + if isImm32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(7, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // CMPL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x39) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMPL") + } + return p +} + +// CMPPD performs "Compare Packed Double-Precision Floating-Point Values". 
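+//
+// The imm8 operand selects the SSE2 comparison predicate: 0 = EQ, 1 = LT,
+// 2 = LE, 3 = UNORD, 4 = NEQ, 5 = NLT, 6 = NLE, 7 = ORD. For example:
+//
+//     p.CMPPD(0, XMM1, XMM0)   // XMM0 = per-lane mask of XMM0 == XMM1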
+// +// Mnemonic : CMPPD +// Supported forms : (2 forms) +// +// * CMPPD imm8, xmm, xmm [SSE2] +// * CMPPD imm8, m128, xmm [SSE2] +// +func (self *Program) CMPPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("CMPPD", 3, Operands { v0, v1, v2 }) + // CMPPD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // CMPPD imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xc2) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for CMPPD") + } + return p +} + +// CMPPS performs "Compare Packed Single-Precision Floating-Point Values". +// +// Mnemonic : CMPPS +// Supported forms : (2 forms) +// +// * CMPPS imm8, xmm, xmm [SSE] +// * CMPPS imm8, m128, xmm [SSE] +// +func (self *Program) CMPPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("CMPPS", 3, Operands { v0, v1, v2 }) + // CMPPS imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // CMPPS imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xc2) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for CMPPS") + } + return p +} + +// CMPQ performs "Compare Two Operands". 
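+//
+// The 64-bit forms differ from CMPL only by a mandatory REX.W prefix
+// (the 0x48 byte built below). For example:
+//
+//     p.CMPQ(RAX, RAX)    // one valid encoding: 48 39 c0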
+// +// Mnemonic : CMP +// Supported forms : (8 forms) +// +// * CMPQ imm32, rax +// * CMPQ imm8, r64 +// * CMPQ imm32, r64 +// * CMPQ r64, r64 +// * CMPQ m64, r64 +// * CMPQ imm8, m64 +// * CMPQ imm32, m64 +// * CMPQ r64, m64 +// +func (self *Program) CMPQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMPQ", 2, Operands { v0, v1 }) + // CMPQ imm32, rax + if isImm32(v0) && v1 == RAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48) + m.emit(0x3d) + m.imm4(toImmAny(v[0])) + }) + } + // CMPQ imm8, r64 + if isImm8Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x83) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // CMPQ imm32, r64 + if isImm32Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x81) + m.emit(0xf8 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // CMPQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x39) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMPQ m64, r64 + if isM64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x3b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMPQ imm8, m64 + if isImm8Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x83) + m.mrsd(7, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // CMPQ imm32, m64 + if isImm32Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x81) + m.mrsd(7, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // CMPQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x39) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMPQ") + } + return p +} + +// CMPSD performs "Compare Scalar Double-Precision Floating-Point Values". 
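+//
+// As with CMPPD, the imm8 is a predicate code (0=EQ ... 7=ORD), here
+// applied to the low double only (XMM register constants assumed):
+//
+//     p.CMPSD(2, XMM1, XMM0)    // XMM0[63:0] = all-ones if
+//                               // XMM0[63:0] <= XMM1[63:0], else zero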
+// +// Mnemonic : CMPSD +// Supported forms : (2 forms) +// +// * CMPSD imm8, xmm, xmm [SSE2] +// * CMPSD imm8, m64, xmm [SSE2] +// +func (self *Program) CMPSD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("CMPSD", 3, Operands { v0, v1, v2 }) + // CMPSD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // CMPSD imm8, m64, xmm + if isImm8(v0) && isM64(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xc2) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for CMPSD") + } + return p +} + +// CMPSS performs "Compare Scalar Single-Precision Floating-Point Values". +// +// Mnemonic : CMPSS +// Supported forms : (2 forms) +// +// * CMPSS imm8, xmm, xmm [SSE] +// * CMPSS imm8, m32, xmm [SSE] +// +func (self *Program) CMPSS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("CMPSS", 3, Operands { v0, v1, v2 }) + // CMPSS imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // CMPSS imm8, m32, xmm + if isImm8(v0) && isM32(v1) && isXMM(v2) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xc2) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for CMPSS") + } + return p +} + +// CMPW performs "Compare Two Operands". 
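+//
+// The 16-bit forms reuse the CMPL opcodes behind a 0x66 operand-size
+// prefix. For example:
+//
+//     p.CMPW(AX, AX)    // one valid encoding: 66 39 c0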
+// +// Mnemonic : CMP +// Supported forms : (8 forms) +// +// * CMPW imm16, ax +// * CMPW imm8, r16 +// * CMPW imm16, r16 +// * CMPW r16, r16 +// * CMPW m16, r16 +// * CMPW imm8, m16 +// * CMPW imm16, m16 +// * CMPW r16, m16 +// +func (self *Program) CMPW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMPW", 2, Operands { v0, v1 }) + // CMPW imm16, ax + if isImm16(v0) && v1 == AX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x3d) + m.imm2(toImmAny(v[0])) + }) + } + // CMPW imm8, r16 + if isImm8Ext(v0, 2) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // CMPW imm16, r16 + if isImm16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xf8 | lcode(v[1])) + m.imm2(toImmAny(v[0])) + }) + } + // CMPW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x39) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CMPW m16, r16 + if isM16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x3b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CMPW imm8, m16 + if isImm8Ext(v0, 2) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(7, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // CMPW imm16, m16 + if isImm16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(7, addr(v[1]), 1) + m.imm2(toImmAny(v[0])) + }) + } + // CMPW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x39) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMPW") + } + return p +} + +// CMPXCHG16B performs "Compare and Exchange 16 Bytes". +// +// Mnemonic : CMPXCHG16B +// Supported forms : (1 form) +// +// * CMPXCHG16B m128 +// +func (self *Program) CMPXCHG16B(v0 interface{}) *Instruction { + p := self.alloc("CMPXCHG16B", 1, Operands { v0 }) + // CMPXCHG16B m128 + if isM128(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[0])) + m.emit(0x0f) + m.emit(0xc7) + m.mrsd(1, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMPXCHG16B") + } + return p +} + +// CMPXCHG8B performs "Compare and Exchange 8 Bytes". 
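+//
+// CMPXCHG8B compares EDX:EAX with the 8-byte destination; on match it
+// stores ECX:EBX there, otherwise it loads the destination into
+// EDX:EAX. Sketch (Ptr is a hypothetical memory-operand constructor,
+// not defined in this file):
+//
+//     p.CMPXCHG8B(Ptr(RSI, 0))    // usually paired with a LOCK prefix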
+// +// Mnemonic : CMPXCHG8B +// Supported forms : (1 form) +// +// * CMPXCHG8B m64 +// +func (self *Program) CMPXCHG8B(v0 interface{}) *Instruction { + p := self.alloc("CMPXCHG8B", 1, Operands { v0 }) + // CMPXCHG8B m64 + if isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0xc7) + m.mrsd(1, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMPXCHG8B") + } + return p +} + +// CMPXCHGB performs "Compare and Exchange". +// +// Mnemonic : CMPXCHG +// Supported forms : (2 forms) +// +// * CMPXCHGB r8, r8 +// * CMPXCHGB r8, m8 +// +func (self *Program) CMPXCHGB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMPXCHGB", 2, Operands { v0, v1 }) + // CMPXCHGB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x0f) + m.emit(0xb0) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // CMPXCHGB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0xb0) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMPXCHGB") + } + return p +} + +// CMPXCHGL performs "Compare and Exchange". +// +// Mnemonic : CMPXCHG +// Supported forms : (2 forms) +// +// * CMPXCHGL r32, r32 +// * CMPXCHGL r32, m32 +// +func (self *Program) CMPXCHGL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMPXCHGL", 2, Operands { v0, v1 }) + // CMPXCHGL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xb1) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // CMPXCHGL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xb1) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMPXCHGL") + } + return p +} + +// CMPXCHGQ performs "Compare and Exchange". +// +// Mnemonic : CMPXCHG +// Supported forms : (2 forms) +// +// * CMPXCHGQ r64, r64 +// * CMPXCHGQ r64, m64 +// +func (self *Program) CMPXCHGQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMPXCHGQ", 2, Operands { v0, v1 }) + // CMPXCHGQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x0f) + m.emit(0xb1) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // CMPXCHGQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x0f) + m.emit(0xb1) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMPXCHGQ") + } + return p +} + +// CMPXCHGW performs "Compare and Exchange". 
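+//
+// As with the other CMPXCHG widths, the accumulator (here AX) is the
+// implicit comparand: if AX equals the destination, the source is
+// stored into it, otherwise the destination is loaded into AX:
+//
+//     p.CMPXCHGW(BX, CX)    // if AX == CX { CX = BX } else { AX = CX }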
+// +// Mnemonic : CMPXCHG +// Supported forms : (2 forms) +// +// * CMPXCHGW r16, r16 +// * CMPXCHGW r16, m16 +// +func (self *Program) CMPXCHGW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CMPXCHGW", 2, Operands { v0, v1 }) + // CMPXCHGW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xb1) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // CMPXCHGW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xb1) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CMPXCHGW") + } + return p +} + +// COMISD performs "Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS". +// +// Mnemonic : COMISD +// Supported forms : (2 forms) +// +// * COMISD xmm, xmm [SSE2] +// * COMISD m64, xmm [SSE2] +// +func (self *Program) COMISD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("COMISD", 2, Operands { v0, v1 }) + // COMISD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // COMISD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for COMISD") + } + return p +} + +// COMISS performs "Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS". +// +// Mnemonic : COMISS +// Supported forms : (2 forms) +// +// * COMISS xmm, xmm [SSE] +// * COMISS m32, xmm [SSE] +// +func (self *Program) COMISS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("COMISS", 2, Operands { v0, v1 }) + // COMISS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // COMISS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for COMISS") + } + return p +} + +// CPUID performs "CPU Identification". +// +// Mnemonic : CPUID +// Supported forms : (1 form) +// +// * CPUID [CPUID] +// +func (self *Program) CPUID() *Instruction { + p := self.alloc("CPUID", 0, Operands { }) + // CPUID + self.require(ISA_CPUID) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0xa2) + }) + return p +} + +// CQTO performs "Convert Quadword to Octaword". 
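+//
+// CQTO (Intel mnemonic CQO) sign-extends RAX into RDX:RAX, the usual
+// setup before a signed 64-bit IDIV:
+//
+//     p.CQTO()    // RDX:RAX = sign-extend(RAX); encoded as 48 99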
+// +// Mnemonic : CQO +// Supported forms : (1 form) +// +// * CQTO +// +func (self *Program) CQTO() *Instruction { + p := self.alloc("CQTO", 0, Operands { }) + // CQTO + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48) + m.emit(0x99) + }) + return p +} + +// CRC32B performs "Accumulate CRC32 Value". +// +// Mnemonic : CRC32 +// Supported forms : (4 forms) +// +// * CRC32B r8, r32 [SSE4.2] +// * CRC32B m8, r32 [SSE4.2] +// * CRC32B r8, r64 [SSE4.2] +// * CRC32B m8, r64 [SSE4.2] +// +func (self *Program) CRC32B(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CRC32B", 2, Operands { v0, v1 }) + // CRC32B r8, r32 + if isReg8(v0) && isReg32(v1) { + self.require(ISA_SSE4_2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf0) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CRC32B m8, r32 + if isM8(v0) && isReg32(v1) { + self.require(ISA_SSE4_2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf0) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CRC32B r8, r64 + if isReg8(v0) && isReg64(v1) { + self.require(ISA_SSE4_2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf0) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CRC32B m8, r64 + if isM8(v0) && isReg64(v1) { + self.require(ISA_SSE4_2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf0) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CRC32B") + } + return p +} + +// CRC32L performs "Accumulate CRC32 Value". +// +// Mnemonic : CRC32 +// Supported forms : (2 forms) +// +// * CRC32L r32, r32 [SSE4.2] +// * CRC32L m32, r32 [SSE4.2] +// +func (self *Program) CRC32L(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CRC32L", 2, Operands { v0, v1 }) + // CRC32L r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_SSE4_2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf1) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CRC32L m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_SSE4_2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf1) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CRC32L") + } + return p +} + +// CRC32Q performs "Accumulate CRC32 Value". 
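+//
+// CRC32 accumulates using the Castagnoli polynomial (CRC-32C), not the
+// zlib polynomial. A sketch consuming 8 bytes per step, with the
+// running CRC kept in the destination register (RSI assumed to be this
+// package's register constant):
+//
+//     p.CRC32Q(RSI, RAX)    // RAX = crc32c(RAX, RSI): f2 48 0f 38 f1 c6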
+// +// Mnemonic : CRC32 +// Supported forms : (2 forms) +// +// * CRC32Q r64, r64 [SSE4.2] +// * CRC32Q m64, r64 [SSE4.2] +// +func (self *Program) CRC32Q(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CRC32Q", 2, Operands { v0, v1 }) + // CRC32Q r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_SSE4_2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf1) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CRC32Q m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_SSE4_2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf1) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CRC32Q") + } + return p +} + +// CRC32W performs "Accumulate CRC32 Value". +// +// Mnemonic : CRC32 +// Supported forms : (2 forms) +// +// * CRC32W r16, r32 [SSE4.2] +// * CRC32W m16, r32 [SSE4.2] +// +func (self *Program) CRC32W(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CRC32W", 2, Operands { v0, v1 }) + // CRC32W r16, r32 + if isReg16(v0) && isReg32(v1) { + self.require(ISA_SSE4_2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf1) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CRC32W m16, r32 + if isM16(v0) && isReg32(v1) { + self.require(ISA_SSE4_2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf1) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CRC32W") + } + return p +} + +// CVTDQ2PD performs "Convert Packed Dword Integers to Packed Double-Precision FP Values". +// +// Mnemonic : CVTDQ2PD +// Supported forms : (2 forms) +// +// * CVTDQ2PD xmm, xmm [SSE2] +// * CVTDQ2PD m64, xmm [SSE2] +// +func (self *Program) CVTDQ2PD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTDQ2PD", 2, Operands { v0, v1 }) + // CVTDQ2PD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTDQ2PD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTDQ2PD") + } + return p +} + +// CVTDQ2PS performs "Convert Packed Dword Integers to Packed Single-Precision FP Values". 
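+//
+// Converts four packed signed dwords to four single-precision floats
+// in one step (XMM constants assumed, as elsewhere):
+//
+//     p.CVTDQ2PS(XMM1, XMM0)    // XMM0[i] = float32(int32 lane i of XMM1)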
+//
+// Mnemonic : CVTDQ2PS
+// Supported forms : (2 forms)
+//
+// * CVTDQ2PS xmm, xmm [SSE2]
+// * CVTDQ2PS m128, xmm [SSE2]
+//
+func (self *Program) CVTDQ2PS(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CVTDQ2PS", 2, Operands { v0, v1 })
+ // CVTDQ2PS xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x5b)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTDQ2PS m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x5b)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CVTDQ2PS")
+ }
+ return p
+}
+
+// CVTPD2DQ performs "Convert Packed Double-Precision FP Values to Packed Dword Integers".
+//
+// Mnemonic : CVTPD2DQ
+// Supported forms : (2 forms)
+//
+// * CVTPD2DQ xmm, xmm [SSE2]
+// * CVTPD2DQ m128, xmm [SSE2]
+//
+func (self *Program) CVTPD2DQ(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CVTPD2DQ", 2, Operands { v0, v1 })
+ // CVTPD2DQ xmm, xmm
+ if isXMM(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0xe6)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTPD2DQ m128, xmm
+ if isM128(v0) && isXMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0xf2)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0xe6)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CVTPD2DQ")
+ }
+ return p
+}
+
+// CVTPD2PI performs "Convert Packed Double-Precision FP Values to Packed Dword Integers".
+//
+// Mnemonic : CVTPD2PI
+// Supported forms : (2 forms)
+//
+// * CVTPD2PI xmm, mm [SSE2]
+// * CVTPD2PI m128, mm [SSE2]
+//
+func (self *Program) CVTPD2PI(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("CVTPD2PI", 2, Operands { v0, v1 })
+ // CVTPD2PI xmm, mm
+ if isXMM(v0) && isMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x0f)
+ m.emit(0x2d)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // CVTPD2PI m128, mm
+ if isM128(v0) && isMM(v1) {
+ self.require(ISA_SSE2)
+ p.domain = DomainMMXSSE
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x0f)
+ m.emit(0x2d)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for CVTPD2PI")
+ }
+ return p
+}
+
+// CVTPD2PS performs "Convert Packed Double-Precision FP Values to Packed Single-Precision FP Values". 
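+//
+// Narrows two packed doubles into two packed floats in the low 64 bits
+// of the destination; the upper 64 bits are zeroed:
+//
+//     p.CVTPD2PS(XMM1, XMM0)    // XMM0 low lanes = float32(XMM1 doubles)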
+// +// Mnemonic : CVTPD2PS +// Supported forms : (2 forms) +// +// * CVTPD2PS xmm, xmm [SSE2] +// * CVTPD2PS m128, xmm [SSE2] +// +func (self *Program) CVTPD2PS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTPD2PS", 2, Operands { v0, v1 }) + // CVTPD2PS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTPD2PS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTPD2PS") + } + return p +} + +// CVTPI2PD performs "Convert Packed Dword Integers to Packed Double-Precision FP Values". +// +// Mnemonic : CVTPI2PD +// Supported forms : (2 forms) +// +// * CVTPI2PD mm, xmm [SSE2] +// * CVTPI2PD m64, xmm [SSE2] +// +func (self *Program) CVTPI2PD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTPI2PD", 2, Operands { v0, v1 }) + // CVTPI2PD mm, xmm + if isMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTPI2PD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTPI2PD") + } + return p +} + +// CVTPI2PS performs "Convert Packed Dword Integers to Packed Single-Precision FP Values". +// +// Mnemonic : CVTPI2PS +// Supported forms : (2 forms) +// +// * CVTPI2PS mm, xmm [SSE] +// * CVTPI2PS m64, xmm [SSE] +// +func (self *Program) CVTPI2PS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTPI2PS", 2, Operands { v0, v1 }) + // CVTPI2PS mm, xmm + if isMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTPI2PS m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTPI2PS") + } + return p +} + +// CVTPS2DQ performs "Convert Packed Single-Precision FP Values to Packed Dword Integers". 
+// +// Mnemonic : CVTPS2DQ +// Supported forms : (2 forms) +// +// * CVTPS2DQ xmm, xmm [SSE2] +// * CVTPS2DQ m128, xmm [SSE2] +// +func (self *Program) CVTPS2DQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTPS2DQ", 2, Operands { v0, v1 }) + // CVTPS2DQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTPS2DQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTPS2DQ") + } + return p +} + +// CVTPS2PD performs "Convert Packed Single-Precision FP Values to Packed Double-Precision FP Values". +// +// Mnemonic : CVTPS2PD +// Supported forms : (2 forms) +// +// * CVTPS2PD xmm, xmm [SSE2] +// * CVTPS2PD m64, xmm [SSE2] +// +func (self *Program) CVTPS2PD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTPS2PD", 2, Operands { v0, v1 }) + // CVTPS2PD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTPS2PD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTPS2PD") + } + return p +} + +// CVTPS2PI performs "Convert Packed Single-Precision FP Values to Packed Dword Integers". +// +// Mnemonic : CVTPS2PI +// Supported forms : (2 forms) +// +// * CVTPS2PI xmm, mm [SSE] +// * CVTPS2PI m64, mm [SSE] +// +func (self *Program) CVTPS2PI(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTPS2PI", 2, Operands { v0, v1 }) + // CVTPS2PI xmm, mm + if isXMM(v0) && isMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTPS2PI m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTPS2PI") + } + return p +} + +// CVTSD2SI performs "Convert Scalar Double-Precision FP Value to Integer". 
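+//
+// The conversion rounds according to MXCSR.RC; see CVTTSD2SI further
+// below for C-style truncation toward zero:
+//
+//     p.CVTSD2SI(XMM0, RAX)    // RAX = int64(round(low double of XMM0))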
+// +// Mnemonic : CVTSD2SI +// Supported forms : (4 forms) +// +// * CVTSD2SI xmm, r32 [SSE2] +// * CVTSD2SI m64, r32 [SSE2] +// * CVTSD2SI xmm, r64 [SSE2] +// * CVTSD2SI m64, r64 [SSE2] +// +func (self *Program) CVTSD2SI(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTSD2SI", 2, Operands { v0, v1 }) + // CVTSD2SI xmm, r32 + if isXMM(v0) && isReg32(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTSD2SI m64, r32 + if isM64(v0) && isReg32(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CVTSD2SI xmm, r64 + if isXMM(v0) && isReg64(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTSD2SI m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTSD2SI") + } + return p +} + +// CVTSD2SS performs "Convert Scalar Double-Precision FP Value to Scalar Single-Precision FP Value". +// +// Mnemonic : CVTSD2SS +// Supported forms : (2 forms) +// +// * CVTSD2SS xmm, xmm [SSE2] +// * CVTSD2SS m64, xmm [SSE2] +// +func (self *Program) CVTSD2SS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTSD2SS", 2, Operands { v0, v1 }) + // CVTSD2SS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTSD2SS m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTSD2SS") + } + return p +} + +// CVTSI2SD performs "Convert Dword Integer to Scalar Double-Precision FP Value". 
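+//
+// Source width matters for negative inputs: the r32/m32 forms read a
+// signed dword, the r64/m64 forms (REX.W) a signed qword:
+//
+//     p.CVTSI2SD(RAX, XMM0)    // low double of XMM0 = float64(int64(RAX))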
+// +// Mnemonic : CVTSI2SD +// Supported forms : (4 forms) +// +// * CVTSI2SD r32, xmm [SSE2] +// * CVTSI2SD r64, xmm [SSE2] +// * CVTSI2SD m32, xmm [SSE2] +// * CVTSI2SD m64, xmm [SSE2] +// +func (self *Program) CVTSI2SD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTSI2SD", 2, Operands { v0, v1 }) + // CVTSI2SD r32, xmm + if isReg32(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTSI2SD r64, xmm + if isReg64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTSI2SD m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CVTSI2SD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTSI2SD") + } + return p +} + +// CVTSI2SS performs "Convert Dword Integer to Scalar Single-Precision FP Value". +// +// Mnemonic : CVTSI2SS +// Supported forms : (4 forms) +// +// * CVTSI2SS r32, xmm [SSE] +// * CVTSI2SS r64, xmm [SSE] +// * CVTSI2SS m32, xmm [SSE] +// * CVTSI2SS m64, xmm [SSE] +// +func (self *Program) CVTSI2SS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTSI2SS", 2, Operands { v0, v1 }) + // CVTSI2SS r32, xmm + if isReg32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTSI2SS r64, xmm + if isReg64(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTSI2SS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CVTSI2SS m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTSI2SS") + } + return p +} + +// CVTSS2SD performs "Convert Scalar Single-Precision FP Value to Scalar Double-Precision FP Value". 
+// +// Mnemonic : CVTSS2SD +// Supported forms : (2 forms) +// +// * CVTSS2SD xmm, xmm [SSE2] +// * CVTSS2SD m32, xmm [SSE2] +// +func (self *Program) CVTSS2SD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTSS2SD", 2, Operands { v0, v1 }) + // CVTSS2SD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTSS2SD m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTSS2SD") + } + return p +} + +// CVTSS2SI performs "Convert Scalar Single-Precision FP Value to Dword Integer". +// +// Mnemonic : CVTSS2SI +// Supported forms : (4 forms) +// +// * CVTSS2SI xmm, r32 [SSE] +// * CVTSS2SI m32, r32 [SSE] +// * CVTSS2SI xmm, r64 [SSE] +// * CVTSS2SI m32, r64 [SSE] +// +func (self *Program) CVTSS2SI(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTSS2SI", 2, Operands { v0, v1 }) + // CVTSS2SI xmm, r32 + if isXMM(v0) && isReg32(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTSS2SI m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CVTSS2SI xmm, r64 + if isXMM(v0) && isReg64(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTSS2SI m32, r64 + if isM32(v0) && isReg64(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTSS2SI") + } + return p +} + +// CVTTPD2DQ performs "Convert with Truncation Packed Double-Precision FP Values to Packed Dword Integers". 
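+//
+// The CVTT* variants always truncate toward zero, ignoring MXCSR.RC;
+// note only the prefix differs from CVTPD2DQ (66 0f e6 here versus
+// f2 0f e6):
+//
+//     p.CVTTPD2DQ(XMM1, XMM0)    // low lanes = int32(trunc(XMM1 doubles))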
+// +// Mnemonic : CVTTPD2DQ +// Supported forms : (2 forms) +// +// * CVTTPD2DQ xmm, xmm [SSE2] +// * CVTTPD2DQ m128, xmm [SSE2] +// +func (self *Program) CVTTPD2DQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTTPD2DQ", 2, Operands { v0, v1 }) + // CVTTPD2DQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTTPD2DQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTTPD2DQ") + } + return p +} + +// CVTTPD2PI performs "Convert with Truncation Packed Double-Precision FP Values to Packed Dword Integers". +// +// Mnemonic : CVTTPD2PI +// Supported forms : (2 forms) +// +// * CVTTPD2PI xmm, mm [SSE2] +// * CVTTPD2PI m128, mm [SSE2] +// +func (self *Program) CVTTPD2PI(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTTPD2PI", 2, Operands { v0, v1 }) + // CVTTPD2PI xmm, mm + if isXMM(v0) && isMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTTPD2PI m128, mm + if isM128(v0) && isMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTTPD2PI") + } + return p +} + +// CVTTPS2DQ performs "Convert with Truncation Packed Single-Precision FP Values to Packed Dword Integers". +// +// Mnemonic : CVTTPS2DQ +// Supported forms : (2 forms) +// +// * CVTTPS2DQ xmm, xmm [SSE2] +// * CVTTPS2DQ m128, xmm [SSE2] +// +func (self *Program) CVTTPS2DQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTTPS2DQ", 2, Operands { v0, v1 }) + // CVTTPS2DQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTTPS2DQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTTPS2DQ") + } + return p +} + +// CVTTPS2PI performs "Convert with Truncation Packed Single-Precision FP Values to Packed Dword Integers". 
+// +// Mnemonic : CVTTPS2PI +// Supported forms : (2 forms) +// +// * CVTTPS2PI xmm, mm [SSE] +// * CVTTPS2PI m64, mm [SSE] +// +func (self *Program) CVTTPS2PI(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTTPS2PI", 2, Operands { v0, v1 }) + // CVTTPS2PI xmm, mm + if isXMM(v0) && isMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTTPS2PI m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTTPS2PI") + } + return p +} + +// CVTTSD2SI performs "Convert with Truncation Scalar Double-Precision FP Value to Signed Integer". +// +// Mnemonic : CVTTSD2SI +// Supported forms : (4 forms) +// +// * CVTTSD2SI xmm, r32 [SSE2] +// * CVTTSD2SI m64, r32 [SSE2] +// * CVTTSD2SI xmm, r64 [SSE2] +// * CVTTSD2SI m64, r64 [SSE2] +// +func (self *Program) CVTTSD2SI(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTTSD2SI", 2, Operands { v0, v1 }) + // CVTTSD2SI xmm, r32 + if isXMM(v0) && isReg32(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTTSD2SI m64, r32 + if isM64(v0) && isReg32(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CVTTSD2SI xmm, r64 + if isXMM(v0) && isReg64(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTTSD2SI m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTTSD2SI") + } + return p +} + +// CVTTSS2SI performs "Convert with Truncation Scalar Single-Precision FP Value to Dword Integer". 
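+//
+// Results that do not fit (including NaN) produce the integer
+// indefinite: 0x80000000 for the r32 forms, 0x8000000000000000 for the
+// r64 forms:
+//
+//     p.CVTTSS2SI(XMM0, EAX)    // EAX = int32(trunc(low float of XMM0))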
+// +// Mnemonic : CVTTSS2SI +// Supported forms : (4 forms) +// +// * CVTTSS2SI xmm, r32 [SSE] +// * CVTTSS2SI m32, r32 [SSE] +// * CVTTSS2SI xmm, r64 [SSE] +// * CVTTSS2SI m32, r64 [SSE] +// +func (self *Program) CVTTSS2SI(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("CVTTSS2SI", 2, Operands { v0, v1 }) + // CVTTSS2SI xmm, r32 + if isXMM(v0) && isReg32(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTTSS2SI m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // CVTTSS2SI xmm, r64 + if isXMM(v0) && isReg64(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // CVTTSS2SI m32, r64 + if isM32(v0) && isReg64(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for CVTTSS2SI") + } + return p +} + +// CWTD performs "Convert Word to Doubleword". +// +// Mnemonic : CWD +// Supported forms : (1 form) +// +// * CWTD +// +func (self *Program) CWTD() *Instruction { + p := self.alloc("CWTD", 0, Operands { }) + // CWTD + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x99) + }) + return p +} + +// CWTL performs "Convert Word to Doubleword". +// +// Mnemonic : CWDE +// Supported forms : (1 form) +// +// * CWTL +// +func (self *Program) CWTL() *Instruction { + p := self.alloc("CWTL", 0, Operands { }) + // CWTL + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x98) + }) + return p +} + +// DECB performs "Decrement by 1". +// +// Mnemonic : DEC +// Supported forms : (2 forms) +// +// * DECB r8 +// * DECB m8 +// +func (self *Program) DECB(v0 interface{}) *Instruction { + p := self.alloc("DECB", 1, Operands { v0 }) + // DECB r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0xfe) + m.emit(0xc8 | lcode(v[0])) + }) + } + // DECB m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xfe) + m.mrsd(1, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for DECB") + } + return p +} + +// DECL performs "Decrement by 1". 
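+//
+// In 64-bit mode the one-byte DEC encodings (0x48+r) were repurposed
+// as REX prefixes, so every form below goes through FE/FF with /1:
+//
+//     p.DECL(ECX)    // ff c9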
+// +// Mnemonic : DEC +// Supported forms : (2 forms) +// +// * DECL r32 +// * DECL m32 +// +func (self *Program) DECL(v0 interface{}) *Instruction { + p := self.alloc("DECL", 1, Operands { v0 }) + // DECL r32 + if isReg32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0xff) + m.emit(0xc8 | lcode(v[0])) + }) + } + // DECL m32 + if isM32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xff) + m.mrsd(1, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for DECL") + } + return p +} + +// DECQ performs "Decrement by 1". +// +// Mnemonic : DEC +// Supported forms : (2 forms) +// +// * DECQ r64 +// * DECQ m64 +// +func (self *Program) DECQ(v0 interface{}) *Instruction { + p := self.alloc("DECQ", 1, Operands { v0 }) + // DECQ r64 + if isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0])) + m.emit(0xff) + m.emit(0xc8 | lcode(v[0])) + }) + } + // DECQ m64 + if isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[0])) + m.emit(0xff) + m.mrsd(1, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for DECQ") + } + return p +} + +// DECW performs "Decrement by 1". +// +// Mnemonic : DEC +// Supported forms : (2 forms) +// +// * DECW r16 +// * DECW m16 +// +func (self *Program) DECW(v0 interface{}) *Instruction { + p := self.alloc("DECW", 1, Operands { v0 }) + // DECW r16 + if isReg16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0xff) + m.emit(0xc8 | lcode(v[0])) + }) + } + // DECW m16 + if isM16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[0]), false) + m.emit(0xff) + m.mrsd(1, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for DECW") + } + return p +} + +// DIVB performs "Unsigned Divide". +// +// Mnemonic : DIV +// Supported forms : (2 forms) +// +// * DIVB r8 +// * DIVB m8 +// +func (self *Program) DIVB(v0 interface{}) *Instruction { + p := self.alloc("DIVB", 1, Operands { v0 }) + // DIVB r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0xf6) + m.emit(0xf0 | lcode(v[0])) + }) + } + // DIVB m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xf6) + m.mrsd(6, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for DIVB") + } + return p +} + +// DIVL performs "Unsigned Divide". +// +// Mnemonic : DIV +// Supported forms : (2 forms) +// +// * DIVL r32 +// * DIVL m32 +// +func (self *Program) DIVL(v0 interface{}) *Instruction { + p := self.alloc("DIVL", 1, Operands { v0 }) + // DIVL r32 + if isReg32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0xf7) + m.emit(0xf0 | lcode(v[0])) + }) + } + // DIVL m32 + if isM32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xf7) + m.mrsd(6, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for DIVL") + } + return p +} + +// DIVPD performs "Divide Packed Double-Precision Floating-Point Values". 
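+//
+// Division is lane-wise, destination by source:
+//
+//     p.DIVPD(XMM1, XMM0)    // XMM0[i] = XMM0[i] / XMM1[i]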
+// +// Mnemonic : DIVPD +// Supported forms : (2 forms) +// +// * DIVPD xmm, xmm [SSE2] +// * DIVPD m128, xmm [SSE2] +// +func (self *Program) DIVPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("DIVPD", 2, Operands { v0, v1 }) + // DIVPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // DIVPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for DIVPD") + } + return p +} + +// DIVPS performs "Divide Packed Single-Precision Floating-Point Values". +// +// Mnemonic : DIVPS +// Supported forms : (2 forms) +// +// * DIVPS xmm, xmm [SSE] +// * DIVPS m128, xmm [SSE] +// +func (self *Program) DIVPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("DIVPS", 2, Operands { v0, v1 }) + // DIVPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // DIVPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for DIVPS") + } + return p +} + +// DIVQ performs "Unsigned Divide". +// +// Mnemonic : DIV +// Supported forms : (2 forms) +// +// * DIVQ r64 +// * DIVQ m64 +// +func (self *Program) DIVQ(v0 interface{}) *Instruction { + p := self.alloc("DIVQ", 1, Operands { v0 }) + // DIVQ r64 + if isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0])) + m.emit(0xf7) + m.emit(0xf0 | lcode(v[0])) + }) + } + // DIVQ m64 + if isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[0])) + m.emit(0xf7) + m.mrsd(6, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for DIVQ") + } + return p +} + +// DIVSD performs "Divide Scalar Double-Precision Floating-Point Values". 
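+//
+// Only the low double participates; the upper lane of the destination
+// is left unchanged:
+//
+//     p.DIVSD(XMM1, XMM0)    // XMM0[63:0] /= XMM1[63:0]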
+// +// Mnemonic : DIVSD +// Supported forms : (2 forms) +// +// * DIVSD xmm, xmm [SSE2] +// * DIVSD m64, xmm [SSE2] +// +func (self *Program) DIVSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("DIVSD", 2, Operands { v0, v1 }) + // DIVSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // DIVSD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for DIVSD") + } + return p +} + +// DIVSS performs "Divide Scalar Single-Precision Floating-Point Values". +// +// Mnemonic : DIVSS +// Supported forms : (2 forms) +// +// * DIVSS xmm, xmm [SSE] +// * DIVSS m32, xmm [SSE] +// +func (self *Program) DIVSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("DIVSS", 2, Operands { v0, v1 }) + // DIVSS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // DIVSS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for DIVSS") + } + return p +} + +// DIVW performs "Unsigned Divide". +// +// Mnemonic : DIV +// Supported forms : (2 forms) +// +// * DIVW r16 +// * DIVW m16 +// +func (self *Program) DIVW(v0 interface{}) *Instruction { + p := self.alloc("DIVW", 1, Operands { v0 }) + // DIVW r16 + if isReg16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0xf7) + m.emit(0xf0 | lcode(v[0])) + }) + } + // DIVW m16 + if isM16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[0]), false) + m.emit(0xf7) + m.mrsd(6, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for DIVW") + } + return p +} + +// DPPD performs "Dot Product of Packed Double Precision Floating-Point Values". 
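+//
+// The imm8 holds two masks: bits 5:4 select which lane products enter
+// the sum, bits 1:0 select which destination lanes receive it (the
+// rest are zeroed), so 0x31 computes a full dot product into lane 0:
+//
+//     p.DPPD(0x31, XMM1, XMM0)    // XMM0[63:0] = dot(XMM0, XMM1)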
+// +// Mnemonic : DPPD +// Supported forms : (2 forms) +// +// * DPPD imm8, xmm, xmm [SSE4.1] +// * DPPD imm8, m128, xmm [SSE4.1] +// +func (self *Program) DPPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("DPPD", 3, Operands { v0, v1, v2 }) + // DPPD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x41) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // DPPD imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x41) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for DPPD") + } + return p +} + +// DPPS performs "Dot Product of Packed Single Precision Floating-Point Values". +// +// Mnemonic : DPPS +// Supported forms : (2 forms) +// +// * DPPS imm8, xmm, xmm [SSE4.1] +// * DPPS imm8, m128, xmm [SSE4.1] +// +func (self *Program) DPPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("DPPS", 3, Operands { v0, v1, v2 }) + // DPPS imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x40) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // DPPS imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x40) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for DPPS") + } + return p +} + +// EMMS performs "Exit MMX State". +// +// Mnemonic : EMMS +// Supported forms : (1 form) +// +// * EMMS [MMX] +// +func (self *Program) EMMS() *Instruction { + p := self.alloc("EMMS", 0, Operands { }) + // EMMS + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x77) + }) + return p +} + +// EXTRACTPS performs "Extract Packed Single Precision Floating-Point Value". 
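+//
+// A short sketch (p assumed); the imm8 picks which 32-bit lane of the
+// source xmm register is extracted:
+//
+//    p.EXTRACTPS(2, XMM1, EAX)    // EAX = lane 2 of XMM1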
+// +// Mnemonic : EXTRACTPS +// Supported forms : (2 forms) +// +// * EXTRACTPS imm8, xmm, r32 [SSE4.1] +// * EXTRACTPS imm8, xmm, m32 [SSE4.1] +// +func (self *Program) EXTRACTPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("EXTRACTPS", 3, Operands { v0, v1, v2 }) + // EXTRACTPS imm8, xmm, r32 + if isImm8(v0) && isXMM(v1) && isReg32(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[2], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x17) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // EXTRACTPS imm8, xmm, m32 + if isImm8(v0) && isXMM(v1) && isM32(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[2]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x17) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for EXTRACTPS") + } + return p +} + +// EXTRQ performs "Extract Field". +// +// Mnemonic : EXTRQ +// Supported forms : (2 forms) +// +// * EXTRQ xmm, xmm [SSE4A] +// * EXTRQ imm8, imm8, xmm [SSE4A] +// +func (self *Program) EXTRQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("EXTRQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("EXTRQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction EXTRQ takes 2 or 3 operands") + } + // EXTRQ xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4A) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // EXTRQ imm8, imm8, xmm + if len(vv) == 1 && isImm8(v0) && isImm8(v1) && isXMM(vv[0]) { + self.require(ISA_SSE4A) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[2], false) + m.emit(0x0f) + m.emit(0x78) + m.emit(0xc0 | lcode(v[2])) + m.imm1(toImmAny(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for EXTRQ") + } + return p +} + +// FEMMS performs "Fast Exit Multimedia State". +// +// Mnemonic : FEMMS +// Supported forms : (1 form) +// +// * FEMMS [FEMMS] +// +func (self *Program) FEMMS() *Instruction { + p := self.alloc("FEMMS", 0, Operands { }) + // FEMMS + self.require(ISA_FEMMS) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x0e) + }) + return p +} + +// HADDPD performs "Packed Double-FP Horizontal Add". 
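+//
+// A minimal sketch (p assumed); the destination receives the pairwise
+// (horizontal) sums of both operands:
+//
+//    p.HADDPD(XMM2, XMM1)    // low lane = XMM1[0]+XMM1[1], high lane = XMM2[0]+XMM2[1]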
+// +// Mnemonic : HADDPD +// Supported forms : (2 forms) +// +// * HADDPD xmm, xmm [SSE3] +// * HADDPD m128, xmm [SSE3] +// +func (self *Program) HADDPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("HADDPD", 2, Operands { v0, v1 }) + // HADDPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // HADDPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x7c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for HADDPD") + } + return p +} + +// HADDPS performs "Packed Single-FP Horizontal Add". +// +// Mnemonic : HADDPS +// Supported forms : (2 forms) +// +// * HADDPS xmm, xmm [SSE3] +// * HADDPS m128, xmm [SSE3] +// +func (self *Program) HADDPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("HADDPS", 2, Operands { v0, v1 }) + // HADDPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // HADDPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x7c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for HADDPS") + } + return p +} + +// HSUBPD performs "Packed Double-FP Horizontal Subtract". +// +// Mnemonic : HSUBPD +// Supported forms : (2 forms) +// +// * HSUBPD xmm, xmm [SSE3] +// * HSUBPD m128, xmm [SSE3] +// +func (self *Program) HSUBPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("HSUBPD", 2, Operands { v0, v1 }) + // HSUBPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // HSUBPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x7d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for HSUBPD") + } + return p +} + +// HSUBPS performs "Packed Single-FP Horizontal Subtract". 
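+//
+// A minimal sketch (p assumed); each result lane is the difference of an
+// adjacent pair, the lower two pairs taken from the destination and the
+// upper two from the source:
+//
+//    p.HSUBPS(XMM2, XMM1)    // e.g. result lane 0 = XMM1[0] - XMM1[1]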
+// +// Mnemonic : HSUBPS +// Supported forms : (2 forms) +// +// * HSUBPS xmm, xmm [SSE3] +// * HSUBPS m128, xmm [SSE3] +// +func (self *Program) HSUBPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("HSUBPS", 2, Operands { v0, v1 }) + // HSUBPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // HSUBPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x7d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for HSUBPS") + } + return p +} + +// IDIVB performs "Signed Divide". +// +// Mnemonic : IDIV +// Supported forms : (2 forms) +// +// * IDIVB r8 +// * IDIVB m8 +// +func (self *Program) IDIVB(v0 interface{}) *Instruction { + p := self.alloc("IDIVB", 1, Operands { v0 }) + // IDIVB r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0xf6) + m.emit(0xf8 | lcode(v[0])) + }) + } + // IDIVB m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xf6) + m.mrsd(7, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for IDIVB") + } + return p +} + +// IDIVL performs "Signed Divide". +// +// Mnemonic : IDIV +// Supported forms : (2 forms) +// +// * IDIVL r32 +// * IDIVL m32 +// +func (self *Program) IDIVL(v0 interface{}) *Instruction { + p := self.alloc("IDIVL", 1, Operands { v0 }) + // IDIVL r32 + if isReg32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0xf7) + m.emit(0xf8 | lcode(v[0])) + }) + } + // IDIVL m32 + if isM32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xf7) + m.mrsd(7, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for IDIVL") + } + return p +} + +// IDIVQ performs "Signed Divide". +// +// Mnemonic : IDIV +// Supported forms : (2 forms) +// +// * IDIVQ r64 +// * IDIVQ m64 +// +func (self *Program) IDIVQ(v0 interface{}) *Instruction { + p := self.alloc("IDIVQ", 1, Operands { v0 }) + // IDIVQ r64 + if isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0])) + m.emit(0xf7) + m.emit(0xf8 | lcode(v[0])) + }) + } + // IDIVQ m64 + if isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[0])) + m.emit(0xf7) + m.mrsd(7, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for IDIVQ") + } + return p +} + +// IDIVW performs "Signed Divide". 
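+//
+// As with the other IDIV widths, the dividend is implicit; a sketch
+// assuming p:
+//
+//    p.IDIVW(CX)    // signed divide of DX:AX by CX; quotient in AX, remainder in DX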
+// +// Mnemonic : IDIV +// Supported forms : (2 forms) +// +// * IDIVW r16 +// * IDIVW m16 +// +func (self *Program) IDIVW(v0 interface{}) *Instruction { + p := self.alloc("IDIVW", 1, Operands { v0 }) + // IDIVW r16 + if isReg16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0xf7) + m.emit(0xf8 | lcode(v[0])) + }) + } + // IDIVW m16 + if isM16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[0]), false) + m.emit(0xf7) + m.mrsd(7, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for IDIVW") + } + return p +} + +// IMULB performs "Signed Multiply". +// +// Mnemonic : IMUL +// Supported forms : (2 forms) +// +// * IMULB r8 +// * IMULB m8 +// +func (self *Program) IMULB(v0 interface{}) *Instruction { + p := self.alloc("IMULB", 1, Operands { v0 }) + // IMULB r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0xf6) + m.emit(0xe8 | lcode(v[0])) + }) + } + // IMULB m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xf6) + m.mrsd(5, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for IMULB") + } + return p +} + +// IMULL performs "Signed Multiply". +// +// Mnemonic : IMUL +// Supported forms : (8 forms) +// +// * IMULL r32 +// * IMULL m32 +// * IMULL r32, r32 +// * IMULL m32, r32 +// * IMULL imm8, r32, r32 +// * IMULL imm32, r32, r32 +// * IMULL imm8, m32, r32 +// * IMULL imm32, m32, r32 +// +func (self *Program) IMULL(v0 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("IMULL", 1, Operands { v0 }) + case 1 : p = self.alloc("IMULL", 2, Operands { v0, vv[0] }) + case 2 : p = self.alloc("IMULL", 3, Operands { v0, vv[0], vv[1] }) + default : panic("instruction IMULL takes 1 or 2 or 3 operands") + } + // IMULL r32 + if len(vv) == 0 && isReg32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0xf7) + m.emit(0xe8 | lcode(v[0])) + }) + } + // IMULL m32 + if len(vv) == 0 && isM32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xf7) + m.mrsd(5, addr(v[0]), 1) + }) + } + // IMULL r32, r32 + if len(vv) == 1 && isReg32(v0) && isReg32(vv[0]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xaf) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // IMULL m32, r32 + if len(vv) == 1 && isM32(v0) && isReg32(vv[0]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xaf) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // IMULL imm8, r32, r32 + if len(vv) == 2 && isImm8(v0) && isReg32(vv[0]) && isReg32(vv[1]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x6b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // IMULL imm32, r32, r32 + if len(vv) == 2 && isImm32(v0) && isReg32(vv[0]) && isReg32(vv[1]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x69) + m.emit(0xc0 | lcode(v[2]) << 3 | 
lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // IMULL imm8, m32, r32 + if len(vv) == 2 && isImm8(v0) && isM32(vv[0]) && isReg32(vv[1]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x6b) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // IMULL imm32, m32, r32 + if len(vv) == 2 && isImm32(v0) && isM32(vv[0]) && isReg32(vv[1]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x69) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for IMULL") + } + return p +} + +// IMULQ performs "Signed Multiply". +// +// Mnemonic : IMUL +// Supported forms : (8 forms) +// +// * IMULQ r64 +// * IMULQ m64 +// * IMULQ r64, r64 +// * IMULQ m64, r64 +// * IMULQ imm8, r64, r64 +// * IMULQ imm32, r64, r64 +// * IMULQ imm8, m64, r64 +// * IMULQ imm32, m64, r64 +// +func (self *Program) IMULQ(v0 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("IMULQ", 1, Operands { v0 }) + case 1 : p = self.alloc("IMULQ", 2, Operands { v0, vv[0] }) + case 2 : p = self.alloc("IMULQ", 3, Operands { v0, vv[0], vv[1] }) + default : panic("instruction IMULQ takes 1 or 2 or 3 operands") + } + // IMULQ r64 + if len(vv) == 0 && isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0])) + m.emit(0xf7) + m.emit(0xe8 | lcode(v[0])) + }) + } + // IMULQ m64 + if len(vv) == 0 && isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[0])) + m.emit(0xf7) + m.mrsd(5, addr(v[0]), 1) + }) + } + // IMULQ r64, r64 + if len(vv) == 1 && isReg64(v0) && isReg64(vv[0]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xaf) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // IMULQ m64, r64 + if len(vv) == 1 && isM64(v0) && isReg64(vv[0]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0xaf) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // IMULQ imm8, r64, r64 + if len(vv) == 2 && isImm8(v0) && isReg64(vv[0]) && isReg64(vv[1]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[2]) << 2 | hcode(v[1])) + m.emit(0x6b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // IMULQ imm32, r64, r64 + if len(vv) == 2 && isImm32(v0) && isReg64(vv[0]) && isReg64(vv[1]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[2]) << 2 | hcode(v[1])) + m.emit(0x69) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // IMULQ imm8, m64, r64 + if len(vv) == 2 && isImm8(v0) && isM64(vv[0]) && isReg64(vv[1]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[2]), addr(v[1])) + m.emit(0x6b) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // IMULQ imm32, m64, r64 + if len(vv) == 2 && isImm32(v0) && isM64(vv[0]) && isReg64(vv[1]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[2]), addr(v[1])) + m.emit(0x69) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + 
} + if p.len == 0 { + panic("invalid operands for IMULQ") + } + return p +} + +// IMULW performs "Signed Multiply". +// +// Mnemonic : IMUL +// Supported forms : (8 forms) +// +// * IMULW r16 +// * IMULW m16 +// * IMULW r16, r16 +// * IMULW m16, r16 +// * IMULW imm8, r16, r16 +// * IMULW imm16, r16, r16 +// * IMULW imm8, m16, r16 +// * IMULW imm16, m16, r16 +// +func (self *Program) IMULW(v0 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("IMULW", 1, Operands { v0 }) + case 1 : p = self.alloc("IMULW", 2, Operands { v0, vv[0] }) + case 2 : p = self.alloc("IMULW", 3, Operands { v0, vv[0], vv[1] }) + default : panic("instruction IMULW takes 1 or 2 or 3 operands") + } + // IMULW r16 + if len(vv) == 0 && isReg16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0xf7) + m.emit(0xe8 | lcode(v[0])) + }) + } + // IMULW m16 + if len(vv) == 0 && isM16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[0]), false) + m.emit(0xf7) + m.mrsd(5, addr(v[0]), 1) + }) + } + // IMULW r16, r16 + if len(vv) == 1 && isReg16(v0) && isReg16(vv[0]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xaf) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // IMULW m16, r16 + if len(vv) == 1 && isM16(v0) && isReg16(vv[0]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xaf) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // IMULW imm8, r16, r16 + if len(vv) == 2 && isImm8(v0) && isReg16(vv[0]) && isReg16(vv[1]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x6b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // IMULW imm16, r16, r16 + if len(vv) == 2 && isImm16(v0) && isReg16(vv[0]) && isReg16(vv[1]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x69) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm2(toImmAny(v[0])) + }) + } + // IMULW imm8, m16, r16 + if len(vv) == 2 && isImm8(v0) && isM16(vv[0]) && isReg16(vv[1]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x6b) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // IMULW imm16, m16, r16 + if len(vv) == 2 && isImm16(v0) && isM16(vv[0]) && isReg16(vv[1]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x69) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm2(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for IMULW") + } + return p +} + +// INCB performs "Increment by 1". 
+// +// Mnemonic : INC +// Supported forms : (2 forms) +// +// * INCB r8 +// * INCB m8 +// +func (self *Program) INCB(v0 interface{}) *Instruction { + p := self.alloc("INCB", 1, Operands { v0 }) + // INCB r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0xfe) + m.emit(0xc0 | lcode(v[0])) + }) + } + // INCB m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xfe) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for INCB") + } + return p +} + +// INCL performs "Increment by 1". +// +// Mnemonic : INC +// Supported forms : (2 forms) +// +// * INCL r32 +// * INCL m32 +// +func (self *Program) INCL(v0 interface{}) *Instruction { + p := self.alloc("INCL", 1, Operands { v0 }) + // INCL r32 + if isReg32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0xff) + m.emit(0xc0 | lcode(v[0])) + }) + } + // INCL m32 + if isM32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xff) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for INCL") + } + return p +} + +// INCQ performs "Increment by 1". +// +// Mnemonic : INC +// Supported forms : (2 forms) +// +// * INCQ r64 +// * INCQ m64 +// +func (self *Program) INCQ(v0 interface{}) *Instruction { + p := self.alloc("INCQ", 1, Operands { v0 }) + // INCQ r64 + if isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0])) + m.emit(0xff) + m.emit(0xc0 | lcode(v[0])) + }) + } + // INCQ m64 + if isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[0])) + m.emit(0xff) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for INCQ") + } + return p +} + +// INCW performs "Increment by 1". +// +// Mnemonic : INC +// Supported forms : (2 forms) +// +// * INCW r16 +// * INCW m16 +// +func (self *Program) INCW(v0 interface{}) *Instruction { + p := self.alloc("INCW", 1, Operands { v0 }) + // INCW r16 + if isReg16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0xff) + m.emit(0xc0 | lcode(v[0])) + }) + } + // INCW m16 + if isM16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[0]), false) + m.emit(0xff) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for INCW") + } + return p +} + +// INSERTPS performs "Insert Packed Single Precision Floating-Point Value". 
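+//
+// A hedged sketch (p assumed); the imm8 packs the source lane select
+// (bits 6-7), the destination lane select (bits 4-5), and a zero mask for
+// the remaining lanes (bits 0-3):
+//
+//    p.INSERTPS(0x10, XMM2, XMM1)    // XMM1 lane 1 = XMM2 lane 0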
+// +// Mnemonic : INSERTPS +// Supported forms : (2 forms) +// +// * INSERTPS imm8, xmm, xmm [SSE4.1] +// * INSERTPS imm8, m32, xmm [SSE4.1] +// +func (self *Program) INSERTPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("INSERTPS", 3, Operands { v0, v1, v2 }) + // INSERTPS imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x21) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // INSERTPS imm8, m32, xmm + if isImm8(v0) && isM32(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x21) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for INSERTPS") + } + return p +} + +// INSERTQ performs "Insert Field". +// +// Mnemonic : INSERTQ +// Supported forms : (2 forms) +// +// * INSERTQ xmm, xmm [SSE4A] +// * INSERTQ imm8, imm8, xmm, xmm [SSE4A] +// +func (self *Program) INSERTQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("INSERTQ", 2, Operands { v0, v1 }) + case 2 : p = self.alloc("INSERTQ", 4, Operands { v0, v1, vv[0], vv[1] }) + default : panic("instruction INSERTQ takes 2 or 4 operands") + } + // INSERTQ xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4A) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // INSERTQ imm8, imm8, xmm, xmm + if len(vv) == 2 && isImm8(v0) && isImm8(v1) && isXMM(vv[0]) && isXMM(vv[1]) { + self.require(ISA_SSE4A) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[3]), v[2], false) + m.emit(0x0f) + m.emit(0x78) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for INSERTQ") + } + return p +} + +// INT performs "Call to Interrupt Procedure". +// +// Mnemonic : INT +// Supported forms : (2 forms) +// +// * INT 3 +// * INT imm8 +// +func (self *Program) INT(v0 interface{}) *Instruction { + p := self.alloc("INT", 1, Operands { v0 }) + // INT 3 + if isConst3(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xcc) + }) + } + // INT imm8 + if isImm8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xcd) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for INT") + } + return p +} + +// JA performs "Jump if above (CF == 0 and ZF == 0)". 
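+//
+// For this and the other conditional jumps below, passing a label instead
+// of a raw rel8/rel32 registers both encodings (_F_rel1/_F_rel4) and lets
+// the encoder choose the short or near form once offsets are resolved;
+// `to` in the sketches that follow is an assumed, previously created label:
+//
+//    p.JA(to)    // unsigned "above": taken when CF == 0 and ZF == 0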
+// +// Mnemonic : JA +// Supported forms : (2 forms) +// +// * JA rel8 +// * JA rel32 +// +func (self *Program) JA(v0 interface{}) *Instruction { + p := self.alloc("JA", 1, Operands { v0 }) + p.branch = _B_conditional + // JA rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x77) + m.imm1(relv(v[0])) + }) + } + // JA rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x87) + m.imm4(relv(v[0])) + }) + } + // JA label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x77) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x87) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JA") + } + return p +} + +// JAE performs "Jump if above or equal (CF == 0)". +// +// Mnemonic : JAE +// Supported forms : (2 forms) +// +// * JAE rel8 +// * JAE rel32 +// +func (self *Program) JAE(v0 interface{}) *Instruction { + p := self.alloc("JAE", 1, Operands { v0 }) + p.branch = _B_conditional + // JAE rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x73) + m.imm1(relv(v[0])) + }) + } + // JAE rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x83) + m.imm4(relv(v[0])) + }) + } + // JAE label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x73) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x83) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JAE") + } + return p +} + +// JB performs "Jump if below (CF == 1)". +// +// Mnemonic : JB +// Supported forms : (2 forms) +// +// * JB rel8 +// * JB rel32 +// +func (self *Program) JB(v0 interface{}) *Instruction { + p := self.alloc("JB", 1, Operands { v0 }) + p.branch = _B_conditional + // JB rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x72) + m.imm1(relv(v[0])) + }) + } + // JB rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x82) + m.imm4(relv(v[0])) + }) + } + // JB label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x72) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x82) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JB") + } + return p +} + +// JBE performs "Jump if below or equal (CF == 1 or ZF == 1)". 
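+//
+// A one-line sketch (p and the label `to` assumed as above):
+//
+//    p.JBE(to)    // unsigned "below or equal": CF == 1 or ZF == 1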
+//
+// Mnemonic : JBE
+// Supported forms : (2 forms)
+//
+// * JBE rel8
+// * JBE rel32
+//
+func (self *Program) JBE(v0 interface{}) *Instruction {
+    p := self.alloc("JBE", 1, Operands { v0 })
+    p.branch = _B_conditional
+    // JBE rel8
+    if isRel8(v0) {
+        p.domain = DomainGeneric
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x76)
+            m.imm1(relv(v[0]))
+        })
+    }
+    // JBE rel32
+    if isRel32(v0) {
+        p.domain = DomainGeneric
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x0f)
+            m.emit(0x86)
+            m.imm4(relv(v[0]))
+        })
+    }
+    // JBE label
+    if isLabel(v0) {
+        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+            m.emit(0x76)
+            m.imm1(relv(v[0]))
+        })
+        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+            m.emit(0x0f)
+            m.emit(0x86)
+            m.imm4(relv(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for JBE")
+    }
+    return p
+}
+
+// JC performs "Jump if carry (CF == 1)".
+//
+// Mnemonic : JC
+// Supported forms : (2 forms)
+//
+// * JC rel8
+// * JC rel32
+//
+func (self *Program) JC(v0 interface{}) *Instruction {
+    p := self.alloc("JC", 1, Operands { v0 })
+    p.branch = _B_conditional
+    // JC rel8
+    if isRel8(v0) {
+        p.domain = DomainGeneric
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x72)
+            m.imm1(relv(v[0]))
+        })
+    }
+    // JC rel32
+    if isRel32(v0) {
+        p.domain = DomainGeneric
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x0f)
+            m.emit(0x82)
+            m.imm4(relv(v[0]))
+        })
+    }
+    // JC label
+    if isLabel(v0) {
+        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+            m.emit(0x72)
+            m.imm1(relv(v[0]))
+        })
+        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+            m.emit(0x0f)
+            m.emit(0x82)
+            m.imm4(relv(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for JC")
+    }
+    return p
+}
+
+// JE performs "Jump if equal (ZF == 1)".
+//
+// Mnemonic : JE
+// Supported forms : (2 forms)
+//
+// * JE rel8
+// * JE rel32
+//
+func (self *Program) JE(v0 interface{}) *Instruction {
+    p := self.alloc("JE", 1, Operands { v0 })
+    p.branch = _B_conditional
+    // JE rel8
+    if isRel8(v0) {
+        p.domain = DomainGeneric
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x74)
+            m.imm1(relv(v[0]))
+        })
+    }
+    // JE rel32
+    if isRel32(v0) {
+        p.domain = DomainGeneric
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x0f)
+            m.emit(0x84)
+            m.imm4(relv(v[0]))
+        })
+    }
+    // JE label
+    if isLabel(v0) {
+        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+            m.emit(0x74)
+            m.imm1(relv(v[0]))
+        })
+        p.add(_F_rel4, func(m *_Encoding, v []interface{}) {
+            m.emit(0x0f)
+            m.emit(0x84)
+            m.imm4(relv(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for JE")
+    }
+    return p
+}
+
+// JECXZ performs "Jump if ECX register is 0".
+//
+// Mnemonic : JECXZ
+// Supported forms : (1 form)
+//
+// * JECXZ rel8
+//
+func (self *Program) JECXZ(v0 interface{}) *Instruction {
+    p := self.alloc("JECXZ", 1, Operands { v0 })
+    p.branch = _B_conditional
+    // JECXZ rel8
+    if isRel8(v0) {
+        p.domain = DomainGeneric
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x67)    // address-size prefix: bare 0xe3 decodes as JRCXZ in 64-bit mode
+            m.emit(0xe3)
+            m.imm1(relv(v[0]))
+        })
+    }
+    // JECXZ label
+    if isLabel(v0) {
+        p.add(_F_rel1, func(m *_Encoding, v []interface{}) {
+            m.emit(0x67)    // address-size prefix, as above
+            m.emit(0xe3)
+            m.imm1(relv(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for JECXZ")
+    }
+    return p
+}
+
+// JG performs "Jump if greater (ZF == 0 and SF == OF)".
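+//
+// A sketch (p, `to` assumed); JG is the signed counterpart of the unsigned
+// JA above:
+//
+//    p.JG(to)    // taken when ZF == 0 and SF == OF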
+// +// Mnemonic : JG +// Supported forms : (2 forms) +// +// * JG rel8 +// * JG rel32 +// +func (self *Program) JG(v0 interface{}) *Instruction { + p := self.alloc("JG", 1, Operands { v0 }) + p.branch = _B_conditional + // JG rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x7f) + m.imm1(relv(v[0])) + }) + } + // JG rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8f) + m.imm4(relv(v[0])) + }) + } + // JG label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x7f) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8f) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JG") + } + return p +} + +// JGE performs "Jump if greater or equal (SF == OF)". +// +// Mnemonic : JGE +// Supported forms : (2 forms) +// +// * JGE rel8 +// * JGE rel32 +// +func (self *Program) JGE(v0 interface{}) *Instruction { + p := self.alloc("JGE", 1, Operands { v0 }) + p.branch = _B_conditional + // JGE rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x7d) + m.imm1(relv(v[0])) + }) + } + // JGE rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8d) + m.imm4(relv(v[0])) + }) + } + // JGE label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x7d) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8d) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JGE") + } + return p +} + +// JL performs "Jump if less (SF != OF)". +// +// Mnemonic : JL +// Supported forms : (2 forms) +// +// * JL rel8 +// * JL rel32 +// +func (self *Program) JL(v0 interface{}) *Instruction { + p := self.alloc("JL", 1, Operands { v0 }) + p.branch = _B_conditional + // JL rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x7c) + m.imm1(relv(v[0])) + }) + } + // JL rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8c) + m.imm4(relv(v[0])) + }) + } + // JL label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x7c) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8c) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JL") + } + return p +} + +// JLE performs "Jump if less or equal (ZF == 1 or SF != OF)". 
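+//
+// A sketch (p, `to` assumed); JLE is the signed counterpart of JBE:
+//
+//    p.JLE(to)    // taken when ZF == 1 or SF != OF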
+// +// Mnemonic : JLE +// Supported forms : (2 forms) +// +// * JLE rel8 +// * JLE rel32 +// +func (self *Program) JLE(v0 interface{}) *Instruction { + p := self.alloc("JLE", 1, Operands { v0 }) + p.branch = _B_conditional + // JLE rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x7e) + m.imm1(relv(v[0])) + }) + } + // JLE rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8e) + m.imm4(relv(v[0])) + }) + } + // JLE label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x7e) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8e) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JLE") + } + return p +} + +// JMP performs "Jump Unconditionally". +// +// Mnemonic : JMP +// Supported forms : (2 forms) +// +// * JMP rel8 +// * JMP rel32 +// +func (self *Program) JMP(v0 interface{}) *Instruction { + p := self.alloc("JMP", 1, Operands { v0 }) + p.branch = _B_unconditional + // JMP rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xeb) + m.imm1(relv(v[0])) + }) + } + // JMP rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xe9) + m.imm4(relv(v[0])) + }) + } + // JMP label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0xeb) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0xe9) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JMP") + } + return p +} + +// JMPQ performs "Jump Unconditionally". +// +// Mnemonic : JMP +// Supported forms : (2 forms) +// +// * JMPQ r64 +// * JMPQ m64 +// +func (self *Program) JMPQ(v0 interface{}) *Instruction { + p := self.alloc("JMPQ", 1, Operands { v0 }) + // JMPQ r64 + if isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0xff) + m.emit(0xe0 | lcode(v[0])) + }) + } + // JMPQ m64 + if isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xff) + m.mrsd(4, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for JMPQ") + } + return p +} + +// JNA performs "Jump if not above (CF == 1 or ZF == 1)". +// +// Mnemonic : JNA +// Supported forms : (2 forms) +// +// * JNA rel8 +// * JNA rel32 +// +func (self *Program) JNA(v0 interface{}) *Instruction { + p := self.alloc("JNA", 1, Operands { v0 }) + p.branch = _B_conditional + // JNA rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x76) + m.imm1(relv(v[0])) + }) + } + // JNA rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x86) + m.imm4(relv(v[0])) + }) + } + // JNA label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x76) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x86) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNA") + } + return p +} + +// JNAE performs "Jump if not above or equal (CF == 1)". 
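+//
+// JNAE is an alias spelling: it emits exactly the same opcodes as JB above
+// (0x72, or 0x0f 0x82 for rel32), so the call below is interchangeable
+// with p.JB(to):
+//
+//    p.JNAE(to)    // taken when CF == 1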
+// +// Mnemonic : JNAE +// Supported forms : (2 forms) +// +// * JNAE rel8 +// * JNAE rel32 +// +func (self *Program) JNAE(v0 interface{}) *Instruction { + p := self.alloc("JNAE", 1, Operands { v0 }) + p.branch = _B_conditional + // JNAE rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x72) + m.imm1(relv(v[0])) + }) + } + // JNAE rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x82) + m.imm4(relv(v[0])) + }) + } + // JNAE label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x72) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x82) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNAE") + } + return p +} + +// JNB performs "Jump if not below (CF == 0)". +// +// Mnemonic : JNB +// Supported forms : (2 forms) +// +// * JNB rel8 +// * JNB rel32 +// +func (self *Program) JNB(v0 interface{}) *Instruction { + p := self.alloc("JNB", 1, Operands { v0 }) + p.branch = _B_conditional + // JNB rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x73) + m.imm1(relv(v[0])) + }) + } + // JNB rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x83) + m.imm4(relv(v[0])) + }) + } + // JNB label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x73) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x83) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNB") + } + return p +} + +// JNBE performs "Jump if not below or equal (CF == 0 and ZF == 0)". +// +// Mnemonic : JNBE +// Supported forms : (2 forms) +// +// * JNBE rel8 +// * JNBE rel32 +// +func (self *Program) JNBE(v0 interface{}) *Instruction { + p := self.alloc("JNBE", 1, Operands { v0 }) + p.branch = _B_conditional + // JNBE rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x77) + m.imm1(relv(v[0])) + }) + } + // JNBE rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x87) + m.imm4(relv(v[0])) + }) + } + // JNBE label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x77) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x87) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNBE") + } + return p +} + +// JNC performs "Jump if not carry (CF == 0)". 
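+//
+// JNC shares its encoding (0x73, or 0x0f 0x83 for rel32) with JAE and JNB;
+// a sketch (p, `to` assumed):
+//
+//    p.JNC(to)    // taken when CF == 0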
+// +// Mnemonic : JNC +// Supported forms : (2 forms) +// +// * JNC rel8 +// * JNC rel32 +// +func (self *Program) JNC(v0 interface{}) *Instruction { + p := self.alloc("JNC", 1, Operands { v0 }) + p.branch = _B_conditional + // JNC rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x73) + m.imm1(relv(v[0])) + }) + } + // JNC rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x83) + m.imm4(relv(v[0])) + }) + } + // JNC label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x73) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x83) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNC") + } + return p +} + +// JNE performs "Jump if not equal (ZF == 0)". +// +// Mnemonic : JNE +// Supported forms : (2 forms) +// +// * JNE rel8 +// * JNE rel32 +// +func (self *Program) JNE(v0 interface{}) *Instruction { + p := self.alloc("JNE", 1, Operands { v0 }) + p.branch = _B_conditional + // JNE rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x75) + m.imm1(relv(v[0])) + }) + } + // JNE rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x85) + m.imm4(relv(v[0])) + }) + } + // JNE label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x75) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x85) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNE") + } + return p +} + +// JNG performs "Jump if not greater (ZF == 1 or SF != OF)". +// +// Mnemonic : JNG +// Supported forms : (2 forms) +// +// * JNG rel8 +// * JNG rel32 +// +func (self *Program) JNG(v0 interface{}) *Instruction { + p := self.alloc("JNG", 1, Operands { v0 }) + p.branch = _B_conditional + // JNG rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x7e) + m.imm1(relv(v[0])) + }) + } + // JNG rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8e) + m.imm4(relv(v[0])) + }) + } + // JNG label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x7e) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8e) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNG") + } + return p +} + +// JNGE performs "Jump if not greater or equal (SF != OF)". 
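+//
+// JNGE encodes identically to JL above (0x7c, or 0x0f 0x8c for rel32); a
+// sketch (p, `to` assumed):
+//
+//    p.JNGE(to)    // signed "less": SF != OF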
+// +// Mnemonic : JNGE +// Supported forms : (2 forms) +// +// * JNGE rel8 +// * JNGE rel32 +// +func (self *Program) JNGE(v0 interface{}) *Instruction { + p := self.alloc("JNGE", 1, Operands { v0 }) + p.branch = _B_conditional + // JNGE rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x7c) + m.imm1(relv(v[0])) + }) + } + // JNGE rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8c) + m.imm4(relv(v[0])) + }) + } + // JNGE label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x7c) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8c) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNGE") + } + return p +} + +// JNL performs "Jump if not less (SF == OF)". +// +// Mnemonic : JNL +// Supported forms : (2 forms) +// +// * JNL rel8 +// * JNL rel32 +// +func (self *Program) JNL(v0 interface{}) *Instruction { + p := self.alloc("JNL", 1, Operands { v0 }) + p.branch = _B_conditional + // JNL rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x7d) + m.imm1(relv(v[0])) + }) + } + // JNL rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8d) + m.imm4(relv(v[0])) + }) + } + // JNL label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x7d) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8d) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNL") + } + return p +} + +// JNLE performs "Jump if not less or equal (ZF == 0 and SF == OF)". +// +// Mnemonic : JNLE +// Supported forms : (2 forms) +// +// * JNLE rel8 +// * JNLE rel32 +// +func (self *Program) JNLE(v0 interface{}) *Instruction { + p := self.alloc("JNLE", 1, Operands { v0 }) + p.branch = _B_conditional + // JNLE rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x7f) + m.imm1(relv(v[0])) + }) + } + // JNLE rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8f) + m.imm4(relv(v[0])) + }) + } + // JNLE label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x7f) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8f) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNLE") + } + return p +} + +// JNO performs "Jump if not overflow (OF == 0)". 
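+//
+// A sketch (p, `to` assumed); JNO is the complement of JO further below:
+//
+//    p.JNO(to)    // taken when OF == 0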
+// +// Mnemonic : JNO +// Supported forms : (2 forms) +// +// * JNO rel8 +// * JNO rel32 +// +func (self *Program) JNO(v0 interface{}) *Instruction { + p := self.alloc("JNO", 1, Operands { v0 }) + p.branch = _B_conditional + // JNO rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x71) + m.imm1(relv(v[0])) + }) + } + // JNO rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x81) + m.imm4(relv(v[0])) + }) + } + // JNO label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x71) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x81) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNO") + } + return p +} + +// JNP performs "Jump if not parity (PF == 0)". +// +// Mnemonic : JNP +// Supported forms : (2 forms) +// +// * JNP rel8 +// * JNP rel32 +// +func (self *Program) JNP(v0 interface{}) *Instruction { + p := self.alloc("JNP", 1, Operands { v0 }) + p.branch = _B_conditional + // JNP rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x7b) + m.imm1(relv(v[0])) + }) + } + // JNP rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8b) + m.imm4(relv(v[0])) + }) + } + // JNP label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x7b) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8b) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNP") + } + return p +} + +// JNS performs "Jump if not sign (SF == 0)". +// +// Mnemonic : JNS +// Supported forms : (2 forms) +// +// * JNS rel8 +// * JNS rel32 +// +func (self *Program) JNS(v0 interface{}) *Instruction { + p := self.alloc("JNS", 1, Operands { v0 }) + p.branch = _B_conditional + // JNS rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x79) + m.imm1(relv(v[0])) + }) + } + // JNS rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x89) + m.imm4(relv(v[0])) + }) + } + // JNS label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x79) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x89) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNS") + } + return p +} + +// JNZ performs "Jump if not zero (ZF == 0)". 
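+//
+// JNZ emits the same bytes as JNE above (0x75, or 0x0f 0x85 for rel32); a
+// typical loop tail, with p and a loop-head label assumed:
+//
+//    p.JNZ(loopHead)    // repeat while ZF == 0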
+// +// Mnemonic : JNZ +// Supported forms : (2 forms) +// +// * JNZ rel8 +// * JNZ rel32 +// +func (self *Program) JNZ(v0 interface{}) *Instruction { + p := self.alloc("JNZ", 1, Operands { v0 }) + p.branch = _B_conditional + // JNZ rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x75) + m.imm1(relv(v[0])) + }) + } + // JNZ rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x85) + m.imm4(relv(v[0])) + }) + } + // JNZ label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x75) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x85) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JNZ") + } + return p +} + +// JO performs "Jump if overflow (OF == 1)". +// +// Mnemonic : JO +// Supported forms : (2 forms) +// +// * JO rel8 +// * JO rel32 +// +func (self *Program) JO(v0 interface{}) *Instruction { + p := self.alloc("JO", 1, Operands { v0 }) + p.branch = _B_conditional + // JO rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x70) + m.imm1(relv(v[0])) + }) + } + // JO rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x80) + m.imm4(relv(v[0])) + }) + } + // JO label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x70) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x80) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JO") + } + return p +} + +// JP performs "Jump if parity (PF == 1)". +// +// Mnemonic : JP +// Supported forms : (2 forms) +// +// * JP rel8 +// * JP rel32 +// +func (self *Program) JP(v0 interface{}) *Instruction { + p := self.alloc("JP", 1, Operands { v0 }) + p.branch = _B_conditional + // JP rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x7a) + m.imm1(relv(v[0])) + }) + } + // JP rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8a) + m.imm4(relv(v[0])) + }) + } + // JP label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x7a) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8a) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JP") + } + return p +} + +// JPE performs "Jump if parity even (PF == 1)". 
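+//
+// JPE ("jump if parity even") is the alias spelling of JP directly above;
+// a sketch (p, `to` assumed):
+//
+//    p.JPE(to)    // taken when PF == 1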
+// +// Mnemonic : JPE +// Supported forms : (2 forms) +// +// * JPE rel8 +// * JPE rel32 +// +func (self *Program) JPE(v0 interface{}) *Instruction { + p := self.alloc("JPE", 1, Operands { v0 }) + p.branch = _B_conditional + // JPE rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x7a) + m.imm1(relv(v[0])) + }) + } + // JPE rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8a) + m.imm4(relv(v[0])) + }) + } + // JPE label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x7a) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8a) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JPE") + } + return p +} + +// JPO performs "Jump if parity odd (PF == 0)". +// +// Mnemonic : JPO +// Supported forms : (2 forms) +// +// * JPO rel8 +// * JPO rel32 +// +func (self *Program) JPO(v0 interface{}) *Instruction { + p := self.alloc("JPO", 1, Operands { v0 }) + p.branch = _B_conditional + // JPO rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x7b) + m.imm1(relv(v[0])) + }) + } + // JPO rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8b) + m.imm4(relv(v[0])) + }) + } + // JPO label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x7b) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x8b) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JPO") + } + return p +} + +// JRCXZ performs "Jump if RCX register is 0". +// +// Mnemonic : JRCXZ +// Supported forms : (1 form) +// +// * JRCXZ rel8 +// +func (self *Program) JRCXZ(v0 interface{}) *Instruction { + p := self.alloc("JRCXZ", 1, Operands { v0 }) + p.branch = _B_conditional + // JRCXZ rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xe3) + m.imm1(relv(v[0])) + }) + } + // JRCXZ label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0xe3) + m.imm1(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JRCXZ") + } + return p +} + +// JS performs "Jump if sign (SF == 1)". +// +// Mnemonic : JS +// Supported forms : (2 forms) +// +// * JS rel8 +// * JS rel32 +// +func (self *Program) JS(v0 interface{}) *Instruction { + p := self.alloc("JS", 1, Operands { v0 }) + p.branch = _B_conditional + // JS rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x78) + m.imm1(relv(v[0])) + }) + } + // JS rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x88) + m.imm4(relv(v[0])) + }) + } + // JS label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x78) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x88) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JS") + } + return p +} + +// JZ performs "Jump if zero (ZF == 1)". 
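+//
+// JZ encodes identically to JE above (0x74, or 0x0f 0x84 for rel32); a
+// sketch (p, `to` assumed):
+//
+//    p.JZ(to)    // taken when ZF == 1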
+// +// Mnemonic : JZ +// Supported forms : (2 forms) +// +// * JZ rel8 +// * JZ rel32 +// +func (self *Program) JZ(v0 interface{}) *Instruction { + p := self.alloc("JZ", 1, Operands { v0 }) + p.branch = _B_conditional + // JZ rel8 + if isRel8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x74) + m.imm1(relv(v[0])) + }) + } + // JZ rel32 + if isRel32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x84) + m.imm4(relv(v[0])) + }) + } + // JZ label + if isLabel(v0) { + p.add(_F_rel1, func(m *_Encoding, v []interface{}) { + m.emit(0x74) + m.imm1(relv(v[0])) + }) + p.add(_F_rel4, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x84) + m.imm4(relv(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for JZ") + } + return p +} + +// KADDB performs "ADD Two 8-bit Masks". +// +// Mnemonic : KADDB +// Supported forms : (1 form) +// +// * KADDB k, k, k [AVX512DQ] +// +func (self *Program) KADDB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KADDB", 3, Operands { v0, v1, v2 }) + // KADDB k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, nil, hlcode(v[1])) + m.emit(0x4a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KADDB") + } + return p +} + +// KADDD performs "ADD Two 32-bit Masks". +// +// Mnemonic : KADDD +// Supported forms : (1 form) +// +// * KADDD k, k, k [AVX512BW] +// +func (self *Program) KADDD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KADDD", 3, Operands { v0, v1, v2 }) + // KADDD k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x4a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KADDD") + } + return p +} + +// KADDQ performs "ADD Two 64-bit Masks". +// +// Mnemonic : KADDQ +// Supported forms : (1 form) +// +// * KADDQ k, k, k [AVX512BW] +// +func (self *Program) KADDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KADDQ", 3, Operands { v0, v1, v2 }) + // KADDQ k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfc ^ (hlcode(v[1]) << 3)) + m.emit(0x4a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KADDQ") + } + return p +} + +// KADDW performs "ADD Two 16-bit Masks". +// +// Mnemonic : KADDW +// Supported forms : (1 form) +// +// * KADDW k, k, k [AVX512DQ] +// +func (self *Program) KADDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KADDW", 3, Operands { v0, v1, v2 }) + // KADDW k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, 0, nil, hlcode(v[1])) + m.emit(0x4a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KADDW") + } + return p +} + +// KANDB performs "Bitwise Logical AND 8-bit Masks". 
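+//
+// A sketch assuming p and this package's mask register constants (K0-K7);
+// as with the other three-operand mask ops here, the last operand is the
+// destination (the ModRM reg field):
+//
+//    p.KANDB(K3, K2, K1)    // K1 = K2 AND K3, over the low 8 mask bits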
+// +// Mnemonic : KANDB +// Supported forms : (1 form) +// +// * KANDB k, k, k [AVX512DQ] +// +func (self *Program) KANDB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KANDB", 3, Operands { v0, v1, v2 }) + // KANDB k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, nil, hlcode(v[1])) + m.emit(0x41) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KANDB") + } + return p +} + +// KANDD performs "Bitwise Logical AND 32-bit Masks". +// +// Mnemonic : KANDD +// Supported forms : (1 form) +// +// * KANDD k, k, k [AVX512BW] +// +func (self *Program) KANDD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KANDD", 3, Operands { v0, v1, v2 }) + // KANDD k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x41) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KANDD") + } + return p +} + +// KANDNB performs "Bitwise Logical AND NOT 8-bit Masks". +// +// Mnemonic : KANDNB +// Supported forms : (1 form) +// +// * KANDNB k, k, k [AVX512DQ] +// +func (self *Program) KANDNB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KANDNB", 3, Operands { v0, v1, v2 }) + // KANDNB k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, nil, hlcode(v[1])) + m.emit(0x42) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KANDNB") + } + return p +} + +// KANDND performs "Bitwise Logical AND NOT 32-bit Masks". +// +// Mnemonic : KANDND +// Supported forms : (1 form) +// +// * KANDND k, k, k [AVX512BW] +// +func (self *Program) KANDND(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KANDND", 3, Operands { v0, v1, v2 }) + // KANDND k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x42) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KANDND") + } + return p +} + +// KANDNQ performs "Bitwise Logical AND NOT 64-bit Masks". +// +// Mnemonic : KANDNQ +// Supported forms : (1 form) +// +// * KANDNQ k, k, k [AVX512BW] +// +func (self *Program) KANDNQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KANDNQ", 3, Operands { v0, v1, v2 }) + // KANDNQ k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfc ^ (hlcode(v[1]) << 3)) + m.emit(0x42) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KANDNQ") + } + return p +} + +// KANDNW performs "Bitwise Logical AND NOT 16-bit Masks". 
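+//
+// Note on operand order: following the AVX-512 definition of KANDN
+// (DEST := (NOT SRC1) AND SRC2) and the operand placement in the emitter
+// below (v1 in VEX.vvvv as SRC1, v0 in ModRM.rm as SRC2), a call such as
+// KANDNW(K1, K2, K3) computes k3 = ^k2 & k1, assuming K1..K3 name the
+// package's mask-register constants.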
+// +// Mnemonic : KANDNW +// Supported forms : (1 form) +// +// * KANDNW k, k, k [AVX512F] +// +func (self *Program) KANDNW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KANDNW", 3, Operands { v0, v1, v2 }) + // KANDNW k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, 0, nil, hlcode(v[1])) + m.emit(0x42) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KANDNW") + } + return p +} + +// KANDQ performs "Bitwise Logical AND 64-bit Masks". +// +// Mnemonic : KANDQ +// Supported forms : (1 form) +// +// * KANDQ k, k, k [AVX512BW] +// +func (self *Program) KANDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KANDQ", 3, Operands { v0, v1, v2 }) + // KANDQ k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfc ^ (hlcode(v[1]) << 3)) + m.emit(0x41) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KANDQ") + } + return p +} + +// KANDW performs "Bitwise Logical AND 16-bit Masks". +// +// Mnemonic : KANDW +// Supported forms : (1 form) +// +// * KANDW k, k, k [AVX512F] +// +func (self *Program) KANDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KANDW", 3, Operands { v0, v1, v2 }) + // KANDW k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, 0, nil, hlcode(v[1])) + m.emit(0x41) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KANDW") + } + return p +} + +// KMOVB performs "Move 8-bit Mask". 
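+//
+// A minimal usage sketch (illustrative; it assumes EAX and K1 are the
+// package's exported register constants):
+//
+//     p.KMOVB(EAX, K1)    // "KMOVB r32, k" form, emits: c5 f9 92 c8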
+// +// Mnemonic : KMOVB +// Supported forms : (5 forms) +// +// * KMOVB k, k [AVX512DQ] +// * KMOVB r32, k [AVX512DQ] +// * KMOVB m8, k [AVX512DQ] +// * KMOVB k, r32 [AVX512DQ] +// * KMOVB k, m8 [AVX512DQ] +// +func (self *Program) KMOVB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KMOVB", 2, Operands { v0, v1 }) + // KMOVB k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, nil, 0) + m.emit(0x90) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // KMOVB r32, k + if isReg32(v0) && isK(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, v[0], 0) + m.emit(0x92) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // KMOVB m8, k + if isM8(v0) && isK(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, addr(v[0]), 0) + m.emit(0x90) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // KMOVB k, r32 + if isK(v0) && isReg32(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), nil, 0) + m.emit(0x93) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // KMOVB k, m8 + if isK(v0) && isM8(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, addr(v[1]), 0) + m.emit(0x91) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for KMOVB") + } + return p +} + +// KMOVD performs "Move 32-bit Mask". +// +// Mnemonic : KMOVD +// Supported forms : (5 forms) +// +// * KMOVD k, k [AVX512BW] +// * KMOVD r32, k [AVX512BW] +// * KMOVD m32, k [AVX512BW] +// * KMOVD k, r32 [AVX512BW] +// * KMOVD k, m32 [AVX512BW] +// +func (self *Program) KMOVD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KMOVD", 2, Operands { v0, v1 }) + // KMOVD k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xf9) + m.emit(0x90) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // KMOVD r32, k + if isReg32(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, 0, v[0], 0) + m.emit(0x92) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // KMOVD m32, k + if isM32(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b1, 0x81, 0, addr(v[0]), 0) + m.emit(0x90) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // KMOVD k, r32 + if isK(v0) && isReg32(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[1]), nil, 0) + m.emit(0x93) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // KMOVD k, m32 + if isK(v0) && isM32(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b1, 0x81, 0, addr(v[1]), 0) + m.emit(0x91) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for KMOVD") + } + return p +} + +// KMOVQ performs "Move 64-bit Mask". 
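+//
+// Unlike the KMOVB/KMOVW encoders, the register and k-to-k forms of KMOVQ
+// require VEX.W = 1, which the two-byte (0xc5) VEX prefix cannot express;
+// that is why the emitters below write the three-byte 0xc4 sequence by hand
+// instead of going through m.vex2.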
+// +// Mnemonic : KMOVQ +// Supported forms : (5 forms) +// +// * KMOVQ k, k [AVX512BW] +// * KMOVQ r64, k [AVX512BW] +// * KMOVQ m64, k [AVX512BW] +// * KMOVQ k, r64 [AVX512BW] +// * KMOVQ k, m64 [AVX512BW] +// +func (self *Program) KMOVQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KMOVQ", 2, Operands { v0, v1 }) + // KMOVQ k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xf8) + m.emit(0x90) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // KMOVQ r64, k + if isReg64(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1 ^ (hcode(v[0]) << 5)) + m.emit(0xfb) + m.emit(0x92) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // KMOVQ m64, k + if isM64(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b1, 0x80, 0, addr(v[0]), 0) + m.emit(0x90) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // KMOVQ k, r64 + if isK(v0) && isReg64(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1 ^ (hcode(v[1]) << 7)) + m.emit(0xfb) + m.emit(0x93) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // KMOVQ k, m64 + if isK(v0) && isM64(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b1, 0x80, 0, addr(v[1]), 0) + m.emit(0x91) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for KMOVQ") + } + return p +} + +// KMOVW performs "Move 16-bit Mask". +// +// Mnemonic : KMOVW +// Supported forms : (5 forms) +// +// * KMOVW k, k [AVX512F] +// * KMOVW r32, k [AVX512F] +// * KMOVW m16, k [AVX512F] +// * KMOVW k, r32 [AVX512F] +// * KMOVW k, m16 [AVX512F] +// +func (self *Program) KMOVW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KMOVW", 2, Operands { v0, v1 }) + // KMOVW k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, 0, nil, 0) + m.emit(0x90) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // KMOVW r32, k + if isReg32(v0) && isK(v1) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, 0, v[0], 0) + m.emit(0x92) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // KMOVW m16, k + if isM16(v0) && isK(v1) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, 0, addr(v[0]), 0) + m.emit(0x90) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // KMOVW k, r32 + if isK(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), nil, 0) + m.emit(0x93) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // KMOVW k, m16 + if isK(v0) && isM16(v1) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, 0, addr(v[1]), 0) + m.emit(0x91) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for KMOVW") + } + return p +} + +// KNOTB performs "NOT 8-bit Mask Register". 
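+//
+// Worked encoding example (derived from the emitter below): with k1 as the
+// source and k2 as the destination, KNOTB writes the bitwise NOT of k1's
+// low eight bits into k2 (upper bits zeroed) and emits: c5 f9 44 d1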
+// +// Mnemonic : KNOTB +// Supported forms : (1 form) +// +// * KNOTB k, k [AVX512DQ] +// +func (self *Program) KNOTB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KNOTB", 2, Operands { v0, v1 }) + // KNOTB k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, nil, 0) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KNOTB") + } + return p +} + +// KNOTD performs "NOT 32-bit Mask Register". +// +// Mnemonic : KNOTD +// Supported forms : (1 form) +// +// * KNOTD k, k [AVX512BW] +// +func (self *Program) KNOTD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KNOTD", 2, Operands { v0, v1 }) + // KNOTD k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xf9) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KNOTD") + } + return p +} + +// KNOTQ performs "NOT 64-bit Mask Register". +// +// Mnemonic : KNOTQ +// Supported forms : (1 form) +// +// * KNOTQ k, k [AVX512BW] +// +func (self *Program) KNOTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KNOTQ", 2, Operands { v0, v1 }) + // KNOTQ k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xf8) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KNOTQ") + } + return p +} + +// KNOTW performs "NOT 16-bit Mask Register". +// +// Mnemonic : KNOTW +// Supported forms : (1 form) +// +// * KNOTW k, k [AVX512F] +// +func (self *Program) KNOTW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KNOTW", 2, Operands { v0, v1 }) + // KNOTW k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, 0, nil, 0) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KNOTW") + } + return p +} + +// KORB performs "Bitwise Logical OR 8-bit Masks". +// +// Mnemonic : KORB +// Supported forms : (1 form) +// +// * KORB k, k, k [AVX512DQ] +// +func (self *Program) KORB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KORB", 3, Operands { v0, v1, v2 }) + // KORB k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, nil, hlcode(v[1])) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KORB") + } + return p +} + +// KORD performs "Bitwise Logical OR 32-bit Masks". 
+// +// Mnemonic : KORD +// Supported forms : (1 form) +// +// * KORD k, k, k [AVX512BW] +// +func (self *Program) KORD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KORD", 3, Operands { v0, v1, v2 }) + // KORD k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KORD") + } + return p +} + +// KORQ performs "Bitwise Logical OR 64-bit Masks". +// +// Mnemonic : KORQ +// Supported forms : (1 form) +// +// * KORQ k, k, k [AVX512BW] +// +func (self *Program) KORQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KORQ", 3, Operands { v0, v1, v2 }) + // KORQ k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfc ^ (hlcode(v[1]) << 3)) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KORQ") + } + return p +} + +// KORTESTB performs "OR 8-bit Masks and Set Flags". +// +// Mnemonic : KORTESTB +// Supported forms : (1 form) +// +// * KORTESTB k, k [AVX512DQ] +// +func (self *Program) KORTESTB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KORTESTB", 2, Operands { v0, v1 }) + // KORTESTB k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, nil, 0) + m.emit(0x98) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KORTESTB") + } + return p +} + +// KORTESTD performs "OR 32-bit Masks and Set Flags". +// +// Mnemonic : KORTESTD +// Supported forms : (1 form) +// +// * KORTESTD k, k [AVX512BW] +// +func (self *Program) KORTESTD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KORTESTD", 2, Operands { v0, v1 }) + // KORTESTD k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xf9) + m.emit(0x98) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KORTESTD") + } + return p +} + +// KORTESTQ performs "OR 64-bit Masks and Set Flags". +// +// Mnemonic : KORTESTQ +// Supported forms : (1 form) +// +// * KORTESTQ k, k [AVX512BW] +// +func (self *Program) KORTESTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KORTESTQ", 2, Operands { v0, v1 }) + // KORTESTQ k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xf8) + m.emit(0x98) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KORTESTQ") + } + return p +} + +// KORTESTW performs "OR 16-bit Masks and Set Flags". 
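+//
+// The OR result itself is discarded; only the flags are updated: ZF is set
+// when the OR of the two 16-bit masks is zero and CF when it is all ones,
+// which makes KORTESTW the usual way to terminate a masked vector loop.
+// Example encoding: KORTESTW(K1, K2) emits c5 f8 98 d1.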
+// +// Mnemonic : KORTESTW +// Supported forms : (1 form) +// +// * KORTESTW k, k [AVX512F] +// +func (self *Program) KORTESTW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KORTESTW", 2, Operands { v0, v1 }) + // KORTESTW k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, 0, nil, 0) + m.emit(0x98) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KORTESTW") + } + return p +} + +// KORW performs "Bitwise Logical OR 16-bit Masks". +// +// Mnemonic : KORW +// Supported forms : (1 form) +// +// * KORW k, k, k [AVX512F] +// +func (self *Program) KORW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KORW", 3, Operands { v0, v1, v2 }) + // KORW k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, 0, nil, hlcode(v[1])) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KORW") + } + return p +} + +// KSHIFTLB performs "Shift Left 8-bit Masks". +// +// Mnemonic : KSHIFTLB +// Supported forms : (1 form) +// +// * KSHIFTLB imm8, k, k [AVX512DQ] +// +func (self *Program) KSHIFTLB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KSHIFTLB", 3, Operands { v0, v1, v2 }) + // KSHIFTLB imm8, k, k + if isImm8(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3) + m.emit(0x79) + m.emit(0x32) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KSHIFTLB") + } + return p +} + +// KSHIFTLD performs "Shift Left 32-bit Masks". +// +// Mnemonic : KSHIFTLD +// Supported forms : (1 form) +// +// * KSHIFTLD imm8, k, k [AVX512BW] +// +func (self *Program) KSHIFTLD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KSHIFTLD", 3, Operands { v0, v1, v2 }) + // KSHIFTLD imm8, k, k + if isImm8(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3) + m.emit(0x79) + m.emit(0x33) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KSHIFTLD") + } + return p +} + +// KSHIFTLQ performs "Shift Left 64-bit Masks". +// +// Mnemonic : KSHIFTLQ +// Supported forms : (1 form) +// +// * KSHIFTLQ imm8, k, k [AVX512BW] +// +func (self *Program) KSHIFTLQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KSHIFTLQ", 3, Operands { v0, v1, v2 }) + // KSHIFTLQ imm8, k, k + if isImm8(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3) + m.emit(0xf9) + m.emit(0x33) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KSHIFTLQ") + } + return p +} + +// KSHIFTLW performs "Shift Left 16-bit Masks". 
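+//
+// The shift count is the 8-bit immediate, and counts above 15 zero the
+// destination. Example encoding: KSHIFTLW(4, K1, K2) shifts k1 left by four
+// bits into k2 and emits: c4 e3 f9 32 d1 04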
+// +// Mnemonic : KSHIFTLW +// Supported forms : (1 form) +// +// * KSHIFTLW imm8, k, k [AVX512F] +// +func (self *Program) KSHIFTLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KSHIFTLW", 3, Operands { v0, v1, v2 }) + // KSHIFTLW imm8, k, k + if isImm8(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3) + m.emit(0xf9) + m.emit(0x32) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KSHIFTLW") + } + return p +} + +// KSHIFTRB performs "Shift Right 8-bit Masks". +// +// Mnemonic : KSHIFTRB +// Supported forms : (1 form) +// +// * KSHIFTRB imm8, k, k [AVX512DQ] +// +func (self *Program) KSHIFTRB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KSHIFTRB", 3, Operands { v0, v1, v2 }) + // KSHIFTRB imm8, k, k + if isImm8(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3) + m.emit(0x79) + m.emit(0x30) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KSHIFTRB") + } + return p +} + +// KSHIFTRD performs "Shift Right 32-bit Masks". +// +// Mnemonic : KSHIFTRD +// Supported forms : (1 form) +// +// * KSHIFTRD imm8, k, k [AVX512BW] +// +func (self *Program) KSHIFTRD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KSHIFTRD", 3, Operands { v0, v1, v2 }) + // KSHIFTRD imm8, k, k + if isImm8(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3) + m.emit(0x79) + m.emit(0x31) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KSHIFTRD") + } + return p +} + +// KSHIFTRQ performs "Shift Right 64-bit Masks". +// +// Mnemonic : KSHIFTRQ +// Supported forms : (1 form) +// +// * KSHIFTRQ imm8, k, k [AVX512BW] +// +func (self *Program) KSHIFTRQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KSHIFTRQ", 3, Operands { v0, v1, v2 }) + // KSHIFTRQ imm8, k, k + if isImm8(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3) + m.emit(0xf9) + m.emit(0x31) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KSHIFTRQ") + } + return p +} + +// KSHIFTRW performs "Shift Right 16-bit Masks". +// +// Mnemonic : KSHIFTRW +// Supported forms : (1 form) +// +// * KSHIFTRW imm8, k, k [AVX512F] +// +func (self *Program) KSHIFTRW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KSHIFTRW", 3, Operands { v0, v1, v2 }) + // KSHIFTRW imm8, k, k + if isImm8(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3) + m.emit(0xf9) + m.emit(0x30) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KSHIFTRW") + } + return p +} + +// KTESTB performs "Bit Test 8-bit Masks and Set Flags". 
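+//
+// Like KORTEST, this updates only the flags: ZF is set when the AND of the
+// two masks is zero, and CF is set when every bit of the first argument is
+// also set in the second (per the AVX-512 definition, with v0 in ModRM.rm
+// as SRC2 and v1 as SRC1).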
+// +// Mnemonic : KTESTB +// Supported forms : (1 form) +// +// * KTESTB k, k [AVX512DQ] +// +func (self *Program) KTESTB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KTESTB", 2, Operands { v0, v1 }) + // KTESTB k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, nil, 0) + m.emit(0x99) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KTESTB") + } + return p +} + +// KTESTD performs "Bit Test 32-bit Masks and Set Flags". +// +// Mnemonic : KTESTD +// Supported forms : (1 form) +// +// * KTESTD k, k [AVX512BW] +// +func (self *Program) KTESTD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KTESTD", 2, Operands { v0, v1 }) + // KTESTD k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xf9) + m.emit(0x99) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KTESTD") + } + return p +} + +// KTESTQ performs "Bit Test 64-bit Masks and Set Flags". +// +// Mnemonic : KTESTQ +// Supported forms : (1 form) +// +// * KTESTQ k, k [AVX512BW] +// +func (self *Program) KTESTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KTESTQ", 2, Operands { v0, v1 }) + // KTESTQ k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xf8) + m.emit(0x99) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KTESTQ") + } + return p +} + +// KTESTW performs "Bit Test 16-bit Masks and Set Flags". +// +// Mnemonic : KTESTW +// Supported forms : (1 form) +// +// * KTESTW k, k [AVX512DQ] +// +func (self *Program) KTESTW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("KTESTW", 2, Operands { v0, v1 }) + // KTESTW k, k + if isK(v0) && isK(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, 0, nil, 0) + m.emit(0x99) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KTESTW") + } + return p +} + +// KUNPCKBW performs "Unpack and Interleave 8-bit Masks". +// +// Mnemonic : KUNPCKBW +// Supported forms : (1 form) +// +// * KUNPCKBW k, k, k [AVX512F] +// +func (self *Program) KUNPCKBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KUNPCKBW", 3, Operands { v0, v1, v2 }) + // KUNPCKBW k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, nil, hlcode(v[1])) + m.emit(0x4b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KUNPCKBW") + } + return p +} + +// KUNPCKDQ performs "Unpack and Interleave 32-bit Masks". 
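+//
+// Per the AVX-512 unpack definition and the operand placement below (v0 in
+// ModRM.rm as SRC2, v1 in VEX.vvvv as SRC1), KUNPCKDQ(K1, K2, K3) forms a
+// 64-bit mask in k3 whose low 32 bits come from k1 and whose high 32 bits
+// come from k2.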
+// +// Mnemonic : KUNPCKDQ +// Supported forms : (1 form) +// +// * KUNPCKDQ k, k, k [AVX512BW] +// +func (self *Program) KUNPCKDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KUNPCKDQ", 3, Operands { v0, v1, v2 }) + // KUNPCKDQ k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfc ^ (hlcode(v[1]) << 3)) + m.emit(0x4b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KUNPCKDQ") + } + return p +} + +// KUNPCKWD performs "Unpack and Interleave 16-bit Masks". +// +// Mnemonic : KUNPCKWD +// Supported forms : (1 form) +// +// * KUNPCKWD k, k, k [AVX512BW] +// +func (self *Program) KUNPCKWD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KUNPCKWD", 3, Operands { v0, v1, v2 }) + // KUNPCKWD k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, 0, nil, hlcode(v[1])) + m.emit(0x4b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KUNPCKWD") + } + return p +} + +// KXNORB performs "Bitwise Logical XNOR 8-bit Masks". +// +// Mnemonic : KXNORB +// Supported forms : (1 form) +// +// * KXNORB k, k, k [AVX512DQ] +// +func (self *Program) KXNORB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KXNORB", 3, Operands { v0, v1, v2 }) + // KXNORB k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, nil, hlcode(v[1])) + m.emit(0x46) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KXNORB") + } + return p +} + +// KXNORD performs "Bitwise Logical XNOR 32-bit Masks". +// +// Mnemonic : KXNORD +// Supported forms : (1 form) +// +// * KXNORD k, k, k [AVX512BW] +// +func (self *Program) KXNORD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KXNORD", 3, Operands { v0, v1, v2 }) + // KXNORD k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x46) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KXNORD") + } + return p +} + +// KXNORQ performs "Bitwise Logical XNOR 64-bit Masks". +// +// Mnemonic : KXNORQ +// Supported forms : (1 form) +// +// * KXNORQ k, k, k [AVX512BW] +// +func (self *Program) KXNORQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KXNORQ", 3, Operands { v0, v1, v2 }) + // KXNORQ k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfc ^ (hlcode(v[1]) << 3)) + m.emit(0x46) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KXNORQ") + } + return p +} + +// KXNORW performs "Bitwise Logical XNOR 16-bit Masks". 
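+//
+// XNOR of a mask with itself is all ones, so the common idiom for
+// materializing a full 16-lane mask reuses one register for all three
+// operands (assuming K1 is the package's mask-register constant):
+//
+//     p.KXNORW(K1, K1, K1)    // k1 = 0xffff, emits: c5 f4 46 c9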
+// +// Mnemonic : KXNORW +// Supported forms : (1 form) +// +// * KXNORW k, k, k [AVX512F] +// +func (self *Program) KXNORW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KXNORW", 3, Operands { v0, v1, v2 }) + // KXNORW k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, 0, nil, hlcode(v[1])) + m.emit(0x46) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KXNORW") + } + return p +} + +// KXORB performs "Bitwise Logical XOR 8-bit Masks". +// +// Mnemonic : KXORB +// Supported forms : (1 form) +// +// * KXORB k, k, k [AVX512DQ] +// +func (self *Program) KXORB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KXORB", 3, Operands { v0, v1, v2 }) + // KXORB k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, nil, hlcode(v[1])) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KXORB") + } + return p +} + +// KXORD performs "Bitwise Logical XOR 32-bit Masks". +// +// Mnemonic : KXORD +// Supported forms : (1 form) +// +// * KXORD k, k, k [AVX512BW] +// +func (self *Program) KXORD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KXORD", 3, Operands { v0, v1, v2 }) + // KXORD k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KXORD") + } + return p +} + +// KXORQ performs "Bitwise Logical XOR 64-bit Masks". +// +// Mnemonic : KXORQ +// Supported forms : (1 form) +// +// * KXORQ k, k, k [AVX512BW] +// +func (self *Program) KXORQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KXORQ", 3, Operands { v0, v1, v2 }) + // KXORQ k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1) + m.emit(0xfc ^ (hlcode(v[1]) << 3)) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KXORQ") + } + return p +} + +// KXORW performs "Bitwise Logical XOR 16-bit Masks". +// +// Mnemonic : KXORW +// Supported forms : (1 form) +// +// * KXORW k, k, k [AVX512F] +// +func (self *Program) KXORW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("KXORW", 3, Operands { v0, v1, v2 }) + // KXORW k, k, k + if isK(v0) && isK(v1) && isK(v2) { + self.require(ISA_AVX512F) + p.domain = DomainMask + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, 0, nil, hlcode(v[1])) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for KXORW") + } + return p +} + +// LDDQU performs "Load Unaligned Integer 128 Bits". 
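+//
+// LDDQU is the SSE3 alternative to MOVDQU for unaligned 128-bit loads, and
+// only the memory-source form exists. A usage sketch (illustrative; it
+// assumes Ptr, RSI and XMM0 are the package's operand helpers, which are
+// not defined in this file):
+//
+//     p.LDDQU(Ptr(RSI, 0), XMM0)    // emits: f2 0f f0 06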
+// +// Mnemonic : LDDQU +// Supported forms : (1 form) +// +// * LDDQU m128, xmm [SSE3] +// +func (self *Program) LDDQU(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("LDDQU", 2, Operands { v0, v1 }) + // LDDQU m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf0) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for LDDQU") + } + return p +} + +// LDMXCSR performs "Load MXCSR Register". +// +// Mnemonic : LDMXCSR +// Supported forms : (1 form) +// +// * LDMXCSR m32 [SSE] +// +func (self *Program) LDMXCSR(v0 interface{}) *Instruction { + p := self.alloc("LDMXCSR", 1, Operands { v0 }) + // LDMXCSR m32 + if isM32(v0) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0xae) + m.mrsd(2, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for LDMXCSR") + } + return p +} + +// LEAL performs "Load Effective Address". +// +// Mnemonic : LEA +// Supported forms : (1 form) +// +// * LEAL m, r32 +// +func (self *Program) LEAL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("LEAL", 2, Operands { v0, v1 }) + // LEAL m, r32 + if isM(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x8d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for LEAL") + } + return p +} + +// LEAQ performs "Load Effective Address". +// +// Mnemonic : LEA +// Supported forms : (1 form) +// +// * LEAQ m, r64 +// +func (self *Program) LEAQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("LEAQ", 2, Operands { v0, v1 }) + // LEAQ m, r64 + if isM(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x8d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for LEAQ") + } + return p +} + +// LEAW performs "Load Effective Address". +// +// Mnemonic : LEA +// Supported forms : (1 form) +// +// * LEAW m, r16 +// +func (self *Program) LEAW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("LEAW", 2, Operands { v0, v1 }) + // LEAW m, r16 + if isM(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x8d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for LEAW") + } + return p +} + +// LFENCE performs "Load Fence". +// +// Mnemonic : LFENCE +// Supported forms : (1 form) +// +// * LFENCE [SSE2] +// +func (self *Program) LFENCE() *Instruction { + p := self.alloc("LFENCE", 0, Operands { }) + // LFENCE + self.require(ISA_SSE2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0xae) + m.emit(0xe8) + }) + return p +} + +// LZCNTL performs "Count the Number of Leading Zero Bits". 
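+//
+// Worked encoding example: LZCNTL(ECX, EAX) emits f3 0f bd c1. Beware that
+// on CPUs without LZCNT the f3 prefix is ignored and the same bytes decode
+// as BSR, which yields a different result; hence the ISA_LZCNT requirement
+// below.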
+// +// Mnemonic : LZCNT +// Supported forms : (2 forms) +// +// * LZCNTL r32, r32 [LZCNT] +// * LZCNTL m32, r32 [LZCNT] +// +func (self *Program) LZCNTL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("LZCNTL", 2, Operands { v0, v1 }) + // LZCNTL r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_LZCNT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xbd) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // LZCNTL m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_LZCNT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xbd) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for LZCNTL") + } + return p +} + +// LZCNTQ performs "Count the Number of Leading Zero Bits". +// +// Mnemonic : LZCNT +// Supported forms : (2 forms) +// +// * LZCNTQ r64, r64 [LZCNT] +// * LZCNTQ m64, r64 [LZCNT] +// +func (self *Program) LZCNTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("LZCNTQ", 2, Operands { v0, v1 }) + // LZCNTQ r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_LZCNT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xbd) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // LZCNTQ m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_LZCNT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0xbd) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for LZCNTQ") + } + return p +} + +// LZCNTW performs "Count the Number of Leading Zero Bits". +// +// Mnemonic : LZCNT +// Supported forms : (2 forms) +// +// * LZCNTW r16, r16 [LZCNT] +// * LZCNTW m16, r16 [LZCNT] +// +func (self *Program) LZCNTW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("LZCNTW", 2, Operands { v0, v1 }) + // LZCNTW r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_LZCNT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xbd) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // LZCNTW m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_LZCNT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xbd) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for LZCNTW") + } + return p +} + +// MASKMOVDQU performs "Store Selected Bytes of Double Quadword". 
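+//
+// The store address is implicit in DS:RDI. Per the emitter below (v1
+// occupies the ModRM reg field), the second argument supplies the data
+// bytes and the first the byte mask: the high bit of each mask byte selects
+// whether the corresponding data byte is written. The store is
+// non-temporal.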
+// +// Mnemonic : MASKMOVDQU +// Supported forms : (1 form) +// +// * MASKMOVDQU xmm, xmm [SSE2] +// +func (self *Program) MASKMOVDQU(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MASKMOVDQU", 2, Operands { v0, v1 }) + // MASKMOVDQU xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for MASKMOVDQU") + } + return p +} + +// MASKMOVQ performs "Store Selected Bytes of Quadword". +// +// Mnemonic : MASKMOVQ +// Supported forms : (1 form) +// +// * MASKMOVQ mm, mm [MMX+] +// +func (self *Program) MASKMOVQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MASKMOVQ", 2, Operands { v0, v1 }) + // MASKMOVQ mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for MASKMOVQ") + } + return p +} + +// MAXPD performs "Return Maximum Packed Double-Precision Floating-Point Values". +// +// Mnemonic : MAXPD +// Supported forms : (2 forms) +// +// * MAXPD xmm, xmm [SSE2] +// * MAXPD m128, xmm [SSE2] +// +func (self *Program) MAXPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MAXPD", 2, Operands { v0, v1 }) + // MAXPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MAXPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MAXPD") + } + return p +} + +// MAXPS performs "Return Maximum Packed Single-Precision Floating-Point Values". +// +// Mnemonic : MAXPS +// Supported forms : (2 forms) +// +// * MAXPS xmm, xmm [SSE] +// * MAXPS m128, xmm [SSE] +// +func (self *Program) MAXPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MAXPS", 2, Operands { v0, v1 }) + // MAXPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MAXPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MAXPS") + } + return p +} + +// MAXSD performs "Return Maximum Scalar Double-Precision Floating-Point Value". 
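+//
+// Worked encoding example: MAXSD(XMM1, XMM0) emits f2 0f 5f c1. MAXSD is
+// not commutative in the presence of NaNs: when either input is a NaN (or
+// both inputs are zeroes of either sign), the source operand (the first
+// argument here) is written to the destination.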
+// +// Mnemonic : MAXSD +// Supported forms : (2 forms) +// +// * MAXSD xmm, xmm [SSE2] +// * MAXSD m64, xmm [SSE2] +// +func (self *Program) MAXSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MAXSD", 2, Operands { v0, v1 }) + // MAXSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MAXSD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MAXSD") + } + return p +} + +// MAXSS performs "Return Maximum Scalar Single-Precision Floating-Point Value". +// +// Mnemonic : MAXSS +// Supported forms : (2 forms) +// +// * MAXSS xmm, xmm [SSE] +// * MAXSS m32, xmm [SSE] +// +func (self *Program) MAXSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MAXSS", 2, Operands { v0, v1 }) + // MAXSS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MAXSS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MAXSS") + } + return p +} + +// MFENCE performs "Memory Fence". +// +// Mnemonic : MFENCE +// Supported forms : (1 form) +// +// * MFENCE [SSE2] +// +func (self *Program) MFENCE() *Instruction { + p := self.alloc("MFENCE", 0, Operands { }) + // MFENCE + self.require(ISA_SSE2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0xae) + m.emit(0xf0) + }) + return p +} + +// MINPD performs "Return Minimum Packed Double-Precision Floating-Point Values". +// +// Mnemonic : MINPD +// Supported forms : (2 forms) +// +// * MINPD xmm, xmm [SSE2] +// * MINPD m128, xmm [SSE2] +// +func (self *Program) MINPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MINPD", 2, Operands { v0, v1 }) + // MINPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MINPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MINPD") + } + return p +} + +// MINPS performs "Return Minimum Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : MINPS +// Supported forms : (2 forms) +// +// * MINPS xmm, xmm [SSE] +// * MINPS m128, xmm [SSE] +// +func (self *Program) MINPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MINPS", 2, Operands { v0, v1 }) + // MINPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MINPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MINPS") + } + return p +} + +// MINSD performs "Return Minimum Scalar Double-Precision Floating-Point Value". +// +// Mnemonic : MINSD +// Supported forms : (2 forms) +// +// * MINSD xmm, xmm [SSE2] +// * MINSD m64, xmm [SSE2] +// +func (self *Program) MINSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MINSD", 2, Operands { v0, v1 }) + // MINSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MINSD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MINSD") + } + return p +} + +// MINSS performs "Return Minimum Scalar Single-Precision Floating-Point Value". +// +// Mnemonic : MINSS +// Supported forms : (2 forms) +// +// * MINSS xmm, xmm [SSE] +// * MINSS m32, xmm [SSE] +// +func (self *Program) MINSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MINSS", 2, Operands { v0, v1 }) + // MINSS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MINSS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MINSS") + } + return p +} + +// MONITOR performs "Monitor a Linear Address Range". +// +// Mnemonic : MONITOR +// Supported forms : (1 form) +// +// * MONITOR [MONITOR] +// +func (self *Program) MONITOR() *Instruction { + p := self.alloc("MONITOR", 0, Operands { }) + // MONITOR + self.require(ISA_MONITOR) + p.domain = DomainMisc + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x01) + m.emit(0xc8) + }) + return p +} + +// MONITORX performs "Monitor a Linear Address Range with Timeout". 
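+//
+// All operands are implicit, which is why the form takes no arguments: the
+// linear address to monitor is supplied in RAX/EAX, extensions in ECX and
+// hints in EDX, and the instruction is normally paired with MWAITX.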
+// +// Mnemonic : MONITORX +// Supported forms : (1 form) +// +// * MONITORX [MONITORX] +// +func (self *Program) MONITORX() *Instruction { + p := self.alloc("MONITORX", 0, Operands { }) + // MONITORX + self.require(ISA_MONITORX) + p.domain = DomainMisc + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x01) + m.emit(0xfa) + }) + return p +} + +// MOVAPD performs "Move Aligned Packed Double-Precision Floating-Point Values". +// +// Mnemonic : MOVAPD +// Supported forms : (3 forms) +// +// * MOVAPD xmm, xmm [SSE2] +// * MOVAPD m128, xmm [SSE2] +// * MOVAPD xmm, m128 [SSE2] +// +func (self *Program) MOVAPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVAPD", 2, Operands { v0, v1 }) + // MOVAPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVAPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVAPD xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVAPD") + } + return p +} + +// MOVAPS performs "Move Aligned Packed Single-Precision Floating-Point Values". +// +// Mnemonic : MOVAPS +// Supported forms : (3 forms) +// +// * MOVAPS xmm, xmm [SSE] +// * MOVAPS m128, xmm [SSE] +// * MOVAPS xmm, m128 [SSE] +// +func (self *Program) MOVAPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVAPS", 2, Operands { v0, v1 }) + // MOVAPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVAPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVAPS xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVAPS") + } + return p +} + +// MOVB performs "Move". 
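+//
+// For the imm8-to-register form the emitter registers two equivalent
+// encodings, the generic c6 /0 ib and the shorter b0+rb ib, so the
+// instruction selector may use either; for example, MOVB(1, AL) can encode
+// as just two bytes: b0 01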
+// +// Mnemonic : MOV +// Supported forms : (5 forms) +// +// * MOVB imm8, r8 +// * MOVB r8, r8 +// * MOVB m8, r8 +// * MOVB imm8, m8 +// * MOVB r8, m8 +// +func (self *Program) MOVB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVB", 2, Operands { v0, v1 }) + // MOVB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xb0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // MOVB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x88) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x8a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVB m8, r8 + if isM8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1])) + m.emit(0x8a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc6) + m.mrsd(0, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // MOVB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x88) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVB") + } + return p +} + +// MOVBEL performs "Move Data After Swapping Bytes". +// +// Mnemonic : MOVBE +// Supported forms : (2 forms) +// +// * MOVBEL m32, r32 [MOVBE] +// * MOVBEL r32, m32 [MOVBE] +// +func (self *Program) MOVBEL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVBEL", 2, Operands { v0, v1 }) + // MOVBEL m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_MOVBE) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf0) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVBEL r32, m32 + if isReg32(v0) && isM32(v1) { + self.require(ISA_MOVBE) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf1) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVBEL") + } + return p +} + +// MOVBEQ performs "Move Data After Swapping Bytes". 
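+//
+// A usage sketch for a big-endian 64-bit load (illustrative; it assumes
+// Ptr, RDI and RAX are the package's operand helpers, which are not defined
+// in this file):
+//
+//     p.MOVBEQ(Ptr(RDI, 0), RAX)    // emits: 48 0f 38 f0 07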
+// +// Mnemonic : MOVBE +// Supported forms : (2 forms) +// +// * MOVBEQ m64, r64 [MOVBE] +// * MOVBEQ r64, m64 [MOVBE] +// +func (self *Program) MOVBEQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVBEQ", 2, Operands { v0, v1 }) + // MOVBEQ m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_MOVBE) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf0) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVBEQ r64, m64 + if isReg64(v0) && isM64(v1) { + self.require(ISA_MOVBE) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf1) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVBEQ") + } + return p +} + +// MOVBEW performs "Move Data After Swapping Bytes". +// +// Mnemonic : MOVBE +// Supported forms : (2 forms) +// +// * MOVBEW m16, r16 [MOVBE] +// * MOVBEW r16, m16 [MOVBE] +// +func (self *Program) MOVBEW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVBEW", 2, Operands { v0, v1 }) + // MOVBEW m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_MOVBE) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf0) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVBEW r16, m16 + if isReg16(v0) && isM16(v1) { + self.require(ISA_MOVBE) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xf1) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVBEW") + } + return p +} + +// MOVD performs "Move Doubleword". 
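+//
+// The operand types pick the family: MMX forms use the bare 0x0f 0x6e/0x7e
+// opcodes, and the SSE2 forms are the same opcodes behind a 0x66 prefix.
+// A GPR<->XMM round trip:
+//
+//     p.MOVD(EAX, XMM0)   // r32 -> xmm, upper 96 bits of xmm0 are zeroed
+//     p.MOVD(XMM0, EAX)   // xmm -> r32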
+// +// Mnemonic : MOVD +// Supported forms : (8 forms) +// +// * MOVD mm, r32 [MMX] +// * MOVD r32, mm [MMX] +// * MOVD m32, mm [MMX] +// * MOVD mm, m32 [MMX] +// * MOVD xmm, r32 [SSE2] +// * MOVD r32, xmm [SSE2] +// * MOVD m32, xmm [SSE2] +// * MOVD xmm, m32 [SSE2] +// +func (self *Program) MOVD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVD", 2, Operands { v0, v1 }) + // MOVD mm, r32 + if isMM(v0) && isReg32(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVD r32, mm + if isReg32(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x6e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVD m32, mm + if isM32(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x6e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVD mm, m32 + if isMM(v0) && isM32(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x7e) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // MOVD xmm, r32 + if isXMM(v0) && isReg32(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVD r32, xmm + if isReg32(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x6e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVD m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x6e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVD xmm, m32 + if isXMM(v0) && isM32(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x7e) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVD") + } + return p +} + +// MOVDDUP performs "Move One Double-FP and Duplicate". 
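+//
+// Reads one double and duplicates it into both 64-bit lanes, a cheap scalar
+// broadcast predating AVX's VBROADCASTSD (Ptr helper assumed):
+//
+//     p.MOVDDUP(Ptr(RSI, 0), XMM0)   // xmm0 = { m64, m64 }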
+// +// Mnemonic : MOVDDUP +// Supported forms : (2 forms) +// +// * MOVDDUP xmm, xmm [SSE3] +// * MOVDDUP m64, xmm [SSE3] +// +func (self *Program) MOVDDUP(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVDDUP", 2, Operands { v0, v1 }) + // MOVDDUP xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVDDUP m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVDDUP") + } + return p +} + +// MOVDQ2Q performs "Move Quadword from XMM to MMX Technology Register". +// +// Mnemonic : MOVDQ2Q +// Supported forms : (1 form) +// +// * MOVDQ2Q xmm, mm [SSE2] +// +func (self *Program) MOVDQ2Q(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVDQ2Q", 2, Operands { v0, v1 }) + // MOVDQ2Q xmm, mm + if isXMM(v0) && isMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for MOVDQ2Q") + } + return p +} + +// MOVDQA performs "Move Aligned Double Quadword". +// +// Mnemonic : MOVDQA +// Supported forms : (3 forms) +// +// * MOVDQA xmm, xmm [SSE2] +// * MOVDQA m128, xmm [SSE2] +// * MOVDQA xmm, m128 [SSE2] +// +func (self *Program) MOVDQA(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVDQA", 2, Operands { v0, v1 }) + // MOVDQA xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVDQA m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVDQA xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVDQA") + } + return p +} + +// MOVDQU performs "Move Unaligned Double Quadword". 
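+//
+// Unlike MOVDQA above, these forms never fault on misaligned addresses (the
+// aligned form raises #GP when its m128 operand is not 16-byte aligned), so
+// this is the safe default for arbitrary pointers (Ptr helper assumed):
+//
+//     p.MOVDQU(Ptr(RSI, 1), XMM1)   // 16-byte load from an odd address is fine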
+// +// Mnemonic : MOVDQU +// Supported forms : (3 forms) +// +// * MOVDQU xmm, xmm [SSE2] +// * MOVDQU m128, xmm [SSE2] +// * MOVDQU xmm, m128 [SSE2] +// +func (self *Program) MOVDQU(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVDQU", 2, Operands { v0, v1 }) + // MOVDQU xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVDQU m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVDQU xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVDQU") + } + return p +} + +// MOVHLPS performs "Move Packed Single-Precision Floating-Point Values High to Low". +// +// Mnemonic : MOVHLPS +// Supported forms : (1 form) +// +// * MOVHLPS xmm, xmm [SSE] +// +func (self *Program) MOVHLPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVHLPS", 2, Operands { v0, v1 }) + // MOVHLPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for MOVHLPS") + } + return p +} + +// MOVHPD performs "Move High Packed Double-Precision Floating-Point Value". +// +// Mnemonic : MOVHPD +// Supported forms : (2 forms) +// +// * MOVHPD m64, xmm [SSE2] +// * MOVHPD xmm, m64 [SSE2] +// +func (self *Program) MOVHPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVHPD", 2, Operands { v0, v1 }) + // MOVHPD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVHPD xmm, m64 + if isXMM(v0) && isM64(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x17) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVHPD") + } + return p +} + +// MOVHPS performs "Move High Packed Single-Precision Floating-Point Values". 
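+//
+// Memory-only 8-byte forms that touch just the upper half of the register,
+// so a MOVLPS/MOVHPS pair (MOVLPS appears below) can assemble a 16-byte
+// vector from two unaligned 8-byte halves (Ptr helper assumed):
+//
+//     p.MOVLPS(Ptr(RSI, 0), XMM0)   // fill xmm0[63:0]
+//     p.MOVHPS(Ptr(RSI, 8), XMM0)   // fill xmm0[127:64]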
+// +// Mnemonic : MOVHPS +// Supported forms : (2 forms) +// +// * MOVHPS m64, xmm [SSE] +// * MOVHPS xmm, m64 [SSE] +// +func (self *Program) MOVHPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVHPS", 2, Operands { v0, v1 }) + // MOVHPS m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVHPS xmm, m64 + if isXMM(v0) && isM64(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x17) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVHPS") + } + return p +} + +// MOVL performs "Move". +// +// Mnemonic : MOV +// Supported forms : (5 forms) +// +// * MOVL imm32, r32 +// * MOVL r32, r32 +// * MOVL m32, r32 +// * MOVL imm32, m32 +// * MOVL r32, m32 +// +func (self *Program) MOVL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVL", 2, Operands { v0, v1 }) + // MOVL imm32, r32 + if isImm32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xc7) + m.emit(0xc0 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xb8 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // MOVL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x89) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x8b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVL m32, r32 + if isM32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x8b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVL imm32, m32 + if isImm32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc7) + m.mrsd(0, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // MOVL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x89) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVL") + } + return p +} + +// MOVLHPS performs "Move Packed Single-Precision Floating-Point Values Low to High". +// +// Mnemonic : MOVLHPS +// Supported forms : (1 form) +// +// * MOVLHPS xmm, xmm [SSE] +// +func (self *Program) MOVLHPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVLHPS", 2, Operands { v0, v1 }) + // MOVLHPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for MOVLHPS") + } + return p +} + +// MOVLPD performs "Move Low Packed Double-Precision Floating-Point Value". 
+// +// Mnemonic : MOVLPD +// Supported forms : (2 forms) +// +// * MOVLPD m64, xmm [SSE2] +// * MOVLPD xmm, m64 [SSE2] +// +func (self *Program) MOVLPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVLPD", 2, Operands { v0, v1 }) + // MOVLPD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVLPD xmm, m64 + if isXMM(v0) && isM64(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x13) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVLPD") + } + return p +} + +// MOVLPS performs "Move Low Packed Single-Precision Floating-Point Values". +// +// Mnemonic : MOVLPS +// Supported forms : (2 forms) +// +// * MOVLPS m64, xmm [SSE] +// * MOVLPS xmm, m64 [SSE] +// +func (self *Program) MOVLPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVLPS", 2, Operands { v0, v1 }) + // MOVLPS m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVLPS xmm, m64 + if isXMM(v0) && isM64(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x13) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVLPS") + } + return p +} + +// MOVMSKPD performs "Extract Packed Double-Precision Floating-Point Sign Mask". +// +// Mnemonic : MOVMSKPD +// Supported forms : (1 form) +// +// * MOVMSKPD xmm, r32 [SSE2] +// +func (self *Program) MOVMSKPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVMSKPD", 2, Operands { v0, v1 }) + // MOVMSKPD xmm, r32 + if isXMM(v0) && isReg32(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x50) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for MOVMSKPD") + } + return p +} + +// MOVMSKPS performs "Extract Packed Single-Precision Floating-Point Sign Mask". +// +// Mnemonic : MOVMSKPS +// Supported forms : (1 form) +// +// * MOVMSKPS xmm, r32 [SSE] +// +func (self *Program) MOVMSKPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVMSKPS", 2, Operands { v0, v1 }) + // MOVMSKPS xmm, r32 + if isXMM(v0) && isReg32(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x50) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for MOVMSKPS") + } + return p +} + +// MOVNTDQ performs "Store Double Quadword Using Non-Temporal Hint". 
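+//
+// A streaming store: the 16 bytes bypass the cache via write-combining, and
+// the m128 destination must be 16-byte aligned. Non-temporal stores are
+// weakly ordered, so a run of them is normally closed with SFENCE before
+// the data is published (SFENCE is assumed to be exposed elsewhere in this
+// package):
+//
+//     p.MOVNTDQ(XMM0, Ptr(RDI, 0))   // Ptr helper assumed
+//     p.SFENCE()                     // assumed method, not defined in this file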
+// +// Mnemonic : MOVNTDQ +// Supported forms : (1 form) +// +// * MOVNTDQ xmm, m128 [SSE2] +// +func (self *Program) MOVNTDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVNTDQ", 2, Operands { v0, v1 }) + // MOVNTDQ xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xe7) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVNTDQ") + } + return p +} + +// MOVNTDQA performs "Load Double Quadword Non-Temporal Aligned Hint". +// +// Mnemonic : MOVNTDQA +// Supported forms : (1 form) +// +// * MOVNTDQA m128, xmm [SSE4.1] +// +func (self *Program) MOVNTDQA(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVNTDQA", 2, Operands { v0, v1 }) + // MOVNTDQA m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVNTDQA") + } + return p +} + +// MOVNTIL performs "Store Doubleword Using Non-Temporal Hint". +// +// Mnemonic : MOVNTI +// Supported forms : (1 form) +// +// * MOVNTIL r32, m32 [SSE2] +// +func (self *Program) MOVNTIL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVNTIL", 2, Operands { v0, v1 }) + // MOVNTIL r32, m32 + if isReg32(v0) && isM32(v1) { + self.require(ISA_SSE2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xc3) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVNTIL") + } + return p +} + +// MOVNTIQ performs "Store Doubleword Using Non-Temporal Hint". +// +// Mnemonic : MOVNTI +// Supported forms : (1 form) +// +// * MOVNTIQ r64, m64 [SSE2] +// +func (self *Program) MOVNTIQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVNTIQ", 2, Operands { v0, v1 }) + // MOVNTIQ r64, m64 + if isReg64(v0) && isM64(v1) { + self.require(ISA_SSE2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x0f) + m.emit(0xc3) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVNTIQ") + } + return p +} + +// MOVNTPD performs "Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint". +// +// Mnemonic : MOVNTPD +// Supported forms : (1 form) +// +// * MOVNTPD xmm, m128 [SSE2] +// +func (self *Program) MOVNTPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVNTPD", 2, Operands { v0, v1 }) + // MOVNTPD xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x2b) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVNTPD") + } + return p +} + +// MOVNTPS performs "Store Packed Single-Precision Floating-Point Values Using Non-Temporal Hint". 
+// +// Mnemonic : MOVNTPS +// Supported forms : (1 form) +// +// * MOVNTPS xmm, m128 [SSE] +// +func (self *Program) MOVNTPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVNTPS", 2, Operands { v0, v1 }) + // MOVNTPS xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x2b) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVNTPS") + } + return p +} + +// MOVNTQ performs "Store of Quadword Using Non-Temporal Hint". +// +// Mnemonic : MOVNTQ +// Supported forms : (1 form) +// +// * MOVNTQ mm, m64 [MMX+] +// +func (self *Program) MOVNTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVNTQ", 2, Operands { v0, v1 }) + // MOVNTQ mm, m64 + if isMM(v0) && isM64(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xe7) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVNTQ") + } + return p +} + +// MOVNTSD performs "Store Scalar Double-Precision Floating-Point Values Using Non-Temporal Hint". +// +// Mnemonic : MOVNTSD +// Supported forms : (1 form) +// +// * MOVNTSD xmm, m64 [SSE4A] +// +func (self *Program) MOVNTSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVNTSD", 2, Operands { v0, v1 }) + // MOVNTSD xmm, m64 + if isXMM(v0) && isM64(v1) { + self.require(ISA_SSE4A) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x2b) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVNTSD") + } + return p +} + +// MOVNTSS performs "Store Scalar Single-Precision Floating-Point Values Using Non-Temporal Hint". +// +// Mnemonic : MOVNTSS +// Supported forms : (1 form) +// +// * MOVNTSS xmm, m32 [SSE4A] +// +func (self *Program) MOVNTSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVNTSS", 2, Operands { v0, v1 }) + // MOVNTSS xmm, m32 + if isXMM(v0) && isM32(v1) { + self.require(ISA_SSE4A) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x2b) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVNTSS") + } + return p +} + +// MOVQ performs "Move". 
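+//
+// With 16 forms this is the most overloaded mover in the file: the operand
+// types pick the family (GPR, MMX or SSE2), and several forms register two
+// alternative encodings for the assembler to choose between. Note the
+// immediate split below: an imm32 uses the sign-extended 0xc7 encoding,
+// while a full imm64 needs the 10-byte 0xb8 ("movabs") form:
+//
+//     p.MOVQ(-1, RAX)                  // fits imm32, sign-extended
+//     p.MOVQ(0x1122334455667788, RAX)  // true imm64, movabs encoding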
+// +// Mnemonic : MOV +// Supported forms : (16 forms) +// +// * MOVQ imm32, r64 +// * MOVQ imm64, r64 +// * MOVQ r64, r64 +// * MOVQ m64, r64 +// * MOVQ imm32, m64 +// * MOVQ r64, m64 +// * MOVQ mm, r64 [MMX] +// * MOVQ r64, mm [MMX] +// * MOVQ mm, mm [MMX] +// * MOVQ m64, mm [MMX] +// * MOVQ mm, m64 [MMX] +// * MOVQ xmm, r64 [SSE2] +// * MOVQ r64, xmm [SSE2] +// * MOVQ xmm, xmm [SSE2] +// * MOVQ m64, xmm [SSE2] +// * MOVQ xmm, m64 [SSE2] +// +func (self *Program) MOVQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVQ", 2, Operands { v0, v1 }) + // MOVQ imm32, r64 + if isImm32Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xc7) + m.emit(0xc0 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // MOVQ imm64, r64 + if isImm64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xb8 | lcode(v[1])) + m.imm8(toImmAny(v[0])) + }) + } + // MOVQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x89) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x8b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVQ m64, r64 + if isM64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x8b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVQ imm32, m64 + if isImm32Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xc7) + m.mrsd(0, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // MOVQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x89) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // MOVQ mm, r64 + if isMM(v0) && isReg64(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x0f) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVQ r64, mm + if isReg64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x6e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVQ mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVQ m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x6e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + 
}) + } + // MOVQ mm, m64 + if isMM(v0) && isM64(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x0f) + m.emit(0x7e) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // MOVQ xmm, r64 + if isXMM(v0) && isReg64(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x0f) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVQ r64, xmm + if isReg64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0x6e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xd6) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVQ m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x7e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0x6e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVQ xmm, m64 + if isXMM(v0) && isM64(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xd6) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x0f) + m.emit(0x7e) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVQ") + } + return p +} + +// MOVQ2DQ performs "Move Quadword from MMX Technology to XMM Register". +// +// Mnemonic : MOVQ2DQ +// Supported forms : (1 form) +// +// * MOVQ2DQ mm, xmm [SSE2] +// +func (self *Program) MOVQ2DQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVQ2DQ", 2, Operands { v0, v1 }) + // MOVQ2DQ mm, xmm + if isMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for MOVQ2DQ") + } + return p +} + +// MOVSBL performs "Move with Sign-Extension". 
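+//
+// The suffix pairs in this family follow the Go/AT&T convention: source
+// width first (B), destination width second (L), so MOVSBL sign-extends a
+// byte into a 32-bit register:
+//
+//     p.MOVSBL(CL, EAX)   // EAX = int32(int8(CL))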
+// +// Mnemonic : MOVSX +// Supported forms : (2 forms) +// +// * MOVSBL r8, r32 +// * MOVSBL m8, r32 +// +func (self *Program) MOVSBL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVSBL", 2, Operands { v0, v1 }) + // MOVSBL r8, r32 + if isReg8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVSBL m8, r32 + if isM8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xbe) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVSBL") + } + return p +} + +// MOVSBQ performs "Move with Sign-Extension". +// +// Mnemonic : MOVSX +// Supported forms : (2 forms) +// +// * MOVSBQ r8, r64 +// * MOVSBQ m8, r64 +// +func (self *Program) MOVSBQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVSBQ", 2, Operands { v0, v1 }) + // MOVSBQ r8, r64 + if isReg8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVSBQ m8, r64 + if isM8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0xbe) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVSBQ") + } + return p +} + +// MOVSBW performs "Move with Sign-Extension". +// +// Mnemonic : MOVSX +// Supported forms : (2 forms) +// +// * MOVSBW r8, r16 +// * MOVSBW m8, r16 +// +func (self *Program) MOVSBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVSBW", 2, Operands { v0, v1 }) + // MOVSBW r8, r16 + if isReg8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVSBW m8, r16 + if isM8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xbe) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVSBW") + } + return p +} + +// MOVSD performs "Move Scalar Double-Precision Floating-Point Value". 
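+//
+// Mind the merge semantics: the register-register form moves only the low
+// 64 bits and preserves the destination's upper lane, while the m64 load
+// form zeroes it. (This SSE2 MOVSD is unrelated to the legacy string-move
+// instruction of the same name.) Sketch (Ptr helper assumed):
+//
+//     p.MOVSD(Ptr(RSI, 0), XMM0)   // xmm0[63:0] = m64, upper lane zeroed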
+// +// Mnemonic : MOVSD +// Supported forms : (3 forms) +// +// * MOVSD xmm, xmm [SSE2] +// * MOVSD m64, xmm [SSE2] +// * MOVSD xmm, m64 [SSE2] +// +func (self *Program) MOVSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVSD", 2, Operands { v0, v1 }) + // MOVSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVSD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVSD xmm, m64 + if isXMM(v0) && isM64(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVSD") + } + return p +} + +// MOVSHDUP performs "Move Packed Single-FP High and Duplicate". +// +// Mnemonic : MOVSHDUP +// Supported forms : (2 forms) +// +// * MOVSHDUP xmm, xmm [SSE3] +// * MOVSHDUP m128, xmm [SSE3] +// +func (self *Program) MOVSHDUP(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVSHDUP", 2, Operands { v0, v1 }) + // MOVSHDUP xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVSHDUP m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVSHDUP") + } + return p +} + +// MOVSLDUP performs "Move Packed Single-FP Low and Duplicate". +// +// Mnemonic : MOVSLDUP +// Supported forms : (2 forms) +// +// * MOVSLDUP xmm, xmm [SSE3] +// * MOVSLDUP m128, xmm [SSE3] +// +func (self *Program) MOVSLDUP(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVSLDUP", 2, Operands { v0, v1 }) + // MOVSLDUP xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVSLDUP m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVSLDUP") + } + return p +} + +// MOVSLQ performs "Move Doubleword to Quadword with Sign-Extension". 
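+//
+// The AT&T-style spelling of MOVSXD. A common idiom is widening a signed
+// 32-bit index before address arithmetic (Ptr helper assumed):
+//
+//     p.MOVSLQ(Ptr(RSI, 0), RAX)   // RAX = int64(int32(m32))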
+// +// Mnemonic : MOVSXD +// Supported forms : (2 forms) +// +// * MOVSLQ r32, r64 +// * MOVSLQ m32, r64 +// +func (self *Program) MOVSLQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVSLQ", 2, Operands { v0, v1 }) + // MOVSLQ r32, r64 + if isReg32(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x63) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVSLQ m32, r64 + if isM32(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x63) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVSLQ") + } + return p +} + +// MOVSS performs "Move Scalar Single-Precision Floating-Point Values". +// +// Mnemonic : MOVSS +// Supported forms : (3 forms) +// +// * MOVSS xmm, xmm [SSE] +// * MOVSS m32, xmm [SSE] +// * MOVSS xmm, m32 [SSE] +// +func (self *Program) MOVSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVSS", 2, Operands { v0, v1 }) + // MOVSS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVSS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVSS xmm, m32 + if isXMM(v0) && isM32(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVSS") + } + return p +} + +// MOVSWL performs "Move with Sign-Extension". +// +// Mnemonic : MOVSX +// Supported forms : (2 forms) +// +// * MOVSWL r16, r32 +// * MOVSWL m16, r32 +// +func (self *Program) MOVSWL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVSWL", 2, Operands { v0, v1 }) + // MOVSWL r16, r32 + if isReg16(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xbf) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVSWL m16, r32 + if isM16(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xbf) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVSWL") + } + return p +} + +// MOVSWQ performs "Move with Sign-Extension". 
+// +// Mnemonic : MOVSX +// Supported forms : (2 forms) +// +// * MOVSWQ r16, r64 +// * MOVSWQ m16, r64 +// +func (self *Program) MOVSWQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVSWQ", 2, Operands { v0, v1 }) + // MOVSWQ r16, r64 + if isReg16(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xbf) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVSWQ m16, r64 + if isM16(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0xbf) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVSWQ") + } + return p +} + +// MOVUPD performs "Move Unaligned Packed Double-Precision Floating-Point Values". +// +// Mnemonic : MOVUPD +// Supported forms : (3 forms) +// +// * MOVUPD xmm, xmm [SSE2] +// * MOVUPD m128, xmm [SSE2] +// * MOVUPD xmm, m128 [SSE2] +// +func (self *Program) MOVUPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVUPD", 2, Operands { v0, v1 }) + // MOVUPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVUPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVUPD xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVUPD") + } + return p +} + +// MOVUPS performs "Move Unaligned Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : MOVUPS +// Supported forms : (3 forms) +// +// * MOVUPS xmm, xmm [SSE] +// * MOVUPS m128, xmm [SSE] +// * MOVUPS xmm, m128 [SSE] +// +func (self *Program) MOVUPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVUPS", 2, Operands { v0, v1 }) + // MOVUPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // MOVUPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVUPS xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVUPS") + } + return p +} + +// MOVW performs "Move". +// +// Mnemonic : MOV +// Supported forms : (5 forms) +// +// * MOVW imm16, r16 +// * MOVW r16, r16 +// * MOVW m16, r16 +// * MOVW imm16, m16 +// * MOVW r16, m16 +// +func (self *Program) MOVW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVW", 2, Operands { v0, v1 }) + // MOVW imm16, r16 + if isImm16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xc7) + m.emit(0xc0 | lcode(v[1])) + m.imm2(toImmAny(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xb8 | lcode(v[1])) + m.imm2(toImmAny(v[0])) + }) + } + // MOVW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x89) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x8b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVW m16, r16 + if isM16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x8b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // MOVW imm16, m16 + if isImm16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xc7) + m.mrsd(0, addr(v[1]), 1) + m.imm2(toImmAny(v[0])) + }) + } + // MOVW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x89) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVW") + } + return p +} + +// MOVZBL performs "Move with Zero-Extend". 
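+//
+// Zero-extending counterpart of MOVSBL above. There is no MOVZLQ in this
+// family because any 32-bit write (e.g. MOVL) already zeroes the upper 32
+// bits of a 64-bit register:
+//
+//     p.MOVZBL(Ptr(RDI, 0), EAX)   // Ptr assumed; clears bits 8..63 of RAX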
+// +// Mnemonic : MOVZX +// Supported forms : (2 forms) +// +// * MOVZBL r8, r32 +// * MOVZBL m8, r32 +// +func (self *Program) MOVZBL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVZBL", 2, Operands { v0, v1 }) + // MOVZBL r8, r32 + if isReg8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVZBL m8, r32 + if isM8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xb6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVZBL") + } + return p +} + +// MOVZBQ performs "Move with Zero-Extend". +// +// Mnemonic : MOVZX +// Supported forms : (2 forms) +// +// * MOVZBQ r8, r64 +// * MOVZBQ m8, r64 +// +func (self *Program) MOVZBQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVZBQ", 2, Operands { v0, v1 }) + // MOVZBQ r8, r64 + if isReg8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVZBQ m8, r64 + if isM8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0xb6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVZBQ") + } + return p +} + +// MOVZBW performs "Move with Zero-Extend". +// +// Mnemonic : MOVZX +// Supported forms : (2 forms) +// +// * MOVZBW r8, r16 +// * MOVZBW m8, r16 +// +func (self *Program) MOVZBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVZBW", 2, Operands { v0, v1 }) + // MOVZBW r8, r16 + if isReg8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVZBW m8, r16 + if isM8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xb6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVZBW") + } + return p +} + +// MOVZWL performs "Move with Zero-Extend". +// +// Mnemonic : MOVZX +// Supported forms : (2 forms) +// +// * MOVZWL r16, r32 +// * MOVZWL m16, r32 +// +func (self *Program) MOVZWL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVZWL", 2, Operands { v0, v1 }) + // MOVZWL r16, r32 + if isReg16(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVZWL m16, r32 + if isM16(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xb7) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVZWL") + } + return p +} + +// MOVZWQ performs "Move with Zero-Extend". 
+// +// Mnemonic : MOVZX +// Supported forms : (2 forms) +// +// * MOVZWQ r16, r64 +// * MOVZWQ m16, r64 +// +func (self *Program) MOVZWQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MOVZWQ", 2, Operands { v0, v1 }) + // MOVZWQ r16, r64 + if isReg16(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MOVZWQ m16, r64 + if isM16(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0xb7) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MOVZWQ") + } + return p +} + +// MPSADBW performs "Compute Multiple Packed Sums of Absolute Difference". +// +// Mnemonic : MPSADBW +// Supported forms : (2 forms) +// +// * MPSADBW imm8, xmm, xmm [SSE4.1] +// * MPSADBW imm8, m128, xmm [SSE4.1] +// +func (self *Program) MPSADBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("MPSADBW", 3, Operands { v0, v1, v2 }) + // MPSADBW imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x42) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // MPSADBW imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x42) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for MPSADBW") + } + return p +} + +// MULB performs "Unsigned Multiply". +// +// Mnemonic : MUL +// Supported forms : (2 forms) +// +// * MULB r8 +// * MULB m8 +// +func (self *Program) MULB(v0 interface{}) *Instruction { + p := self.alloc("MULB", 1, Operands { v0 }) + // MULB r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0xf6) + m.emit(0xe0 | lcode(v[0])) + }) + } + // MULB m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xf6) + m.mrsd(4, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MULB") + } + return p +} + +// MULL performs "Unsigned Multiply". +// +// Mnemonic : MUL +// Supported forms : (2 forms) +// +// * MULL r32 +// * MULL m32 +// +func (self *Program) MULL(v0 interface{}) *Instruction { + p := self.alloc("MULL", 1, Operands { v0 }) + // MULL r32 + if isReg32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0xf7) + m.emit(0xe0 | lcode(v[0])) + }) + } + // MULL m32 + if isM32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xf7) + m.mrsd(4, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MULL") + } + return p +} + +// MULPD performs "Multiply Packed Double-Precision Floating-Point Values". 
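+//
+// Two-operand SSE arithmetic: the destination is also the first source
+// (dst *= src lane-wise), with the source written first per this package's
+// operand order:
+//
+//     p.MULPD(XMM1, XMM0)   // xmm0[i] *= xmm1[i] for both double lanes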
+// +// Mnemonic : MULPD +// Supported forms : (2 forms) +// +// * MULPD xmm, xmm [SSE2] +// * MULPD m128, xmm [SSE2] +// +func (self *Program) MULPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MULPD", 2, Operands { v0, v1 }) + // MULPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x59) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MULPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x59) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MULPD") + } + return p +} + +// MULPS performs "Multiply Packed Single-Precision Floating-Point Values". +// +// Mnemonic : MULPS +// Supported forms : (2 forms) +// +// * MULPS xmm, xmm [SSE] +// * MULPS m128, xmm [SSE] +// +func (self *Program) MULPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MULPS", 2, Operands { v0, v1 }) + // MULPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x59) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MULPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x59) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MULPS") + } + return p +} + +// MULQ performs "Unsigned Multiply". +// +// Mnemonic : MUL +// Supported forms : (2 forms) +// +// * MULQ r64 +// * MULQ m64 +// +func (self *Program) MULQ(v0 interface{}) *Instruction { + p := self.alloc("MULQ", 1, Operands { v0 }) + // MULQ r64 + if isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0])) + m.emit(0xf7) + m.emit(0xe0 | lcode(v[0])) + }) + } + // MULQ m64 + if isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[0])) + m.emit(0xf7) + m.mrsd(4, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MULQ") + } + return p +} + +// MULSD performs "Multiply Scalar Double-Precision Floating-Point Values". 
+// +// Mnemonic : MULSD +// Supported forms : (2 forms) +// +// * MULSD xmm, xmm [SSE2] +// * MULSD m64, xmm [SSE2] +// +func (self *Program) MULSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MULSD", 2, Operands { v0, v1 }) + // MULSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x59) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MULSD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x59) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MULSD") + } + return p +} + +// MULSS performs "Multiply Scalar Single-Precision Floating-Point Values". +// +// Mnemonic : MULSS +// Supported forms : (2 forms) +// +// * MULSS xmm, xmm [SSE] +// * MULSS m32, xmm [SSE] +// +func (self *Program) MULSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("MULSS", 2, Operands { v0, v1 }) + // MULSS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x59) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // MULSS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x59) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MULSS") + } + return p +} + +// MULW performs "Unsigned Multiply". +// +// Mnemonic : MUL +// Supported forms : (2 forms) +// +// * MULW r16 +// * MULW m16 +// +func (self *Program) MULW(v0 interface{}) *Instruction { + p := self.alloc("MULW", 1, Operands { v0 }) + // MULW r16 + if isReg16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0xf7) + m.emit(0xe0 | lcode(v[0])) + }) + } + // MULW m16 + if isM16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[0]), false) + m.emit(0xf7) + m.mrsd(4, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MULW") + } + return p +} + +// MULXL performs "Unsigned Multiply Without Affecting Flags". 
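+//
+// MULX is the flag-preserving multiply from BMI2: EDX is the implicit
+// second source, and judging from the VEX fields in the encoder below
+// (v[1] in vvvv, v[2] in reg) the low half of the product lands in the
+// second operand and the high half in the third. Leaving EFLAGS untouched
+// is what makes it useful inside ADC/ADX carry chains:
+//
+//     p.MULXL(ECX, EAX, EBX)   // EDX*ECX -> low half in EAX, high in EBX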
+// +// Mnemonic : MULX +// Supported forms : (2 forms) +// +// * MULXL r32, r32, r32 [BMI2] +// * MULXL m32, r32, r32 [BMI2] +// +func (self *Program) MULXL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("MULXL", 3, Operands { v0, v1, v2 }) + // MULXL r32, r32, r32 + if isReg32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7b ^ (hlcode(v[1]) << 3)) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // MULXL m32, r32, r32 + if isM32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x03, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf6) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MULXL") + } + return p +} + +// MULXQ performs "Unsigned Multiply Without Affecting Flags". +// +// Mnemonic : MULX +// Supported forms : (2 forms) +// +// * MULXQ r64, r64, r64 [BMI2] +// * MULXQ m64, r64, r64 [BMI2] +// +func (self *Program) MULXQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("MULXQ", 3, Operands { v0, v1, v2 }) + // MULXQ r64, r64, r64 + if isReg64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfb ^ (hlcode(v[1]) << 3)) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // MULXQ m64, r64, r64 + if isM64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x83, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf6) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for MULXQ") + } + return p +} + +// MWAIT performs "Monitor Wait". +// +// Mnemonic : MWAIT +// Supported forms : (1 form) +// +// * MWAIT [MONITOR] +// +func (self *Program) MWAIT() *Instruction { + p := self.alloc("MWAIT", 0, Operands { }) + // MWAIT + self.require(ISA_MONITOR) + p.domain = DomainMisc + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x01) + m.emit(0xc9) + }) + return p +} + +// MWAITX performs "Monitor Wait with Timeout". +// +// Mnemonic : MWAITX +// Supported forms : (1 form) +// +// * MWAITX [MONITORX] +// +func (self *Program) MWAITX() *Instruction { + p := self.alloc("MWAITX", 0, Operands { }) + // MWAITX + self.require(ISA_MONITORX) + p.domain = DomainMisc + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x01) + m.emit(0xfb) + }) + return p +} + +// NEGB performs "Two's Complement Negation". 
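+// A single-operand form: the register or memory operand is replaced by its
+// two's complement. Sketch (AL is the package's 8-bit accumulator constant):
+//
+//      p.NEGB(AL)    // AL = -AL, arithmetic flags updated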
+// +// Mnemonic : NEG +// Supported forms : (2 forms) +// +// * NEGB r8 +// * NEGB m8 +// +func (self *Program) NEGB(v0 interface{}) *Instruction { + p := self.alloc("NEGB", 1, Operands { v0 }) + // NEGB r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0xf6) + m.emit(0xd8 | lcode(v[0])) + }) + } + // NEGB m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xf6) + m.mrsd(3, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for NEGB") + } + return p +} + +// NEGL performs "Two's Complement Negation". +// +// Mnemonic : NEG +// Supported forms : (2 forms) +// +// * NEGL r32 +// * NEGL m32 +// +func (self *Program) NEGL(v0 interface{}) *Instruction { + p := self.alloc("NEGL", 1, Operands { v0 }) + // NEGL r32 + if isReg32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0xf7) + m.emit(0xd8 | lcode(v[0])) + }) + } + // NEGL m32 + if isM32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xf7) + m.mrsd(3, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for NEGL") + } + return p +} + +// NEGQ performs "Two's Complement Negation". +// +// Mnemonic : NEG +// Supported forms : (2 forms) +// +// * NEGQ r64 +// * NEGQ m64 +// +func (self *Program) NEGQ(v0 interface{}) *Instruction { + p := self.alloc("NEGQ", 1, Operands { v0 }) + // NEGQ r64 + if isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0])) + m.emit(0xf7) + m.emit(0xd8 | lcode(v[0])) + }) + } + // NEGQ m64 + if isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[0])) + m.emit(0xf7) + m.mrsd(3, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for NEGQ") + } + return p +} + +// NEGW performs "Two's Complement Negation". +// +// Mnemonic : NEG +// Supported forms : (2 forms) +// +// * NEGW r16 +// * NEGW m16 +// +func (self *Program) NEGW(v0 interface{}) *Instruction { + p := self.alloc("NEGW", 1, Operands { v0 }) + // NEGW r16 + if isReg16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0xf7) + m.emit(0xd8 | lcode(v[0])) + }) + } + // NEGW m16 + if isM16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[0]), false) + m.emit(0xf7) + m.mrsd(3, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for NEGW") + } + return p +} + +// NOP performs "No Operation". +// +// Mnemonic : NOP +// Supported forms : (1 form) +// +// * NOP +// +func (self *Program) NOP() *Instruction { + p := self.alloc("NOP", 0, Operands { }) + // NOP + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x90) + }) + return p +} + +// NOTB performs "One's Complement Negation". 
+// +// Mnemonic : NOT +// Supported forms : (2 forms) +// +// * NOTB r8 +// * NOTB m8 +// +func (self *Program) NOTB(v0 interface{}) *Instruction { + p := self.alloc("NOTB", 1, Operands { v0 }) + // NOTB r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0xf6) + m.emit(0xd0 | lcode(v[0])) + }) + } + // NOTB m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xf6) + m.mrsd(2, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for NOTB") + } + return p +} + +// NOTL performs "One's Complement Negation". +// +// Mnemonic : NOT +// Supported forms : (2 forms) +// +// * NOTL r32 +// * NOTL m32 +// +func (self *Program) NOTL(v0 interface{}) *Instruction { + p := self.alloc("NOTL", 1, Operands { v0 }) + // NOTL r32 + if isReg32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0xf7) + m.emit(0xd0 | lcode(v[0])) + }) + } + // NOTL m32 + if isM32(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xf7) + m.mrsd(2, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for NOTL") + } + return p +} + +// NOTQ performs "One's Complement Negation". +// +// Mnemonic : NOT +// Supported forms : (2 forms) +// +// * NOTQ r64 +// * NOTQ m64 +// +func (self *Program) NOTQ(v0 interface{}) *Instruction { + p := self.alloc("NOTQ", 1, Operands { v0 }) + // NOTQ r64 + if isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0])) + m.emit(0xf7) + m.emit(0xd0 | lcode(v[0])) + }) + } + // NOTQ m64 + if isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[0])) + m.emit(0xf7) + m.mrsd(2, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for NOTQ") + } + return p +} + +// NOTW performs "One's Complement Negation". +// +// Mnemonic : NOT +// Supported forms : (2 forms) +// +// * NOTW r16 +// * NOTW m16 +// +func (self *Program) NOTW(v0 interface{}) *Instruction { + p := self.alloc("NOTW", 1, Operands { v0 }) + // NOTW r16 + if isReg16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0xf7) + m.emit(0xd0 | lcode(v[0])) + }) + } + // NOTW m16 + if isM16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[0]), false) + m.emit(0xf7) + m.mrsd(2, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for NOTW") + } + return p +} + +// ORB performs "Logical Inclusive OR". 
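+// As the encoders below show, an imm8 destined for AL selects the short 0x0C
+// opcode with no ModRM byte. Sketch (assuming immediates are passed as plain
+// Go integers, which is what isImm8/toImmAny accept):
+//
+//      p.ORB(0x80, AL)    // AL |= 0x80 via the AL-specific encoding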
+// +// Mnemonic : OR +// Supported forms : (6 forms) +// +// * ORB imm8, al +// * ORB imm8, r8 +// * ORB r8, r8 +// * ORB m8, r8 +// * ORB imm8, m8 +// * ORB r8, m8 +// +func (self *Program) ORB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ORB", 2, Operands { v0, v1 }) + // ORB imm8, al + if isImm8(v0) && v1 == AL { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0c) + m.imm1(toImmAny(v[0])) + }) + } + // ORB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0x80) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ORB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x08) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x0a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ORB m8, r8 + if isM8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1])) + m.emit(0x0a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ORB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x80) + m.mrsd(1, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ORB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x08) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ORB") + } + return p +} + +// ORL performs "Logical Inclusive OR". 
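+// Note the encoding selection below: an immediate that fits in a sign-extended
+// byte (isImm8Ext) matches the short 0x83 /1 form, a full 32-bit immediate the
+// 0x81 /1 form, and more than one candidate encoding may be added for the same
+// operands. Sketch:
+//
+//      p.ORL(0x7f, EAX)         // fits imm8, so the 3-byte 0x83 form applies
+//      p.ORL(0x12345678, EAX)   // needs imm32, so the 0x0D / 0x81 forms apply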
+// +// Mnemonic : OR +// Supported forms : (8 forms) +// +// * ORL imm32, eax +// * ORL imm8, r32 +// * ORL imm32, r32 +// * ORL r32, r32 +// * ORL m32, r32 +// * ORL imm8, m32 +// * ORL imm32, m32 +// * ORL r32, m32 +// +func (self *Program) ORL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ORL", 2, Operands { v0, v1 }) + // ORL imm32, eax + if isImm32(v0) && v1 == EAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0d) + m.imm4(toImmAny(v[0])) + }) + } + // ORL imm8, r32 + if isImm8Ext(v0, 4) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ORL imm32, r32 + if isImm32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xc8 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // ORL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x09) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ORL m32, r32 + if isM32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ORL imm8, m32 + if isImm8Ext(v0, 4) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(1, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ORL imm32, m32 + if isImm32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(1, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // ORL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x09) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ORL") + } + return p +} + +// ORPD performs "Bitwise Logical OR of Double-Precision Floating-Point Values". +// +// Mnemonic : ORPD +// Supported forms : (2 forms) +// +// * ORPD xmm, xmm [SSE2] +// * ORPD m128, xmm [SSE2] +// +func (self *Program) ORPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ORPD", 2, Operands { v0, v1 }) + // ORPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x56) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ORPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x56) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ORPD") + } + return p +} + +// ORPS performs "Bitwise Logical OR of Single-Precision Floating-Point Values". 
+// +// Mnemonic : ORPS +// Supported forms : (2 forms) +// +// * ORPS xmm, xmm [SSE] +// * ORPS m128, xmm [SSE] +// +func (self *Program) ORPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ORPS", 2, Operands { v0, v1 }) + // ORPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x56) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ORPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x56) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ORPS") + } + return p +} + +// ORQ performs "Logical Inclusive OR". +// +// Mnemonic : OR +// Supported forms : (8 forms) +// +// * ORQ imm32, rax +// * ORQ imm8, r64 +// * ORQ imm32, r64 +// * ORQ r64, r64 +// * ORQ m64, r64 +// * ORQ imm8, m64 +// * ORQ imm32, m64 +// * ORQ r64, m64 +// +func (self *Program) ORQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ORQ", 2, Operands { v0, v1 }) + // ORQ imm32, rax + if isImm32(v0) && v1 == RAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48) + m.emit(0x0d) + m.imm4(toImmAny(v[0])) + }) + } + // ORQ imm8, r64 + if isImm8Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x83) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ORQ imm32, r64 + if isImm32Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x81) + m.emit(0xc8 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // ORQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x09) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ORQ m64, r64 + if isM64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ORQ imm8, m64 + if isImm8Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x83) + m.mrsd(1, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ORQ imm32, m64 + if isImm32Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x81) + m.mrsd(1, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // ORQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x09) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ORQ") + } + return p +} + +// ORW performs "Logical Inclusive OR". 
+// +// Mnemonic : OR +// Supported forms : (8 forms) +// +// * ORW imm16, ax +// * ORW imm8, r16 +// * ORW imm16, r16 +// * ORW r16, r16 +// * ORW m16, r16 +// * ORW imm8, m16 +// * ORW imm16, m16 +// * ORW r16, m16 +// +func (self *Program) ORW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ORW", 2, Operands { v0, v1 }) + // ORW imm16, ax + if isImm16(v0) && v1 == AX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x0d) + m.imm2(toImmAny(v[0])) + }) + } + // ORW imm8, r16 + if isImm8Ext(v0, 2) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ORW imm16, r16 + if isImm16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xc8 | lcode(v[1])) + m.imm2(toImmAny(v[0])) + }) + } + // ORW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x09) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // ORW m16, r16 + if isM16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // ORW imm8, m16 + if isImm8Ext(v0, 2) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(1, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ORW imm16, m16 + if isImm16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(1, addr(v[1]), 1) + m.imm2(toImmAny(v[0])) + }) + } + // ORW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x09) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ORW") + } + return p +} + +// PABSB performs "Packed Absolute Value of Byte Integers". 
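+// Both the 64-bit mm and 128-bit xmm forms are SSSE3; self.require(ISA_SSSE3)
+// records that requirement on the program being built. Sketch:
+//
+//      p.PABSB(XMM1, XMM0)    // each byte of XMM0 = |corresponding byte of XMM1|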
+// +// Mnemonic : PABSB +// Supported forms : (4 forms) +// +// * PABSB mm, mm [SSSE3] +// * PABSB m64, mm [SSSE3] +// * PABSB xmm, xmm [SSSE3] +// * PABSB m128, xmm [SSSE3] +// +func (self *Program) PABSB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PABSB", 2, Operands { v0, v1 }) + // PABSB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x1c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PABSB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x1c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PABSB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x1c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PABSB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x1c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PABSB") + } + return p +} + +// PABSD performs "Packed Absolute Value of Doubleword Integers". +// +// Mnemonic : PABSD +// Supported forms : (4 forms) +// +// * PABSD mm, mm [SSSE3] +// * PABSD m64, mm [SSSE3] +// * PABSD xmm, xmm [SSSE3] +// * PABSD m128, xmm [SSSE3] +// +func (self *Program) PABSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PABSD", 2, Operands { v0, v1 }) + // PABSD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PABSD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x1e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PABSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PABSD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x1e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PABSD") + } + return p +} + +// PABSW performs "Packed Absolute Value of Word Integers". 
+// +// Mnemonic : PABSW +// Supported forms : (4 forms) +// +// * PABSW mm, mm [SSSE3] +// * PABSW m64, mm [SSSE3] +// * PABSW xmm, xmm [SSSE3] +// * PABSW m128, xmm [SSSE3] +// +func (self *Program) PABSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PABSW", 2, Operands { v0, v1 }) + // PABSW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PABSW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x1d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PABSW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PABSW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x1d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PABSW") + } + return p +} + +// PACKSSDW performs "Pack Doublewords into Words with Signed Saturation". +// +// Mnemonic : PACKSSDW +// Supported forms : (4 forms) +// +// * PACKSSDW mm, mm [MMX] +// * PACKSSDW m64, mm [MMX] +// * PACKSSDW xmm, xmm [SSE2] +// * PACKSSDW m128, xmm [SSE2] +// +func (self *Program) PACKSSDW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PACKSSDW", 2, Operands { v0, v1 }) + // PACKSSDW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x6b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PACKSSDW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x6b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PACKSSDW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x6b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PACKSSDW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x6b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PACKSSDW") + } + return p +} + +// PACKSSWB performs "Pack Words into Bytes with Signed Saturation". 
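+// Signed saturation clamps each source word to the signed byte range
+// [-128, 127] before packing, so an input word of 300 packs to 127. Sketch:
+//
+//      p.PACKSSWB(XMM1, XMM0)    // XMM0 = 16 saturated bytes from XMM0's and XMM1's words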
+// +// Mnemonic : PACKSSWB +// Supported forms : (4 forms) +// +// * PACKSSWB mm, mm [MMX] +// * PACKSSWB m64, mm [MMX] +// * PACKSSWB xmm, xmm [SSE2] +// * PACKSSWB m128, xmm [SSE2] +// +func (self *Program) PACKSSWB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PACKSSWB", 2, Operands { v0, v1 }) + // PACKSSWB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x63) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PACKSSWB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x63) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PACKSSWB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x63) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PACKSSWB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x63) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PACKSSWB") + } + return p +} + +// PACKUSDW performs "Pack Doublewords into Words with Unsigned Saturation". +// +// Mnemonic : PACKUSDW +// Supported forms : (2 forms) +// +// * PACKUSDW xmm, xmm [SSE4.1] +// * PACKUSDW m128, xmm [SSE4.1] +// +func (self *Program) PACKUSDW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PACKUSDW", 2, Operands { v0, v1 }) + // PACKUSDW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x2b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PACKUSDW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x2b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PACKUSDW") + } + return p +} + +// PACKUSWB performs "Pack Words into Bytes with Unsigned Saturation". 
+// +// Mnemonic : PACKUSWB +// Supported forms : (4 forms) +// +// * PACKUSWB mm, mm [MMX] +// * PACKUSWB m64, mm [MMX] +// * PACKUSWB xmm, xmm [SSE2] +// * PACKUSWB m128, xmm [SSE2] +// +func (self *Program) PACKUSWB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PACKUSWB", 2, Operands { v0, v1 }) + // PACKUSWB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x67) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PACKUSWB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x67) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PACKUSWB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x67) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PACKUSWB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x67) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PACKUSWB") + } + return p +} + +// PADDB performs "Add Packed Byte Integers". +// +// Mnemonic : PADDB +// Supported forms : (4 forms) +// +// * PADDB mm, mm [MMX] +// * PADDB m64, mm [MMX] +// * PADDB xmm, xmm [SSE2] +// * PADDB m128, xmm [SSE2] +// +func (self *Program) PADDB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PADDB", 2, Operands { v0, v1 }) + // PADDB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xfc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xfc) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PADDB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xfc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xfc) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PADDB") + } + return p +} + +// PADDD performs "Add Packed Doubleword Integers". 
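+// Plain packed adds wrap around on overflow (modular arithmetic per lane);
+// compare the saturating PADDS*/PADDUS* variants further down. Sketch:
+//
+//      p.PADDD(XMM1, XMM0)    // each dword of XMM0 += corresponding dword of XMM1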
+// +// Mnemonic : PADDD +// Supported forms : (4 forms) +// +// * PADDD mm, mm [MMX] +// * PADDD m64, mm [MMX] +// * PADDD xmm, xmm [SSE2] +// * PADDD m128, xmm [SSE2] +// +func (self *Program) PADDD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PADDD", 2, Operands { v0, v1 }) + // PADDD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xfe) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xfe) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PADDD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xfe) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xfe) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PADDD") + } + return p +} + +// PADDQ performs "Add Packed Quadword Integers". +// +// Mnemonic : PADDQ +// Supported forms : (4 forms) +// +// * PADDQ mm, mm [SSE2] +// * PADDQ m64, mm [SSE2] +// * PADDQ xmm, xmm [SSE2] +// * PADDQ m128, xmm [SSE2] +// +func (self *Program) PADDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PADDQ", 2, Operands { v0, v1 }) + // PADDQ mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDQ m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd4) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PADDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd4) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PADDQ") + } + return p +} + +// PADDSB performs "Add Packed Signed Byte Integers with Signed Saturation". 
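+// Unlike PADDB, results here saturate to the signed byte range, so
+// 100 + 100 yields 127 instead of wrapping to -56. Sketch:
+//
+//      p.PADDSB(XMM1, XMM0)    // per-byte signed saturating add into XMM0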
+// +// Mnemonic : PADDSB +// Supported forms : (4 forms) +// +// * PADDSB mm, mm [MMX] +// * PADDSB m64, mm [MMX] +// * PADDSB xmm, xmm [SSE2] +// * PADDSB m128, xmm [SSE2] +// +func (self *Program) PADDSB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PADDSB", 2, Operands { v0, v1 }) + // PADDSB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xec) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDSB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xec) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PADDSB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xec) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDSB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xec) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PADDSB") + } + return p +} + +// PADDSW performs "Add Packed Signed Word Integers with Signed Saturation". +// +// Mnemonic : PADDSW +// Supported forms : (4 forms) +// +// * PADDSW mm, mm [MMX] +// * PADDSW m64, mm [MMX] +// * PADDSW xmm, xmm [SSE2] +// * PADDSW m128, xmm [SSE2] +// +func (self *Program) PADDSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PADDSW", 2, Operands { v0, v1 }) + // PADDSW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xed) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDSW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xed) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PADDSW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xed) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDSW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xed) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PADDSW") + } + return p +} + +// PADDUSB performs "Add Packed Unsigned Byte Integers with Unsigned Saturation". 
+// +// Mnemonic : PADDUSB +// Supported forms : (4 forms) +// +// * PADDUSB mm, mm [MMX] +// * PADDUSB m64, mm [MMX] +// * PADDUSB xmm, xmm [SSE2] +// * PADDUSB m128, xmm [SSE2] +// +func (self *Program) PADDUSB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PADDUSB", 2, Operands { v0, v1 }) + // PADDUSB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xdc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDUSB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xdc) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PADDUSB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xdc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDUSB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xdc) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PADDUSB") + } + return p +} + +// PADDUSW performs "Add Packed Unsigned Word Integers with Unsigned Saturation". +// +// Mnemonic : PADDUSW +// Supported forms : (4 forms) +// +// * PADDUSW mm, mm [MMX] +// * PADDUSW m64, mm [MMX] +// * PADDUSW xmm, xmm [SSE2] +// * PADDUSW m128, xmm [SSE2] +// +func (self *Program) PADDUSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PADDUSW", 2, Operands { v0, v1 }) + // PADDUSW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xdd) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDUSW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xdd) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PADDUSW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xdd) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDUSW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xdd) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PADDUSW") + } + return p +} + +// PADDW performs "Add Packed Word Integers". 
+// +// Mnemonic : PADDW +// Supported forms : (4 forms) +// +// * PADDW mm, mm [MMX] +// * PADDW m64, mm [MMX] +// * PADDW xmm, xmm [SSE2] +// * PADDW m128, xmm [SSE2] +// +func (self *Program) PADDW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PADDW", 2, Operands { v0, v1 }) + // PADDW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xfd) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xfd) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PADDW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xfd) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PADDW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xfd) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PADDW") + } + return p +} + +// PALIGNR performs "Packed Align Right". +// +// Mnemonic : PALIGNR +// Supported forms : (4 forms) +// +// * PALIGNR imm8, mm, mm [SSSE3] +// * PALIGNR imm8, m64, mm [SSSE3] +// * PALIGNR imm8, xmm, xmm [SSSE3] +// * PALIGNR imm8, m128, xmm [SSSE3] +// +func (self *Program) PALIGNR(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PALIGNR", 3, Operands { v0, v1, v2 }) + // PALIGNR imm8, mm, mm + if isImm8(v0) && isMM(v1) && isMM(v2) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PALIGNR imm8, m64, mm + if isImm8(v0) && isM64(v1) && isMM(v2) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x0f) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // PALIGNR imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PALIGNR imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x0f) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PALIGNR") + } + return p +} + +// PAND performs "Packed Bitwise Logical AND". 
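+// A common masking idiom: AND the destination with a lane mask produced by one
+// of the PCMPEQ*/PCMPGT* compares defined later in this file. Sketch:
+//
+//      p.PAND(XMM1, XMM0)    // XMM0 &= XMM1 across all 128 bits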
+// +// Mnemonic : PAND +// Supported forms : (4 forms) +// +// * PAND mm, mm [MMX] +// * PAND m64, mm [MMX] +// * PAND xmm, xmm [SSE2] +// * PAND m128, xmm [SSE2] +// +func (self *Program) PAND(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PAND", 2, Operands { v0, v1 }) + // PAND mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PAND m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xdb) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PAND xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PAND m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xdb) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PAND") + } + return p +} + +// PANDN performs "Packed Bitwise Logical AND NOT". +// +// Mnemonic : PANDN +// Supported forms : (4 forms) +// +// * PANDN mm, mm [MMX] +// * PANDN m64, mm [MMX] +// * PANDN xmm, xmm [SSE2] +// * PANDN m128, xmm [SSE2] +// +func (self *Program) PANDN(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PANDN", 2, Operands { v0, v1 }) + // PANDN mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PANDN m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xdf) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PANDN xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PANDN m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xdf) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PANDN") + } + return p +} + +// PAUSE performs "Spin Loop Hint". +// +// Mnemonic : PAUSE +// Supported forms : (1 form) +// +// * PAUSE +// +func (self *Program) PAUSE() *Instruction { + p := self.alloc("PAUSE", 0, Operands { }) + // PAUSE + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.emit(0x90) + }) + return p +} + +// PAVGB performs "Average Packed Byte Integers". 
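+// Each unsigned byte lane becomes the rounded average (a + b + 1) >> 1, so the
+// result never underestimates the exact mean. Sketch:
+//
+//      p.PAVGB(XMM1, XMM0)    // XMM0[i] = (XMM0[i] + XMM1[i] + 1) >> 1 per byte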
+// +// Mnemonic : PAVGB +// Supported forms : (4 forms) +// +// * PAVGB mm, mm [MMX+] +// * PAVGB m64, mm [MMX+] +// * PAVGB xmm, xmm [SSE2] +// * PAVGB m128, xmm [SSE2] +// +func (self *Program) PAVGB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PAVGB", 2, Operands { v0, v1 }) + // PAVGB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe0) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PAVGB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe0) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PAVGB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe0) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PAVGB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe0) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PAVGB") + } + return p +} + +// PAVGUSB performs "Average Packed Byte Integers". +// +// Mnemonic : PAVGUSB +// Supported forms : (2 forms) +// +// * PAVGUSB mm, mm [3dnow!] +// * PAVGUSB m64, mm [3dnow!] +// +func (self *Program) PAVGUSB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PAVGUSB", 2, Operands { v0, v1 }) + // PAVGUSB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0xbf) + }) + } + // PAVGUSB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0xbf) + }) + } + if p.len == 0 { + panic("invalid operands for PAVGUSB") + } + return p +} + +// PAVGW performs "Average Packed Word Integers". 
+// +// Mnemonic : PAVGW +// Supported forms : (4 forms) +// +// * PAVGW mm, mm [MMX+] +// * PAVGW m64, mm [MMX+] +// * PAVGW xmm, xmm [SSE2] +// * PAVGW m128, xmm [SSE2] +// +func (self *Program) PAVGW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PAVGW", 2, Operands { v0, v1 }) + // PAVGW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe3) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PAVGW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe3) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PAVGW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe3) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PAVGW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe3) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PAVGW") + } + return p +} + +// PBLENDVB performs "Variable Blend Packed Bytes". +// +// Mnemonic : PBLENDVB +// Supported forms : (2 forms) +// +// * PBLENDVB xmm0, xmm, xmm [SSE4.1] +// * PBLENDVB xmm0, m128, xmm [SSE4.1] +// +func (self *Program) PBLENDVB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PBLENDVB", 3, Operands { v0, v1, v2 }) + // PBLENDVB xmm0, xmm, xmm + if v0 == XMM0 && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x10) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // PBLENDVB xmm0, m128, xmm + if v0 == XMM0 && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x10) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PBLENDVB") + } + return p +} + +// PBLENDW performs "Blend Packed Words". 
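+// Bit i of the imm8 selects word lane i: a set bit takes the word from the
+// source operand, a clear bit keeps the destination's word. Sketch:
+//
+//      p.PBLENDW(0x0f, XMM1, XMM0)    // low 4 words from XMM1, high 4 kept from XMM0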
+// +// Mnemonic : PBLENDW +// Supported forms : (2 forms) +// +// * PBLENDW imm8, xmm, xmm [SSE4.1] +// * PBLENDW imm8, m128, xmm [SSE4.1] +// +func (self *Program) PBLENDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PBLENDW", 3, Operands { v0, v1, v2 }) + // PBLENDW imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x0e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PBLENDW imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x0e) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PBLENDW") + } + return p +} + +// PCLMULQDQ performs "Carry-Less Quadword Multiplication". +// +// Mnemonic : PCLMULQDQ +// Supported forms : (2 forms) +// +// * PCLMULQDQ imm8, xmm, xmm [PCLMULQDQ] +// * PCLMULQDQ imm8, m128, xmm [PCLMULQDQ] +// +func (self *Program) PCLMULQDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PCLMULQDQ", 3, Operands { v0, v1, v2 }) + // PCLMULQDQ imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_PCLMULQDQ) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x44) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PCLMULQDQ imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_PCLMULQDQ) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x44) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PCLMULQDQ") + } + return p +} + +// PCMPEQB performs "Compare Packed Byte Data for Equality". 
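+// Packed equality compares produce lane masks rather than flags: each equal
+// byte becomes 0xFF and each unequal byte 0x00, ready for PAND/PANDN above.
+// Sketch:
+//
+//      p.PCMPEQB(XMM1, XMM0)    // XMM0[i] = (XMM0[i] == XMM1[i]) ? 0xFF : 0x00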
+// +// Mnemonic : PCMPEQB +// Supported forms : (4 forms) +// +// * PCMPEQB mm, mm [MMX] +// * PCMPEQB m64, mm [MMX] +// * PCMPEQB xmm, xmm [SSE2] +// * PCMPEQB m128, xmm [SSE2] +// +func (self *Program) PCMPEQB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PCMPEQB", 2, Operands { v0, v1 }) + // PCMPEQB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x74) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPEQB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x74) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PCMPEQB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x74) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPEQB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x74) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PCMPEQB") + } + return p +} + +// PCMPEQD performs "Compare Packed Doubleword Data for Equality". +// +// Mnemonic : PCMPEQD +// Supported forms : (4 forms) +// +// * PCMPEQD mm, mm [MMX] +// * PCMPEQD m64, mm [MMX] +// * PCMPEQD xmm, xmm [SSE2] +// * PCMPEQD m128, xmm [SSE2] +// +func (self *Program) PCMPEQD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PCMPEQD", 2, Operands { v0, v1 }) + // PCMPEQD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x76) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPEQD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x76) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PCMPEQD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x76) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPEQD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x76) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PCMPEQD") + } + return p +} + +// PCMPEQQ performs "Compare Packed Quadword Data for Equality". 
+// +// Mnemonic : PCMPEQQ +// Supported forms : (2 forms) +// +// * PCMPEQQ xmm, xmm [SSE4.1] +// * PCMPEQQ m128, xmm [SSE4.1] +// +func (self *Program) PCMPEQQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PCMPEQQ", 2, Operands { v0, v1 }) + // PCMPEQQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x29) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPEQQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x29) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PCMPEQQ") + } + return p +} + +// PCMPEQW performs "Compare Packed Word Data for Equality". +// +// Mnemonic : PCMPEQW +// Supported forms : (4 forms) +// +// * PCMPEQW mm, mm [MMX] +// * PCMPEQW m64, mm [MMX] +// * PCMPEQW xmm, xmm [SSE2] +// * PCMPEQW m128, xmm [SSE2] +// +func (self *Program) PCMPEQW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PCMPEQW", 2, Operands { v0, v1 }) + // PCMPEQW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x75) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPEQW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x75) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PCMPEQW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x75) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPEQW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x75) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PCMPEQW") + } + return p +} + +// PCMPESTRI performs "Packed Compare Explicit Length Strings, Return Index". 
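+//
+// A hypothetical usage sketch (registers and the control byte are
+// illustrative; the operand lengths are read implicitly from EAX and EDX,
+// and the match index is produced in ECX):
+//
+//     p.PCMPESTRI(0x0c, XMM1, XMM0)    // equal-ordered, unsigned bytes; ECX = index of first match, 16 if none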
+// +// Mnemonic : PCMPESTRI +// Supported forms : (2 forms) +// +// * PCMPESTRI imm8, xmm, xmm [SSE4.2] +// * PCMPESTRI imm8, m128, xmm [SSE4.2] +// +func (self *Program) PCMPESTRI(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PCMPESTRI", 3, Operands { v0, v1, v2 }) + // PCMPESTRI imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x61) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PCMPESTRI imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x61) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PCMPESTRI") + } + return p +} + +// PCMPESTRM performs "Packed Compare Explicit Length Strings, Return Mask". +// +// Mnemonic : PCMPESTRM +// Supported forms : (2 forms) +// +// * PCMPESTRM imm8, xmm, xmm [SSE4.2] +// * PCMPESTRM imm8, m128, xmm [SSE4.2] +// +func (self *Program) PCMPESTRM(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PCMPESTRM", 3, Operands { v0, v1, v2 }) + // PCMPESTRM imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x60) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PCMPESTRM imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x60) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PCMPESTRM") + } + return p +} + +// PCMPGTB performs "Compare Packed Signed Byte Integers for Greater Than". 
+// +// Mnemonic : PCMPGTB +// Supported forms : (4 forms) +// +// * PCMPGTB mm, mm [MMX] +// * PCMPGTB m64, mm [MMX] +// * PCMPGTB xmm, xmm [SSE2] +// * PCMPGTB m128, xmm [SSE2] +// +func (self *Program) PCMPGTB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PCMPGTB", 2, Operands { v0, v1 }) + // PCMPGTB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x64) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPGTB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x64) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PCMPGTB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x64) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPGTB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x64) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PCMPGTB") + } + return p +} + +// PCMPGTD performs "Compare Packed Signed Doubleword Integers for Greater Than". +// +// Mnemonic : PCMPGTD +// Supported forms : (4 forms) +// +// * PCMPGTD mm, mm [MMX] +// * PCMPGTD m64, mm [MMX] +// * PCMPGTD xmm, xmm [SSE2] +// * PCMPGTD m128, xmm [SSE2] +// +func (self *Program) PCMPGTD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PCMPGTD", 2, Operands { v0, v1 }) + // PCMPGTD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x66) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPGTD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x66) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PCMPGTD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x66) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPGTD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x66) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PCMPGTD") + } + return p +} + +// PCMPGTQ performs "Compare Packed Data for Greater Than". 
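+//
+// A hypothetical usage sketch (registers are illustrative; the comparison
+// is signed, per 64-bit lane):
+//
+//     p.PCMPGTQ(XMM1, XMM0)    // XMM0[i] = all-ones if XMM0[i] > XMM1[i] (signed), else zero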
+// +// Mnemonic : PCMPGTQ +// Supported forms : (2 forms) +// +// * PCMPGTQ xmm, xmm [SSE4.2] +// * PCMPGTQ m128, xmm [SSE4.2] +// +func (self *Program) PCMPGTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PCMPGTQ", 2, Operands { v0, v1 }) + // PCMPGTQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x37) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPGTQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x37) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PCMPGTQ") + } + return p +} + +// PCMPGTW performs "Compare Packed Signed Word Integers for Greater Than". +// +// Mnemonic : PCMPGTW +// Supported forms : (4 forms) +// +// * PCMPGTW mm, mm [MMX] +// * PCMPGTW m64, mm [MMX] +// * PCMPGTW xmm, xmm [SSE2] +// * PCMPGTW m128, xmm [SSE2] +// +func (self *Program) PCMPGTW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PCMPGTW", 2, Operands { v0, v1 }) + // PCMPGTW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x65) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPGTW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x65) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PCMPGTW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x65) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PCMPGTW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x65) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PCMPGTW") + } + return p +} + +// PCMPISTRI performs "Packed Compare Implicit Length Strings, Return Index". 
+// +// Mnemonic : PCMPISTRI +// Supported forms : (2 forms) +// +// * PCMPISTRI imm8, xmm, xmm [SSE4.2] +// * PCMPISTRI imm8, m128, xmm [SSE4.2] +// +func (self *Program) PCMPISTRI(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PCMPISTRI", 3, Operands { v0, v1, v2 }) + // PCMPISTRI imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x63) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PCMPISTRI imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x63) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PCMPISTRI") + } + return p +} + +// PCMPISTRM performs "Packed Compare Implicit Length Strings, Return Mask". +// +// Mnemonic : PCMPISTRM +// Supported forms : (2 forms) +// +// * PCMPISTRM imm8, xmm, xmm [SSE4.2] +// * PCMPISTRM imm8, m128, xmm [SSE4.2] +// +func (self *Program) PCMPISTRM(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PCMPISTRM", 3, Operands { v0, v1, v2 }) + // PCMPISTRM imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x62) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PCMPISTRM imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x62) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PCMPISTRM") + } + return p +} + +// PDEP performs "Parallel Bits Deposit". 
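+//
+// A hypothetical usage sketch (register choices are illustrative; the first
+// operand is the mask, the second supplies the bits to scatter):
+//
+//     p.PDEP(ECX, EAX, EDX)    // EDX = low bits of EAX deposited at the set-bit positions of ECX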
+// +// Mnemonic : PDEP +// Supported forms : (4 forms) +// +// * PDEP r32, r32, r32 [BMI2] +// * PDEP m32, r32, r32 [BMI2] +// * PDEP r64, r64, r64 [BMI2] +// * PDEP m64, r64, r64 [BMI2] +// +func (self *Program) PDEP(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PDEP", 3, Operands { v0, v1, v2 }) + // PDEP r32, r32, r32 + if isReg32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7b ^ (hlcode(v[1]) << 3)) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // PDEP m32, r32, r32 + if isM32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x03, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf5) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // PDEP r64, r64, r64 + if isReg64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfb ^ (hlcode(v[1]) << 3)) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // PDEP m64, r64, r64 + if isM64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x83, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf5) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PDEP") + } + return p +} + +// PEXT performs "Parallel Bits Extract". +// +// Mnemonic : PEXT +// Supported forms : (4 forms) +// +// * PEXT r32, r32, r32 [BMI2] +// * PEXT m32, r32, r32 [BMI2] +// * PEXT r64, r64, r64 [BMI2] +// * PEXT m64, r64, r64 [BMI2] +// +func (self *Program) PEXT(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PEXT", 3, Operands { v0, v1, v2 }) + // PEXT r32, r32, r32 + if isReg32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7a ^ (hlcode(v[1]) << 3)) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // PEXT m32, r32, r32 + if isM32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x02, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf5) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // PEXT r64, r64, r64 + if isReg64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfa ^ (hlcode(v[1]) << 3)) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // PEXT m64, r64, r64 + if isM64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x82, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf5) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PEXT") + } + return p +} + +// PEXTRB performs "Extract Byte". 
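+//
+// A hypothetical usage sketch (lane index and registers are illustrative):
+//
+//     p.PEXTRB(3, XMM0, EAX)    // EAX = zero-extended byte lane 3 of XMM0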
+// +// Mnemonic : PEXTRB +// Supported forms : (2 forms) +// +// * PEXTRB imm8, xmm, r32 [SSE4.1] +// * PEXTRB imm8, xmm, m8 [SSE4.1] +// +func (self *Program) PEXTRB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PEXTRB", 3, Operands { v0, v1, v2 }) + // PEXTRB imm8, xmm, r32 + if isImm8(v0) && isXMM(v1) && isReg32(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[2], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x14) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // PEXTRB imm8, xmm, m8 + if isImm8(v0) && isXMM(v1) && isM8(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[2]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x14) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PEXTRB") + } + return p +} + +// PEXTRD performs "Extract Doubleword". +// +// Mnemonic : PEXTRD +// Supported forms : (2 forms) +// +// * PEXTRD imm8, xmm, r32 [SSE4.1] +// * PEXTRD imm8, xmm, m32 [SSE4.1] +// +func (self *Program) PEXTRD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PEXTRD", 3, Operands { v0, v1, v2 }) + // PEXTRD imm8, xmm, r32 + if isImm8(v0) && isXMM(v1) && isReg32(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[2], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // PEXTRD imm8, xmm, m32 + if isImm8(v0) && isXMM(v1) && isM32(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[2]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PEXTRD") + } + return p +} + +// PEXTRQ performs "Extract Quadword". +// +// Mnemonic : PEXTRQ +// Supported forms : (2 forms) +// +// * PEXTRQ imm8, xmm, r64 [SSE4.1] +// * PEXTRQ imm8, xmm, m64 [SSE4.1] +// +func (self *Program) PEXTRQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PEXTRQ", 3, Operands { v0, v1, v2 }) + // PEXTRQ imm8, xmm, r64 + if isImm8(v0) && isXMM(v1) && isReg64(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[2])) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // PEXTRQ imm8, xmm, m64 + if isImm8(v0) && isXMM(v1) && isM64(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexm(1, hcode(v[1]), addr(v[2])) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PEXTRQ") + } + return p +} + +// PEXTRW performs "Extract Word". 
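+//
+// A hypothetical usage sketch (lane index and registers are illustrative;
+// note that the register form below adds both the 0F 3A 15 and the legacy
+// 0F C5 encodings, presumably so the encoder can choose between them):
+//
+//     p.PEXTRW(7, XMM0, EDX)    // EDX = zero-extended word lane 7 of XMM0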
+//
+// Mnemonic : PEXTRW
+// Supported forms : (3 forms)
+//
+// * PEXTRW imm8, mm, r32 [MMX+]
+// * PEXTRW imm8, xmm, r32 [SSE4.1]
+// * PEXTRW imm8, xmm, m16 [SSE4.1]
+//
+func (self *Program) PEXTRW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+    p := self.alloc("PEXTRW", 3, Operands { v0, v1, v2 })
+    // PEXTRW imm8, mm, r32
+    if isImm8(v0) && isMM(v1) && isReg32(v2) {
+        self.require(ISA_MMX_PLUS)
+        p.domain = DomainMMXSSE
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.rexo(hcode(v[2]), v[1], false)
+            m.emit(0x0f)
+            m.emit(0xc5)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // PEXTRW imm8, xmm, r32
+    if isImm8(v0) && isXMM(v1) && isReg32(v2) {
+        self.require(ISA_SSE4_1)
+        p.domain = DomainMMXSSE
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x66)
+            m.rexo(hcode(v[1]), v[2], false)
+            m.emit(0x0f)
+            m.emit(0x3a)
+            m.emit(0x15)
+            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2]))
+            m.imm1(toImmAny(v[0]))
+        })
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x66)
+            m.rexo(hcode(v[2]), v[1], false)
+            m.emit(0x0f)
+            m.emit(0xc5)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // PEXTRW imm8, xmm, m16
+    if isImm8(v0) && isXMM(v1) && isM16(v2) {
+        self.require(ISA_SSE4_1)
+        p.domain = DomainMMXSSE
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x66)
+            m.rexo(hcode(v[1]), addr(v[2]), false)
+            m.emit(0x0f)
+            m.emit(0x3a)
+            m.emit(0x15)
+            m.mrsd(lcode(v[1]), addr(v[2]), 1)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for PEXTRW")
+    }
+    return p
+}
+
+// PF2ID performs "Packed Floating-Point to Integer Doubleword Conversion".
+//
+// Mnemonic : PF2ID
+// Supported forms : (2 forms)
+//
+// * PF2ID mm, mm [3dnow!]
+// * PF2ID m64, mm [3dnow!]
+//
+func (self *Program) PF2ID(v0 interface{}, v1 interface{}) *Instruction {
+    p := self.alloc("PF2ID", 2, Operands { v0, v1 })
+    // PF2ID mm, mm
+    if isMM(v0) && isMM(v1) {
+        self.require(ISA_3DNOW)
+        p.domain = DomainAMDSpecific
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.rexo(hcode(v[1]), v[0], false)
+            m.emit(0x0f)
+            m.emit(0x0f)
+            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+            m.emit(0x1d)
+        })
+    }
+    // PF2ID m64, mm
+    if isM64(v0) && isMM(v1) {
+        self.require(ISA_3DNOW)
+        p.domain = DomainAMDSpecific
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.rexo(hcode(v[1]), addr(v[0]), false)
+            m.emit(0x0f)
+            m.emit(0x0f)
+            m.mrsd(lcode(v[1]), addr(v[0]), 1)
+            m.emit(0x1d)
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for PF2ID")
+    }
+    return p
+}
+
+// PF2IW performs "Packed Floating-Point to Integer Word Conversion". 
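+//
+// A hypothetical usage sketch (MM0/MM1 are illustrative MMX register
+// constants; note how 3DNow! instructions are encoded as 0F 0F with the
+// real opcode byte, here 0x1c, emitted after the ModRM/SIB bytes):
+//
+//     p.PF2IW(MM1, MM0)    // MM0 = the two floats of MM1 converted to signed words, sign-extended into each doubleword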
+// +// Mnemonic : PF2IW +// Supported forms : (2 forms) +// +// * PF2IW mm, mm [3dnow!+] +// * PF2IW m64, mm [3dnow!+] +// +func (self *Program) PF2IW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PF2IW", 2, Operands { v0, v1 }) + // PF2IW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW_PLUS) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0x1c) + }) + } + // PF2IW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW_PLUS) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0x1c) + }) + } + if p.len == 0 { + panic("invalid operands for PF2IW") + } + return p +} + +// PFACC performs "Packed Floating-Point Accumulate". +// +// Mnemonic : PFACC +// Supported forms : (2 forms) +// +// * PFACC mm, mm [3dnow!] +// * PFACC m64, mm [3dnow!] +// +func (self *Program) PFACC(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFACC", 2, Operands { v0, v1 }) + // PFACC mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0xae) + }) + } + // PFACC m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0xae) + }) + } + if p.len == 0 { + panic("invalid operands for PFACC") + } + return p +} + +// PFADD performs "Packed Floating-Point Add". +// +// Mnemonic : PFADD +// Supported forms : (2 forms) +// +// * PFADD mm, mm [3dnow!] +// * PFADD m64, mm [3dnow!] +// +func (self *Program) PFADD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFADD", 2, Operands { v0, v1 }) + // PFADD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0x9e) + }) + } + // PFADD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0x9e) + }) + } + if p.len == 0 { + panic("invalid operands for PFADD") + } + return p +} + +// PFCMPEQ performs "Packed Floating-Point Compare for Equal". +// +// Mnemonic : PFCMPEQ +// Supported forms : (2 forms) +// +// * PFCMPEQ mm, mm [3dnow!] +// * PFCMPEQ m64, mm [3dnow!] 
+// +func (self *Program) PFCMPEQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFCMPEQ", 2, Operands { v0, v1 }) + // PFCMPEQ mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0xb0) + }) + } + // PFCMPEQ m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0xb0) + }) + } + if p.len == 0 { + panic("invalid operands for PFCMPEQ") + } + return p +} + +// PFCMPGE performs "Packed Floating-Point Compare for Greater or Equal". +// +// Mnemonic : PFCMPGE +// Supported forms : (2 forms) +// +// * PFCMPGE mm, mm [3dnow!] +// * PFCMPGE m64, mm [3dnow!] +// +func (self *Program) PFCMPGE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFCMPGE", 2, Operands { v0, v1 }) + // PFCMPGE mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0x90) + }) + } + // PFCMPGE m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0x90) + }) + } + if p.len == 0 { + panic("invalid operands for PFCMPGE") + } + return p +} + +// PFCMPGT performs "Packed Floating-Point Compare for Greater Than". +// +// Mnemonic : PFCMPGT +// Supported forms : (2 forms) +// +// * PFCMPGT mm, mm [3dnow!] +// * PFCMPGT m64, mm [3dnow!] +// +func (self *Program) PFCMPGT(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFCMPGT", 2, Operands { v0, v1 }) + // PFCMPGT mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0xa0) + }) + } + // PFCMPGT m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0xa0) + }) + } + if p.len == 0 { + panic("invalid operands for PFCMPGT") + } + return p +} + +// PFMAX performs "Packed Floating-Point Maximum". +// +// Mnemonic : PFMAX +// Supported forms : (2 forms) +// +// * PFMAX mm, mm [3dnow!] +// * PFMAX m64, mm [3dnow!] 
+// +func (self *Program) PFMAX(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFMAX", 2, Operands { v0, v1 }) + // PFMAX mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0xa4) + }) + } + // PFMAX m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0xa4) + }) + } + if p.len == 0 { + panic("invalid operands for PFMAX") + } + return p +} + +// PFMIN performs "Packed Floating-Point Minimum". +// +// Mnemonic : PFMIN +// Supported forms : (2 forms) +// +// * PFMIN mm, mm [3dnow!] +// * PFMIN m64, mm [3dnow!] +// +func (self *Program) PFMIN(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFMIN", 2, Operands { v0, v1 }) + // PFMIN mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0x94) + }) + } + // PFMIN m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0x94) + }) + } + if p.len == 0 { + panic("invalid operands for PFMIN") + } + return p +} + +// PFMUL performs "Packed Floating-Point Multiply". +// +// Mnemonic : PFMUL +// Supported forms : (2 forms) +// +// * PFMUL mm, mm [3dnow!] +// * PFMUL m64, mm [3dnow!] +// +func (self *Program) PFMUL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFMUL", 2, Operands { v0, v1 }) + // PFMUL mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0xb4) + }) + } + // PFMUL m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0xb4) + }) + } + if p.len == 0 { + panic("invalid operands for PFMUL") + } + return p +} + +// PFNACC performs "Packed Floating-Point Negative Accumulate". 
+// +// Mnemonic : PFNACC +// Supported forms : (2 forms) +// +// * PFNACC mm, mm [3dnow!+] +// * PFNACC m64, mm [3dnow!+] +// +func (self *Program) PFNACC(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFNACC", 2, Operands { v0, v1 }) + // PFNACC mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW_PLUS) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0x8a) + }) + } + // PFNACC m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW_PLUS) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0x8a) + }) + } + if p.len == 0 { + panic("invalid operands for PFNACC") + } + return p +} + +// PFPNACC performs "Packed Floating-Point Positive-Negative Accumulate". +// +// Mnemonic : PFPNACC +// Supported forms : (2 forms) +// +// * PFPNACC mm, mm [3dnow!+] +// * PFPNACC m64, mm [3dnow!+] +// +func (self *Program) PFPNACC(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFPNACC", 2, Operands { v0, v1 }) + // PFPNACC mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW_PLUS) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0x8e) + }) + } + // PFPNACC m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW_PLUS) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0x8e) + }) + } + if p.len == 0 { + panic("invalid operands for PFPNACC") + } + return p +} + +// PFRCP performs "Packed Floating-Point Reciprocal Approximation". +// +// Mnemonic : PFRCP +// Supported forms : (2 forms) +// +// * PFRCP mm, mm [3dnow!] +// * PFRCP m64, mm [3dnow!] +// +func (self *Program) PFRCP(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFRCP", 2, Operands { v0, v1 }) + // PFRCP mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0x96) + }) + } + // PFRCP m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0x96) + }) + } + if p.len == 0 { + panic("invalid operands for PFRCP") + } + return p +} + +// PFRCPIT1 performs "Packed Floating-Point Reciprocal Iteration 1". +// +// Mnemonic : PFRCPIT1 +// Supported forms : (2 forms) +// +// * PFRCPIT1 mm, mm [3dnow!] +// * PFRCPIT1 m64, mm [3dnow!] 
+// +func (self *Program) PFRCPIT1(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFRCPIT1", 2, Operands { v0, v1 }) + // PFRCPIT1 mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0xa6) + }) + } + // PFRCPIT1 m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0xa6) + }) + } + if p.len == 0 { + panic("invalid operands for PFRCPIT1") + } + return p +} + +// PFRCPIT2 performs "Packed Floating-Point Reciprocal Iteration 2". +// +// Mnemonic : PFRCPIT2 +// Supported forms : (2 forms) +// +// * PFRCPIT2 mm, mm [3dnow!] +// * PFRCPIT2 m64, mm [3dnow!] +// +func (self *Program) PFRCPIT2(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFRCPIT2", 2, Operands { v0, v1 }) + // PFRCPIT2 mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0xb6) + }) + } + // PFRCPIT2 m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0xb6) + }) + } + if p.len == 0 { + panic("invalid operands for PFRCPIT2") + } + return p +} + +// PFRSQIT1 performs "Packed Floating-Point Reciprocal Square Root Iteration 1". +// +// Mnemonic : PFRSQIT1 +// Supported forms : (2 forms) +// +// * PFRSQIT1 mm, mm [3dnow!] +// * PFRSQIT1 m64, mm [3dnow!] +// +func (self *Program) PFRSQIT1(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFRSQIT1", 2, Operands { v0, v1 }) + // PFRSQIT1 mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0xa7) + }) + } + // PFRSQIT1 m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0xa7) + }) + } + if p.len == 0 { + panic("invalid operands for PFRSQIT1") + } + return p +} + +// PFRSQRT performs "Packed Floating-Point Reciprocal Square Root Approximation". +// +// Mnemonic : PFRSQRT +// Supported forms : (2 forms) +// +// * PFRSQRT mm, mm [3dnow!] +// * PFRSQRT m64, mm [3dnow!] 
+// +func (self *Program) PFRSQRT(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFRSQRT", 2, Operands { v0, v1 }) + // PFRSQRT mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0x97) + }) + } + // PFRSQRT m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0x97) + }) + } + if p.len == 0 { + panic("invalid operands for PFRSQRT") + } + return p +} + +// PFSUB performs "Packed Floating-Point Subtract". +// +// Mnemonic : PFSUB +// Supported forms : (2 forms) +// +// * PFSUB mm, mm [3dnow!] +// * PFSUB m64, mm [3dnow!] +// +func (self *Program) PFSUB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFSUB", 2, Operands { v0, v1 }) + // PFSUB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0x9a) + }) + } + // PFSUB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0x9a) + }) + } + if p.len == 0 { + panic("invalid operands for PFSUB") + } + return p +} + +// PFSUBR performs "Packed Floating-Point Subtract Reverse". +// +// Mnemonic : PFSUBR +// Supported forms : (2 forms) +// +// * PFSUBR mm, mm [3dnow!] +// * PFSUBR m64, mm [3dnow!] +// +func (self *Program) PFSUBR(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PFSUBR", 2, Operands { v0, v1 }) + // PFSUBR mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0xaa) + }) + } + // PFSUBR m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0xaa) + }) + } + if p.len == 0 { + panic("invalid operands for PFSUBR") + } + return p +} + +// PHADDD performs "Packed Horizontal Add Doubleword Integer". 
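+//
+// A hypothetical usage sketch (registers are illustrative; adjacent lanes
+// are summed, destination lanes filling the low half of the result and
+// source lanes the high half):
+//
+//     p.PHADDD(XMM1, XMM0)    // XMM0 = { XMM0[0]+XMM0[1], XMM0[2]+XMM0[3], XMM1[0]+XMM1[1], XMM1[2]+XMM1[3] }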
+// +// Mnemonic : PHADDD +// Supported forms : (4 forms) +// +// * PHADDD mm, mm [SSSE3] +// * PHADDD m64, mm [SSSE3] +// * PHADDD xmm, xmm [SSSE3] +// * PHADDD m128, xmm [SSSE3] +// +func (self *Program) PHADDD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PHADDD", 2, Operands { v0, v1 }) + // PHADDD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x02) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHADDD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x02) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PHADDD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x02) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHADDD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x02) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PHADDD") + } + return p +} + +// PHADDSW performs "Packed Horizontal Add Signed Word Integers with Signed Saturation". +// +// Mnemonic : PHADDSW +// Supported forms : (4 forms) +// +// * PHADDSW mm, mm [SSSE3] +// * PHADDSW m64, mm [SSSE3] +// * PHADDSW xmm, xmm [SSSE3] +// * PHADDSW m128, xmm [SSSE3] +// +func (self *Program) PHADDSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PHADDSW", 2, Operands { v0, v1 }) + // PHADDSW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x03) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHADDSW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x03) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PHADDSW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x03) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHADDSW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x03) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PHADDSW") + } + return p +} + +// PHADDW performs "Packed Horizontal Add Word Integers". 
+// +// Mnemonic : PHADDW +// Supported forms : (4 forms) +// +// * PHADDW mm, mm [SSSE3] +// * PHADDW m64, mm [SSSE3] +// * PHADDW xmm, xmm [SSSE3] +// * PHADDW m128, xmm [SSSE3] +// +func (self *Program) PHADDW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PHADDW", 2, Operands { v0, v1 }) + // PHADDW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x01) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHADDW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x01) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PHADDW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x01) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHADDW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x01) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PHADDW") + } + return p +} + +// PHMINPOSUW performs "Packed Horizontal Minimum of Unsigned Word Integers". +// +// Mnemonic : PHMINPOSUW +// Supported forms : (2 forms) +// +// * PHMINPOSUW xmm, xmm [SSE4.1] +// * PHMINPOSUW m128, xmm [SSE4.1] +// +func (self *Program) PHMINPOSUW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PHMINPOSUW", 2, Operands { v0, v1 }) + // PHMINPOSUW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x41) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHMINPOSUW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x41) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PHMINPOSUW") + } + return p +} + +// PHSUBD performs "Packed Horizontal Subtract Doubleword Integers". 
+// +// Mnemonic : PHSUBD +// Supported forms : (4 forms) +// +// * PHSUBD mm, mm [SSSE3] +// * PHSUBD m64, mm [SSSE3] +// * PHSUBD xmm, xmm [SSSE3] +// * PHSUBD m128, xmm [SSSE3] +// +func (self *Program) PHSUBD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PHSUBD", 2, Operands { v0, v1 }) + // PHSUBD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x06) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHSUBD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x06) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PHSUBD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x06) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHSUBD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x06) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PHSUBD") + } + return p +} + +// PHSUBSW performs "Packed Horizontal Subtract Signed Word Integers with Signed Saturation". +// +// Mnemonic : PHSUBSW +// Supported forms : (4 forms) +// +// * PHSUBSW mm, mm [SSSE3] +// * PHSUBSW m64, mm [SSSE3] +// * PHSUBSW xmm, xmm [SSSE3] +// * PHSUBSW m128, xmm [SSSE3] +// +func (self *Program) PHSUBSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PHSUBSW", 2, Operands { v0, v1 }) + // PHSUBSW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x07) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHSUBSW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x07) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PHSUBSW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x07) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHSUBSW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x07) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PHSUBSW") + } + return p +} + +// PHSUBW performs "Packed Horizontal Subtract Word Integers". 
+// +// Mnemonic : PHSUBW +// Supported forms : (4 forms) +// +// * PHSUBW mm, mm [SSSE3] +// * PHSUBW m64, mm [SSSE3] +// * PHSUBW xmm, xmm [SSSE3] +// * PHSUBW m128, xmm [SSSE3] +// +func (self *Program) PHSUBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PHSUBW", 2, Operands { v0, v1 }) + // PHSUBW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x05) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHSUBW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x05) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PHSUBW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x05) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PHSUBW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x05) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PHSUBW") + } + return p +} + +// PI2FD performs "Packed Integer to Floating-Point Doubleword Conversion". +// +// Mnemonic : PI2FD +// Supported forms : (2 forms) +// +// * PI2FD mm, mm [3dnow!] +// * PI2FD m64, mm [3dnow!] +// +func (self *Program) PI2FD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PI2FD", 2, Operands { v0, v1 }) + // PI2FD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0x0d) + }) + } + // PI2FD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0x0d) + }) + } + if p.len == 0 { + panic("invalid operands for PI2FD") + } + return p +} + +// PI2FW performs "Packed Integer to Floating-Point Word Conversion". +// +// Mnemonic : PI2FW +// Supported forms : (2 forms) +// +// * PI2FW mm, mm [3dnow!+] +// * PI2FW m64, mm [3dnow!+] +// +func (self *Program) PI2FW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PI2FW", 2, Operands { v0, v1 }) + // PI2FW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW_PLUS) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0x0c) + }) + } + // PI2FW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW_PLUS) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0x0c) + }) + } + if p.len == 0 { + panic("invalid operands for PI2FW") + } + return p +} + +// PINSRB performs "Insert Byte". 
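+//
+// A hypothetical usage sketch (lane index and registers are illustrative):
+//
+//     p.PINSRB(5, EAX, XMM0)    // XMM0 byte lane 5 = low byte of EAX; other lanes unchanged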
+// +// Mnemonic : PINSRB +// Supported forms : (2 forms) +// +// * PINSRB imm8, r32, xmm [SSE4.1] +// * PINSRB imm8, m8, xmm [SSE4.1] +// +func (self *Program) PINSRB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PINSRB", 3, Operands { v0, v1, v2 }) + // PINSRB imm8, r32, xmm + if isImm8(v0) && isReg32(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x20) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PINSRB imm8, m8, xmm + if isImm8(v0) && isM8(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x20) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PINSRB") + } + return p +} + +// PINSRD performs "Insert Doubleword". +// +// Mnemonic : PINSRD +// Supported forms : (2 forms) +// +// * PINSRD imm8, r32, xmm [SSE4.1] +// * PINSRD imm8, m32, xmm [SSE4.1] +// +func (self *Program) PINSRD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PINSRD", 3, Operands { v0, v1, v2 }) + // PINSRD imm8, r32, xmm + if isImm8(v0) && isReg32(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x22) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PINSRD imm8, m32, xmm + if isImm8(v0) && isM32(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x22) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PINSRD") + } + return p +} + +// PINSRQ performs "Insert Quadword". +// +// Mnemonic : PINSRQ +// Supported forms : (2 forms) +// +// * PINSRQ imm8, r64, xmm [SSE4.1] +// * PINSRQ imm8, m64, xmm [SSE4.1] +// +func (self *Program) PINSRQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PINSRQ", 3, Operands { v0, v1, v2 }) + // PINSRQ imm8, r64, xmm + if isImm8(v0) && isReg64(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x48 | hcode(v[2]) << 2 | hcode(v[1])) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x22) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PINSRQ imm8, m64, xmm + if isImm8(v0) && isM64(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexm(1, hcode(v[2]), addr(v[1])) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x22) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PINSRQ") + } + return p +} + +// PINSRW performs "Insert Word". 
+// +// Mnemonic : PINSRW +// Supported forms : (4 forms) +// +// * PINSRW imm8, r32, mm [MMX+] +// * PINSRW imm8, m16, mm [MMX+] +// * PINSRW imm8, r32, xmm [SSE2] +// * PINSRW imm8, m16, xmm [SSE2] +// +func (self *Program) PINSRW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PINSRW", 3, Operands { v0, v1, v2 }) + // PINSRW imm8, r32, mm + if isImm8(v0) && isReg32(v1) && isMM(v2) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0xc4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PINSRW imm8, m16, mm + if isImm8(v0) && isM16(v1) && isMM(v2) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xc4) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // PINSRW imm8, r32, xmm + if isImm8(v0) && isReg32(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0xc4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PINSRW imm8, m16, xmm + if isImm8(v0) && isM16(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xc4) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PINSRW") + } + return p +} + +// PMADDUBSW performs "Multiply and Add Packed Signed and Unsigned Byte Integers". +// +// Mnemonic : PMADDUBSW +// Supported forms : (4 forms) +// +// * PMADDUBSW mm, mm [SSSE3] +// * PMADDUBSW m64, mm [SSSE3] +// * PMADDUBSW xmm, xmm [SSSE3] +// * PMADDUBSW m128, xmm [SSSE3] +// +func (self *Program) PMADDUBSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMADDUBSW", 2, Operands { v0, v1 }) + // PMADDUBSW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x04) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMADDUBSW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x04) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PMADDUBSW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x04) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMADDUBSW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x04) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMADDUBSW") + } + return p +} + +// PMADDWD performs "Multiply and Add Packed Signed Word Integers". 
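+//
+// A hypothetical usage sketch (registers are illustrative; this is the
+// classic building block for 16-bit integer dot products):
+//
+//     p.PMADDWD(XMM1, XMM0)    // XMM0 = four int32 lanes, each XMM0[2i]*XMM1[2i] + XMM0[2i+1]*XMM1[2i+1]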
+// +// Mnemonic : PMADDWD +// Supported forms : (4 forms) +// +// * PMADDWD mm, mm [MMX] +// * PMADDWD m64, mm [MMX] +// * PMADDWD xmm, xmm [SSE2] +// * PMADDWD m128, xmm [SSE2] +// +func (self *Program) PMADDWD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMADDWD", 2, Operands { v0, v1 }) + // PMADDWD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMADDWD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf5) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PMADDWD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMADDWD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf5) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMADDWD") + } + return p +} + +// PMAXSB performs "Maximum of Packed Signed Byte Integers". +// +// Mnemonic : PMAXSB +// Supported forms : (2 forms) +// +// * PMAXSB xmm, xmm [SSE4.1] +// * PMAXSB m128, xmm [SSE4.1] +// +func (self *Program) PMAXSB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMAXSB", 2, Operands { v0, v1 }) + // PMAXSB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x3c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMAXSB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x3c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMAXSB") + } + return p +} + +// PMAXSD performs "Maximum of Packed Signed Doubleword Integers". 
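+//
+// Illustrative usage (a sketch, assuming a *Program p and this package's XMM
+// register constants):
+//
+//     p.PMAXSD(XMM1, XMM0)    // xmm0.dword[i] = signed max of xmm0.dword[i], xmm1.dword[i]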
+// +// Mnemonic : PMAXSD +// Supported forms : (2 forms) +// +// * PMAXSD xmm, xmm [SSE4.1] +// * PMAXSD m128, xmm [SSE4.1] +// +func (self *Program) PMAXSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMAXSD", 2, Operands { v0, v1 }) + // PMAXSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x3d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMAXSD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x3d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMAXSD") + } + return p +} + +// PMAXSW performs "Maximum of Packed Signed Word Integers". +// +// Mnemonic : PMAXSW +// Supported forms : (4 forms) +// +// * PMAXSW mm, mm [MMX+] +// * PMAXSW m64, mm [MMX+] +// * PMAXSW xmm, xmm [SSE2] +// * PMAXSW m128, xmm [SSE2] +// +func (self *Program) PMAXSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMAXSW", 2, Operands { v0, v1 }) + // PMAXSW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xee) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMAXSW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xee) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PMAXSW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xee) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMAXSW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xee) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMAXSW") + } + return p +} + +// PMAXUB performs "Maximum of Packed Unsigned Byte Integers". 
+// +// Mnemonic : PMAXUB +// Supported forms : (4 forms) +// +// * PMAXUB mm, mm [MMX+] +// * PMAXUB m64, mm [MMX+] +// * PMAXUB xmm, xmm [SSE2] +// * PMAXUB m128, xmm [SSE2] +// +func (self *Program) PMAXUB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMAXUB", 2, Operands { v0, v1 }) + // PMAXUB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xde) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMAXUB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xde) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PMAXUB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xde) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMAXUB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xde) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMAXUB") + } + return p +} + +// PMAXUD performs "Maximum of Packed Unsigned Doubleword Integers". +// +// Mnemonic : PMAXUD +// Supported forms : (2 forms) +// +// * PMAXUD xmm, xmm [SSE4.1] +// * PMAXUD m128, xmm [SSE4.1] +// +func (self *Program) PMAXUD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMAXUD", 2, Operands { v0, v1 }) + // PMAXUD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMAXUD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x3f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMAXUD") + } + return p +} + +// PMAXUW performs "Maximum of Packed Unsigned Word Integers". 
+// +// Mnemonic : PMAXUW +// Supported forms : (2 forms) +// +// * PMAXUW xmm, xmm [SSE4.1] +// * PMAXUW m128, xmm [SSE4.1] +// +func (self *Program) PMAXUW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMAXUW", 2, Operands { v0, v1 }) + // PMAXUW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x3e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMAXUW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x3e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMAXUW") + } + return p +} + +// PMINSB performs "Minimum of Packed Signed Byte Integers". +// +// Mnemonic : PMINSB +// Supported forms : (2 forms) +// +// * PMINSB xmm, xmm [SSE4.1] +// * PMINSB m128, xmm [SSE4.1] +// +func (self *Program) PMINSB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMINSB", 2, Operands { v0, v1 }) + // PMINSB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x38) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMINSB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x38) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMINSB") + } + return p +} + +// PMINSD performs "Minimum of Packed Signed Doubleword Integers". +// +// Mnemonic : PMINSD +// Supported forms : (2 forms) +// +// * PMINSD xmm, xmm [SSE4.1] +// * PMINSD m128, xmm [SSE4.1] +// +func (self *Program) PMINSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMINSD", 2, Operands { v0, v1 }) + // PMINSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x39) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMINSD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x39) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMINSD") + } + return p +} + +// PMINSW performs "Minimum of Packed Signed Word Integers". 
+// +// Mnemonic : PMINSW +// Supported forms : (4 forms) +// +// * PMINSW mm, mm [MMX+] +// * PMINSW m64, mm [MMX+] +// * PMINSW xmm, xmm [SSE2] +// * PMINSW m128, xmm [SSE2] +// +func (self *Program) PMINSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMINSW", 2, Operands { v0, v1 }) + // PMINSW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xea) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMINSW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xea) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PMINSW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xea) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMINSW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xea) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMINSW") + } + return p +} + +// PMINUB performs "Minimum of Packed Unsigned Byte Integers". +// +// Mnemonic : PMINUB +// Supported forms : (4 forms) +// +// * PMINUB mm, mm [MMX+] +// * PMINUB m64, mm [MMX+] +// * PMINUB xmm, xmm [SSE2] +// * PMINUB m128, xmm [SSE2] +// +func (self *Program) PMINUB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMINUB", 2, Operands { v0, v1 }) + // PMINUB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xda) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMINUB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xda) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PMINUB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xda) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMINUB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xda) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMINUB") + } + return p +} + +// PMINUD performs "Minimum of Packed Unsigned Doubleword Integers". 
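+//
+// Illustrative usage (a sketch under the same assumptions as the examples
+// above: a *Program p and this package's XMM register constants):
+//
+//     p.PMINUD(XMM1, XMM0)    // xmm0.dword[i] = unsigned min of xmm0.dword[i], xmm1.dword[i]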
+// +// Mnemonic : PMINUD +// Supported forms : (2 forms) +// +// * PMINUD xmm, xmm [SSE4.1] +// * PMINUD m128, xmm [SSE4.1] +// +func (self *Program) PMINUD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMINUD", 2, Operands { v0, v1 }) + // PMINUD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMINUD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x3b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMINUD") + } + return p +} + +// PMINUW performs "Minimum of Packed Unsigned Word Integers". +// +// Mnemonic : PMINUW +// Supported forms : (2 forms) +// +// * PMINUW xmm, xmm [SSE4.1] +// * PMINUW m128, xmm [SSE4.1] +// +func (self *Program) PMINUW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMINUW", 2, Operands { v0, v1 }) + // PMINUW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x3a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMINUW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x3a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMINUW") + } + return p +} + +// PMOVMSKB performs "Move Byte Mask". +// +// Mnemonic : PMOVMSKB +// Supported forms : (2 forms) +// +// * PMOVMSKB mm, r32 [MMX+] +// * PMOVMSKB xmm, r32 [SSE2] +// +func (self *Program) PMOVMSKB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVMSKB", 2, Operands { v0, v1 }) + // PMOVMSKB mm, r32 + if isMM(v0) && isReg32(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd7) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVMSKB xmm, r32 + if isXMM(v0) && isReg32(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd7) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVMSKB") + } + return p +} + +// PMOVSXBD performs "Move Packed Byte Integers to Doubleword Integers with Sign Extension". 
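+//
+// Illustrative usage (a sketch, assuming a *Program p and this package's XMM
+// register constants): sign-extend the low four bytes of xmm1 into doublewords.
+//
+//     p.PMOVSXBD(XMM1, XMM0)    // xmm0.dword[i] = sign_extend(xmm1.byte[i])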
+// +// Mnemonic : PMOVSXBD +// Supported forms : (2 forms) +// +// * PMOVSXBD xmm, xmm [SSE4.1] +// * PMOVSXBD m32, xmm [SSE4.1] +// +func (self *Program) PMOVSXBD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVSXBD", 2, Operands { v0, v1 }) + // PMOVSXBD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x21) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVSXBD m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x21) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVSXBD") + } + return p +} + +// PMOVSXBQ performs "Move Packed Byte Integers to Quadword Integers with Sign Extension". +// +// Mnemonic : PMOVSXBQ +// Supported forms : (2 forms) +// +// * PMOVSXBQ xmm, xmm [SSE4.1] +// * PMOVSXBQ m16, xmm [SSE4.1] +// +func (self *Program) PMOVSXBQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVSXBQ", 2, Operands { v0, v1 }) + // PMOVSXBQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x22) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVSXBQ m16, xmm + if isM16(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x22) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVSXBQ") + } + return p +} + +// PMOVSXBW performs "Move Packed Byte Integers to Word Integers with Sign Extension". +// +// Mnemonic : PMOVSXBW +// Supported forms : (2 forms) +// +// * PMOVSXBW xmm, xmm [SSE4.1] +// * PMOVSXBW m64, xmm [SSE4.1] +// +func (self *Program) PMOVSXBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVSXBW", 2, Operands { v0, v1 }) + // PMOVSXBW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x20) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVSXBW m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x20) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVSXBW") + } + return p +} + +// PMOVSXDQ performs "Move Packed Doubleword Integers to Quadword Integers with Sign Extension". 
+// +// Mnemonic : PMOVSXDQ +// Supported forms : (2 forms) +// +// * PMOVSXDQ xmm, xmm [SSE4.1] +// * PMOVSXDQ m64, xmm [SSE4.1] +// +func (self *Program) PMOVSXDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVSXDQ", 2, Operands { v0, v1 }) + // PMOVSXDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x25) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVSXDQ m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x25) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVSXDQ") + } + return p +} + +// PMOVSXWD performs "Move Packed Word Integers to Doubleword Integers with Sign Extension". +// +// Mnemonic : PMOVSXWD +// Supported forms : (2 forms) +// +// * PMOVSXWD xmm, xmm [SSE4.1] +// * PMOVSXWD m64, xmm [SSE4.1] +// +func (self *Program) PMOVSXWD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVSXWD", 2, Operands { v0, v1 }) + // PMOVSXWD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x23) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVSXWD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x23) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVSXWD") + } + return p +} + +// PMOVSXWQ performs "Move Packed Word Integers to Quadword Integers with Sign Extension". +// +// Mnemonic : PMOVSXWQ +// Supported forms : (2 forms) +// +// * PMOVSXWQ xmm, xmm [SSE4.1] +// * PMOVSXWQ m32, xmm [SSE4.1] +// +func (self *Program) PMOVSXWQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVSXWQ", 2, Operands { v0, v1 }) + // PMOVSXWQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x24) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVSXWQ m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x24) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVSXWQ") + } + return p +} + +// PMOVZXBD performs "Move Packed Byte Integers to Doubleword Integers with Zero Extension". 
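+//
+// Illustrative usage (a sketch, assuming a *Program p and this package's XMM
+// register constants): zero-extend the low four bytes of xmm1 into doublewords.
+//
+//     p.PMOVZXBD(XMM1, XMM0)    // xmm0.dword[i] = zero_extend(xmm1.byte[i])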
+// +// Mnemonic : PMOVZXBD +// Supported forms : (2 forms) +// +// * PMOVZXBD xmm, xmm [SSE4.1] +// * PMOVZXBD m32, xmm [SSE4.1] +// +func (self *Program) PMOVZXBD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVZXBD", 2, Operands { v0, v1 }) + // PMOVZXBD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x31) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVZXBD m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x31) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVZXBD") + } + return p +} + +// PMOVZXBQ performs "Move Packed Byte Integers to Quadword Integers with Zero Extension". +// +// Mnemonic : PMOVZXBQ +// Supported forms : (2 forms) +// +// * PMOVZXBQ xmm, xmm [SSE4.1] +// * PMOVZXBQ m16, xmm [SSE4.1] +// +func (self *Program) PMOVZXBQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVZXBQ", 2, Operands { v0, v1 }) + // PMOVZXBQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x32) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVZXBQ m16, xmm + if isM16(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x32) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVZXBQ") + } + return p +} + +// PMOVZXBW performs "Move Packed Byte Integers to Word Integers with Zero Extension". +// +// Mnemonic : PMOVZXBW +// Supported forms : (2 forms) +// +// * PMOVZXBW xmm, xmm [SSE4.1] +// * PMOVZXBW m64, xmm [SSE4.1] +// +func (self *Program) PMOVZXBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVZXBW", 2, Operands { v0, v1 }) + // PMOVZXBW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x30) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVZXBW m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x30) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVZXBW") + } + return p +} + +// PMOVZXDQ performs "Move Packed Doubleword Integers to Quadword Integers with Zero Extension". 
+// +// Mnemonic : PMOVZXDQ +// Supported forms : (2 forms) +// +// * PMOVZXDQ xmm, xmm [SSE4.1] +// * PMOVZXDQ m64, xmm [SSE4.1] +// +func (self *Program) PMOVZXDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVZXDQ", 2, Operands { v0, v1 }) + // PMOVZXDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x35) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVZXDQ m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x35) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVZXDQ") + } + return p +} + +// PMOVZXWD performs "Move Packed Word Integers to Doubleword Integers with Zero Extension". +// +// Mnemonic : PMOVZXWD +// Supported forms : (2 forms) +// +// * PMOVZXWD xmm, xmm [SSE4.1] +// * PMOVZXWD m64, xmm [SSE4.1] +// +func (self *Program) PMOVZXWD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVZXWD", 2, Operands { v0, v1 }) + // PMOVZXWD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x33) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVZXWD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x33) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVZXWD") + } + return p +} + +// PMOVZXWQ performs "Move Packed Word Integers to Quadword Integers with Zero Extension". +// +// Mnemonic : PMOVZXWQ +// Supported forms : (2 forms) +// +// * PMOVZXWQ xmm, xmm [SSE4.1] +// * PMOVZXWQ m32, xmm [SSE4.1] +// +func (self *Program) PMOVZXWQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMOVZXWQ", 2, Operands { v0, v1 }) + // PMOVZXWQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x34) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMOVZXWQ m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x34) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMOVZXWQ") + } + return p +} + +// PMULDQ performs "Multiply Packed Signed Doubleword Integers and Store Quadword Result". 
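+//
+// PMULDQ multiplies the signed doublewords in the even lanes (0 and 2) of the
+// two operands and stores the two full 64-bit products. Illustrative usage
+// (a sketch, assuming a *Program p and this package's XMM register constants):
+//
+//     p.PMULDQ(XMM1, XMM0)    // xmm0.qword[i] = xmm0.dword[2i] * xmm1.dword[2i]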
+// +// Mnemonic : PMULDQ +// Supported forms : (2 forms) +// +// * PMULDQ xmm, xmm [SSE4.1] +// * PMULDQ m128, xmm [SSE4.1] +// +func (self *Program) PMULDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMULDQ", 2, Operands { v0, v1 }) + // PMULDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMULDQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMULDQ") + } + return p +} + +// PMULHRSW performs "Packed Multiply Signed Word Integers and Store High Result with Round and Scale". +// +// Mnemonic : PMULHRSW +// Supported forms : (4 forms) +// +// * PMULHRSW mm, mm [SSSE3] +// * PMULHRSW m64, mm [SSSE3] +// * PMULHRSW xmm, xmm [SSSE3] +// * PMULHRSW m128, xmm [SSSE3] +// +func (self *Program) PMULHRSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMULHRSW", 2, Operands { v0, v1 }) + // PMULHRSW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x0b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMULHRSW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x0b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PMULHRSW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x0b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMULHRSW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x0b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMULHRSW") + } + return p +} + +// PMULHRW performs "Packed Multiply High Rounded Word". +// +// Mnemonic : PMULHRW +// Supported forms : (2 forms) +// +// * PMULHRW mm, mm [3dnow!] +// * PMULHRW m64, mm [3dnow!] 
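+//
+// Illustrative usage (a sketch, assuming a *Program p and this package's MM
+// register constants): rounded high-half multiply of packed words.
+//
+//     p.PMULHRW(MM1, MM0)    // mm0.word[i] = (mm0.word[i]*mm1.word[i] + 0x8000) >> 16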
+// +func (self *Program) PMULHRW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMULHRW", 2, Operands { v0, v1 }) + // PMULHRW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0xb7) + }) + } + // PMULHRW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0xb7) + }) + } + if p.len == 0 { + panic("invalid operands for PMULHRW") + } + return p +} + +// PMULHUW performs "Multiply Packed Unsigned Word Integers and Store High Result". +// +// Mnemonic : PMULHUW +// Supported forms : (4 forms) +// +// * PMULHUW mm, mm [MMX+] +// * PMULHUW m64, mm [MMX+] +// * PMULHUW xmm, xmm [SSE2] +// * PMULHUW m128, xmm [SSE2] +// +func (self *Program) PMULHUW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMULHUW", 2, Operands { v0, v1 }) + // PMULHUW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMULHUW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe4) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PMULHUW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMULHUW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe4) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMULHUW") + } + return p +} + +// PMULHW performs "Multiply Packed Signed Word Integers and Store High Result". 
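+//
+// Illustrative usage (a sketch, assuming a *Program p and this package's XMM
+// register constants): keep the high 16 bits of each signed word product.
+//
+//     p.PMULHW(XMM1, XMM0)    // xmm0.word[i] = (xmm0.word[i] * xmm1.word[i]) >> 16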
+// +// Mnemonic : PMULHW +// Supported forms : (4 forms) +// +// * PMULHW mm, mm [MMX] +// * PMULHW m64, mm [MMX] +// * PMULHW xmm, xmm [SSE2] +// * PMULHW m128, xmm [SSE2] +// +func (self *Program) PMULHW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMULHW", 2, Operands { v0, v1 }) + // PMULHW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe5) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMULHW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe5) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PMULHW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe5) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMULHW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe5) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMULHW") + } + return p +} + +// PMULLD performs "Multiply Packed Signed Doubleword Integers and Store Low Result". +// +// Mnemonic : PMULLD +// Supported forms : (2 forms) +// +// * PMULLD xmm, xmm [SSE4.1] +// * PMULLD m128, xmm [SSE4.1] +// +func (self *Program) PMULLD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMULLD", 2, Operands { v0, v1 }) + // PMULLD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x40) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMULLD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x40) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMULLD") + } + return p +} + +// PMULLW performs "Multiply Packed Signed Word Integers and Store Low Result". 
+// +// Mnemonic : PMULLW +// Supported forms : (4 forms) +// +// * PMULLW mm, mm [MMX] +// * PMULLW m64, mm [MMX] +// * PMULLW xmm, xmm [SSE2] +// * PMULLW m128, xmm [SSE2] +// +func (self *Program) PMULLW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMULLW", 2, Operands { v0, v1 }) + // PMULLW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd5) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMULLW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd5) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PMULLW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd5) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMULLW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd5) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMULLW") + } + return p +} + +// PMULUDQ performs "Multiply Packed Unsigned Doubleword Integers". +// +// Mnemonic : PMULUDQ +// Supported forms : (4 forms) +// +// * PMULUDQ mm, mm [SSE2] +// * PMULUDQ m64, mm [SSE2] +// * PMULUDQ xmm, xmm [SSE2] +// * PMULUDQ m128, xmm [SSE2] +// +func (self *Program) PMULUDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PMULUDQ", 2, Operands { v0, v1 }) + // PMULUDQ mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMULUDQ m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf4) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PMULUDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PMULUDQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf4) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PMULUDQ") + } + return p +} + +// POPCNTL performs "Count of Number of Bits Set to 1". 
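+//
+// Illustrative usage (a sketch, assuming a *Program p and this package's
+// general-purpose register constants):
+//
+//     p.POPCNTL(EAX, EBX)    // ebx = number of set bits in eax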
+// +// Mnemonic : POPCNT +// Supported forms : (2 forms) +// +// * POPCNTL r32, r32 [POPCNT] +// * POPCNTL m32, r32 [POPCNT] +// +func (self *Program) POPCNTL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("POPCNTL", 2, Operands { v0, v1 }) + // POPCNTL r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_POPCNT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // POPCNTL m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_POPCNT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xb8) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for POPCNTL") + } + return p +} + +// POPCNTQ performs "Count of Number of Bits Set to 1". +// +// Mnemonic : POPCNT +// Supported forms : (2 forms) +// +// * POPCNTQ r64, r64 [POPCNT] +// * POPCNTQ m64, r64 [POPCNT] +// +func (self *Program) POPCNTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("POPCNTQ", 2, Operands { v0, v1 }) + // POPCNTQ r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_POPCNT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // POPCNTQ m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_POPCNT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0xb8) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for POPCNTQ") + } + return p +} + +// POPCNTW performs "Count of Number of Bits Set to 1". +// +// Mnemonic : POPCNT +// Supported forms : (2 forms) +// +// * POPCNTW r16, r16 [POPCNT] +// * POPCNTW m16, r16 [POPCNT] +// +func (self *Program) POPCNTW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("POPCNTW", 2, Operands { v0, v1 }) + // POPCNTW r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_POPCNT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // POPCNTW m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_POPCNT) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xb8) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for POPCNTW") + } + return p +} + +// POPQ performs "Pop a Value from the Stack". 
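+//
+// Illustrative usage (a sketch, assuming a *Program p and this package's
+// register constants):
+//
+//     p.POPQ(RAX)    // rax = value at top of stack; rsp += 8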
+// +// Mnemonic : POP +// Supported forms : (2 forms) +// +// * POPQ r64 +// * POPQ m64 +// +func (self *Program) POPQ(v0 interface{}) *Instruction { + p := self.alloc("POPQ", 1, Operands { v0 }) + // POPQ r64 + if isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0x58 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0x8f) + m.emit(0xc0 | lcode(v[0])) + }) + } + // POPQ m64 + if isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x8f) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for POPQ") + } + return p +} + +// POPW performs "Pop a Value from the Stack". +// +// Mnemonic : POP +// Supported forms : (2 forms) +// +// * POPW r16 +// * POPW m16 +// +func (self *Program) POPW(v0 interface{}) *Instruction { + p := self.alloc("POPW", 1, Operands { v0 }) + // POPW r16 + if isReg16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0x58 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0x8f) + m.emit(0xc0 | lcode(v[0])) + }) + } + // POPW m16 + if isM16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[0]), false) + m.emit(0x8f) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for POPW") + } + return p +} + +// POR performs "Packed Bitwise Logical OR". +// +// Mnemonic : POR +// Supported forms : (4 forms) +// +// * POR mm, mm [MMX] +// * POR m64, mm [MMX] +// * POR xmm, xmm [SSE2] +// * POR m128, xmm [SSE2] +// +func (self *Program) POR(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("POR", 2, Operands { v0, v1 }) + // POR mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xeb) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // POR m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xeb) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // POR xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xeb) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // POR m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xeb) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for POR") + } + return p +} + +// PREFETCH performs "Prefetch Data into Caches". 
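+//
+// Illustrative usage (a sketch; it assumes a *Program p and that the memory
+// operand is built with this package's Ptr helper, e.g. Ptr(RSI, 0) for [rsi]):
+//
+//     p.PREFETCH(Ptr(RSI, 0))    // hint: prefetch the cache line at [rsi]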
+// +// Mnemonic : PREFETCH +// Supported forms : (1 form) +// +// * PREFETCH m8 [PREFETCH] +// +func (self *Program) PREFETCH(v0 interface{}) *Instruction { + p := self.alloc("PREFETCH", 1, Operands { v0 }) + // PREFETCH m8 + if isM8(v0) { + self.require(ISA_PREFETCH) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0d) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PREFETCH") + } + return p +} + +// PREFETCHNTA performs "Prefetch Data Into Caches using NTA Hint". +// +// Mnemonic : PREFETCHNTA +// Supported forms : (1 form) +// +// * PREFETCHNTA m8 [MMX+] +// +func (self *Program) PREFETCHNTA(v0 interface{}) *Instruction { + p := self.alloc("PREFETCHNTA", 1, Operands { v0 }) + // PREFETCHNTA m8 + if isM8(v0) { + self.require(ISA_MMX_PLUS) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x18) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PREFETCHNTA") + } + return p +} + +// PREFETCHT0 performs "Prefetch Data Into Caches using T0 Hint". +// +// Mnemonic : PREFETCHT0 +// Supported forms : (1 form) +// +// * PREFETCHT0 m8 [MMX+] +// +func (self *Program) PREFETCHT0(v0 interface{}) *Instruction { + p := self.alloc("PREFETCHT0", 1, Operands { v0 }) + // PREFETCHT0 m8 + if isM8(v0) { + self.require(ISA_MMX_PLUS) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x18) + m.mrsd(1, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PREFETCHT0") + } + return p +} + +// PREFETCHT1 performs "Prefetch Data Into Caches using T1 Hint". +// +// Mnemonic : PREFETCHT1 +// Supported forms : (1 form) +// +// * PREFETCHT1 m8 [MMX+] +// +func (self *Program) PREFETCHT1(v0 interface{}) *Instruction { + p := self.alloc("PREFETCHT1", 1, Operands { v0 }) + // PREFETCHT1 m8 + if isM8(v0) { + self.require(ISA_MMX_PLUS) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x18) + m.mrsd(2, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PREFETCHT1") + } + return p +} + +// PREFETCHT2 performs "Prefetch Data Into Caches using T2 Hint". +// +// Mnemonic : PREFETCHT2 +// Supported forms : (1 form) +// +// * PREFETCHT2 m8 [MMX+] +// +func (self *Program) PREFETCHT2(v0 interface{}) *Instruction { + p := self.alloc("PREFETCHT2", 1, Operands { v0 }) + // PREFETCHT2 m8 + if isM8(v0) { + self.require(ISA_MMX_PLUS) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x18) + m.mrsd(3, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PREFETCHT2") + } + return p +} + +// PREFETCHW performs "Prefetch Data into Caches in Anticipation of a Write". 
+// +// Mnemonic : PREFETCHW +// Supported forms : (1 form) +// +// * PREFETCHW m8 [PREFETCHW] +// +func (self *Program) PREFETCHW(v0 interface{}) *Instruction { + p := self.alloc("PREFETCHW", 1, Operands { v0 }) + // PREFETCHW m8 + if isM8(v0) { + self.require(ISA_PREFETCHW) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0d) + m.mrsd(1, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PREFETCHW") + } + return p +} + +// PREFETCHWT1 performs "Prefetch Vector Data Into Caches with Intent to Write and T1 Hint". +// +// Mnemonic : PREFETCHWT1 +// Supported forms : (1 form) +// +// * PREFETCHWT1 m8 [PREFETCHWT1] +// +func (self *Program) PREFETCHWT1(v0 interface{}) *Instruction { + p := self.alloc("PREFETCHWT1", 1, Operands { v0 }) + // PREFETCHWT1 m8 + if isM8(v0) { + self.require(ISA_PREFETCHWT1) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0d) + m.mrsd(2, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PREFETCHWT1") + } + return p +} + +// PSADBW performs "Compute Sum of Absolute Differences". +// +// Mnemonic : PSADBW +// Supported forms : (4 forms) +// +// * PSADBW mm, mm [MMX+] +// * PSADBW m64, mm [MMX+] +// * PSADBW xmm, xmm [SSE2] +// * PSADBW m128, xmm [SSE2] +// +func (self *Program) PSADBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSADBW", 2, Operands { v0, v1 }) + // PSADBW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSADBW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSADBW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSADBW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSADBW") + } + return p +} + +// PSHUFB performs "Packed Shuffle Bytes". 
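+//
+// PSHUFB uses each byte of the source as a selector into the destination: a
+// selector with its high bit set zeroes the lane, otherwise its low four bits
+// (for the xmm form) index the byte to copy. Illustrative usage (a sketch,
+// assuming a *Program p and this package's XMM register constants):
+//
+//     p.PSHUFB(XMM1, XMM0)    // xmm0.byte[i] = xmm1.byte[i] & 0x80 ? 0 : xmm0.byte[xmm1.byte[i] & 15]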
+// +// Mnemonic : PSHUFB +// Supported forms : (4 forms) +// +// * PSHUFB mm, mm [SSSE3] +// * PSHUFB m64, mm [SSSE3] +// * PSHUFB xmm, xmm [SSSE3] +// * PSHUFB m128, xmm [SSSE3] +// +func (self *Program) PSHUFB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSHUFB", 2, Operands { v0, v1 }) + // PSHUFB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x00) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSHUFB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x00) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSHUFB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x00) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSHUFB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x00) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSHUFB") + } + return p +} + +// PSHUFD performs "Shuffle Packed Doublewords". +// +// Mnemonic : PSHUFD +// Supported forms : (2 forms) +// +// * PSHUFD imm8, xmm, xmm [SSE2] +// * PSHUFD imm8, m128, xmm [SSE2] +// +func (self *Program) PSHUFD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PSHUFD", 3, Operands { v0, v1, v2 }) + // PSHUFD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSHUFD imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PSHUFD") + } + return p +} + +// PSHUFHW performs "Shuffle Packed High Words". 
+// +// Mnemonic : PSHUFHW +// Supported forms : (2 forms) +// +// * PSHUFHW imm8, xmm, xmm [SSE2] +// * PSHUFHW imm8, m128, xmm [SSE2] +// +func (self *Program) PSHUFHW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PSHUFHW", 3, Operands { v0, v1, v2 }) + // PSHUFHW imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSHUFHW imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PSHUFHW") + } + return p +} + +// PSHUFLW performs "Shuffle Packed Low Words". +// +// Mnemonic : PSHUFLW +// Supported forms : (2 forms) +// +// * PSHUFLW imm8, xmm, xmm [SSE2] +// * PSHUFLW imm8, m128, xmm [SSE2] +// +func (self *Program) PSHUFLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PSHUFLW", 3, Operands { v0, v1, v2 }) + // PSHUFLW imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSHUFLW imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PSHUFLW") + } + return p +} + +// PSHUFW performs "Shuffle Packed Words". +// +// Mnemonic : PSHUFW +// Supported forms : (2 forms) +// +// * PSHUFW imm8, mm, mm [MMX+] +// * PSHUFW imm8, m64, mm [MMX+] +// +func (self *Program) PSHUFW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("PSHUFW", 3, Operands { v0, v1, v2 }) + // PSHUFW imm8, mm, mm + if isImm8(v0) && isMM(v1) && isMM(v2) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSHUFW imm8, m64, mm + if isImm8(v0) && isM64(v1) && isMM(v2) { + self.require(ISA_MMX_PLUS) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PSHUFW") + } + return p +} + +// PSIGNB performs "Packed Sign of Byte Integers". 
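+//
+// PSIGNB negates, keeps, or zeroes each destination byte according to whether
+// the corresponding source byte is negative, positive, or zero. Illustrative
+// usage (a sketch, assuming a *Program p and this package's XMM register
+// constants):
+//
+//     p.PSIGNB(XMM1, XMM0)    // xmm0.byte[i] = sign(xmm1.byte[i]) * xmm0.byte[i]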
+// +// Mnemonic : PSIGNB +// Supported forms : (4 forms) +// +// * PSIGNB mm, mm [SSSE3] +// * PSIGNB m64, mm [SSSE3] +// * PSIGNB xmm, xmm [SSSE3] +// * PSIGNB m128, xmm [SSSE3] +// +func (self *Program) PSIGNB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSIGNB", 2, Operands { v0, v1 }) + // PSIGNB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x08) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSIGNB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x08) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSIGNB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x08) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSIGNB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x08) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSIGNB") + } + return p +} + +// PSIGND performs "Packed Sign of Doubleword Integers". +// +// Mnemonic : PSIGND +// Supported forms : (4 forms) +// +// * PSIGND mm, mm [SSSE3] +// * PSIGND m64, mm [SSSE3] +// * PSIGND xmm, xmm [SSSE3] +// * PSIGND m128, xmm [SSSE3] +// +func (self *Program) PSIGND(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSIGND", 2, Operands { v0, v1 }) + // PSIGND mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x0a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSIGND m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x0a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSIGND xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x0a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSIGND m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x0a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSIGND") + } + return p +} + +// PSIGNW performs "Packed Sign of Word Integers". 
+// +// Mnemonic : PSIGNW +// Supported forms : (4 forms) +// +// * PSIGNW mm, mm [SSSE3] +// * PSIGNW m64, mm [SSSE3] +// * PSIGNW xmm, xmm [SSSE3] +// * PSIGNW m128, xmm [SSSE3] +// +func (self *Program) PSIGNW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSIGNW", 2, Operands { v0, v1 }) + // PSIGNW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x09) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSIGNW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x09) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSIGNW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x09) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSIGNW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSSE3) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x09) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSIGNW") + } + return p +} + +// PSLLD performs "Shift Packed Doubleword Data Left Logical". +// +// Mnemonic : PSLLD +// Supported forms : (6 forms) +// +// * PSLLD imm8, mm [MMX] +// * PSLLD mm, mm [MMX] +// * PSLLD m64, mm [MMX] +// * PSLLD imm8, xmm [SSE2] +// * PSLLD xmm, xmm [SSE2] +// * PSLLD m128, xmm [SSE2] +// +func (self *Program) PSLLD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSLLD", 2, Operands { v0, v1 }) + // PSLLD imm8, mm + if isImm8(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x72) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSLLD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf2) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSLLD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf2) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSLLD imm8, xmm + if isImm8(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x72) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSLLD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf2) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSLLD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), 
false) + m.emit(0x0f) + m.emit(0xf2) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSLLD") + } + return p +} + +// PSLLDQ performs "Shift Packed Double Quadword Left Logical". +// +// Mnemonic : PSLLDQ +// Supported forms : (1 form) +// +// * PSLLDQ imm8, xmm [SSE2] +// +func (self *Program) PSLLDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSLLDQ", 2, Operands { v0, v1 }) + // PSLLDQ imm8, xmm + if isImm8(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x73) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PSLLDQ") + } + return p +} + +// PSLLQ performs "Shift Packed Quadword Data Left Logical". +// +// Mnemonic : PSLLQ +// Supported forms : (6 forms) +// +// * PSLLQ imm8, mm [MMX] +// * PSLLQ mm, mm [MMX] +// * PSLLQ m64, mm [MMX] +// * PSLLQ imm8, xmm [SSE2] +// * PSLLQ xmm, xmm [SSE2] +// * PSLLQ m128, xmm [SSE2] +// +func (self *Program) PSLLQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSLLQ", 2, Operands { v0, v1 }) + // PSLLQ imm8, mm + if isImm8(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x73) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSLLQ mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf3) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSLLQ m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf3) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSLLQ imm8, xmm + if isImm8(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x73) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSLLQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf3) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSLLQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf3) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSLLQ") + } + return p +} + +// PSLLW performs "Shift Packed Word Data Left Logical". 
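+//
+// A usage sketch (assuming p is a *Program, XMM2 is this package's register
+// constant, and a plain Go integer satisfies the imm8 check):
+//
+//     p.PSLLW(8, XMM2)  // shift each of the eight words in XMM2 left by 8 bits, filling with zeroes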
+// +// Mnemonic : PSLLW +// Supported forms : (6 forms) +// +// * PSLLW imm8, mm [MMX] +// * PSLLW mm, mm [MMX] +// * PSLLW m64, mm [MMX] +// * PSLLW imm8, xmm [SSE2] +// * PSLLW xmm, xmm [SSE2] +// * PSLLW m128, xmm [SSE2] +// +func (self *Program) PSLLW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSLLW", 2, Operands { v0, v1 }) + // PSLLW imm8, mm + if isImm8(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x71) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSLLW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf1) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSLLW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf1) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSLLW imm8, xmm + if isImm8(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x71) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSLLW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf1) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSLLW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf1) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSLLW") + } + return p +} + +// PSRAD performs "Shift Packed Doubleword Data Right Arithmetic". 
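+//
+// A usage sketch (assuming p is a *Program and XMM0 is this package's
+// register constant):
+//
+//     p.PSRAD(31, XMM0)  // arithmetic shift: each doubleword collapses to 0 or -1, broadcasting its sign bit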
+// +// Mnemonic : PSRAD +// Supported forms : (6 forms) +// +// * PSRAD imm8, mm [MMX] +// * PSRAD mm, mm [MMX] +// * PSRAD m64, mm [MMX] +// * PSRAD imm8, xmm [SSE2] +// * PSRAD xmm, xmm [SSE2] +// * PSRAD m128, xmm [SSE2] +// +func (self *Program) PSRAD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSRAD", 2, Operands { v0, v1 }) + // PSRAD imm8, mm + if isImm8(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x72) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSRAD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe2) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSRAD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe2) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSRAD imm8, xmm + if isImm8(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x72) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSRAD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe2) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSRAD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe2) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSRAD") + } + return p +} + +// PSRAW performs "Shift Packed Word Data Right Arithmetic". 
+// +// Mnemonic : PSRAW +// Supported forms : (6 forms) +// +// * PSRAW imm8, mm [MMX] +// * PSRAW mm, mm [MMX] +// * PSRAW m64, mm [MMX] +// * PSRAW imm8, xmm [SSE2] +// * PSRAW xmm, xmm [SSE2] +// * PSRAW m128, xmm [SSE2] +// +func (self *Program) PSRAW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSRAW", 2, Operands { v0, v1 }) + // PSRAW imm8, mm + if isImm8(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x71) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSRAW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe1) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSRAW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe1) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSRAW imm8, xmm + if isImm8(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x71) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSRAW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe1) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSRAW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe1) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSRAW") + } + return p +} + +// PSRLD performs "Shift Packed Doubleword Data Right Logical". 
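+//
+// A usage sketch (assuming p is a *Program and XMM0/XMM1 are this package's
+// register constants):
+//
+//     p.PSRLD(16, XMM0)    // logical shift: each doubleword of XMM0 moves right 16 bits, zero-filled
+//     p.PSRLD(XMM1, XMM0)  // shift count taken from the low quadword of XMM1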
+// +// Mnemonic : PSRLD +// Supported forms : (6 forms) +// +// * PSRLD imm8, mm [MMX] +// * PSRLD mm, mm [MMX] +// * PSRLD m64, mm [MMX] +// * PSRLD imm8, xmm [SSE2] +// * PSRLD xmm, xmm [SSE2] +// * PSRLD m128, xmm [SSE2] +// +func (self *Program) PSRLD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSRLD", 2, Operands { v0, v1 }) + // PSRLD imm8, mm + if isImm8(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x72) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSRLD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd2) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSRLD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd2) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSRLD imm8, xmm + if isImm8(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x72) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSRLD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd2) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSRLD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd2) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSRLD") + } + return p +} + +// PSRLDQ performs "Shift Packed Double Quadword Right Logical". +// +// Mnemonic : PSRLDQ +// Supported forms : (1 form) +// +// * PSRLDQ imm8, xmm [SSE2] +// +func (self *Program) PSRLDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSRLDQ", 2, Operands { v0, v1 }) + // PSRLDQ imm8, xmm + if isImm8(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x73) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for PSRLDQ") + } + return p +} + +// PSRLQ performs "Shift Packed Quadword Data Right Logical". 
+// +// Mnemonic : PSRLQ +// Supported forms : (6 forms) +// +// * PSRLQ imm8, mm [MMX] +// * PSRLQ mm, mm [MMX] +// * PSRLQ m64, mm [MMX] +// * PSRLQ imm8, xmm [SSE2] +// * PSRLQ xmm, xmm [SSE2] +// * PSRLQ m128, xmm [SSE2] +// +func (self *Program) PSRLQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSRLQ", 2, Operands { v0, v1 }) + // PSRLQ imm8, mm + if isImm8(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x73) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSRLQ mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd3) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSRLQ m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd3) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSRLQ imm8, xmm + if isImm8(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x73) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSRLQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd3) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSRLQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd3) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSRLQ") + } + return p +} + +// PSRLW performs "Shift Packed Word Data Right Logical". 
+// +// Mnemonic : PSRLW +// Supported forms : (6 forms) +// +// * PSRLW imm8, mm [MMX] +// * PSRLW mm, mm [MMX] +// * PSRLW m64, mm [MMX] +// * PSRLW imm8, xmm [SSE2] +// * PSRLW xmm, xmm [SSE2] +// * PSRLW m128, xmm [SSE2] +// +func (self *Program) PSRLW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSRLW", 2, Operands { v0, v1 }) + // PSRLW imm8, mm + if isImm8(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x71) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSRLW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd1) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSRLW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd1) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSRLW imm8, xmm + if isImm8(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x0f) + m.emit(0x71) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // PSRLW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd1) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSRLW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd1) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSRLW") + } + return p +} + +// PSUBB performs "Subtract Packed Byte Integers". 
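+//
+// A usage sketch (assuming p is a *Program and XMM0/XMM1 are this package's
+// register constants); as with the other packed arithmetic here, the
+// destination is the last operand and receives dst - src:
+//
+//     p.PSUBB(XMM1, XMM0)  // XMM0[i] = XMM0[i] - XMM1[i] for each of the 16 bytes, wrapping on overflow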
+// +// Mnemonic : PSUBB +// Supported forms : (4 forms) +// +// * PSUBB mm, mm [MMX] +// * PSUBB m64, mm [MMX] +// * PSUBB xmm, xmm [SSE2] +// * PSUBB m128, xmm [SSE2] +// +func (self *Program) PSUBB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSUBB", 2, Operands { v0, v1 }) + // PSUBB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf8) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf8) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSUBB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf8) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf8) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSUBB") + } + return p +} + +// PSUBD performs "Subtract Packed Doubleword Integers". +// +// Mnemonic : PSUBD +// Supported forms : (4 forms) +// +// * PSUBD mm, mm [MMX] +// * PSUBD m64, mm [MMX] +// * PSUBD xmm, xmm [SSE2] +// * PSUBD m128, xmm [SSE2] +// +func (self *Program) PSUBD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSUBD", 2, Operands { v0, v1 }) + // PSUBD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xfa) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xfa) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSUBD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xfa) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xfa) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSUBD") + } + return p +} + +// PSUBQ performs "Subtract Packed Quadword Integers". 
+// +// Mnemonic : PSUBQ +// Supported forms : (4 forms) +// +// * PSUBQ mm, mm [SSE2] +// * PSUBQ m64, mm [SSE2] +// * PSUBQ xmm, xmm [SSE2] +// * PSUBQ m128, xmm [SSE2] +// +func (self *Program) PSUBQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSUBQ", 2, Operands { v0, v1 }) + // PSUBQ mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xfb) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBQ m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xfb) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSUBQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xfb) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xfb) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSUBQ") + } + return p +} + +// PSUBSB performs "Subtract Packed Signed Byte Integers with Signed Saturation". +// +// Mnemonic : PSUBSB +// Supported forms : (4 forms) +// +// * PSUBSB mm, mm [MMX] +// * PSUBSB m64, mm [MMX] +// * PSUBSB xmm, xmm [SSE2] +// * PSUBSB m128, xmm [SSE2] +// +func (self *Program) PSUBSB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSUBSB", 2, Operands { v0, v1 }) + // PSUBSB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe8) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBSB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe8) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSUBSB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe8) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBSB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe8) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSUBSB") + } + return p +} + +// PSUBSW performs "Subtract Packed Signed Word Integers with Signed Saturation". 
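+//
+// A usage sketch (assuming p is a *Program and XMM0/XMM1 are this package's
+// register constants):
+//
+//     p.PSUBSW(XMM1, XMM0)  // XMM0[i] = XMM0[i] - XMM1[i] per word, clamped to [-32768, 32767] instead of wrapping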
+// +// Mnemonic : PSUBSW +// Supported forms : (4 forms) +// +// * PSUBSW mm, mm [MMX] +// * PSUBSW m64, mm [MMX] +// * PSUBSW xmm, xmm [SSE2] +// * PSUBSW m128, xmm [SSE2] +// +func (self *Program) PSUBSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSUBSW", 2, Operands { v0, v1 }) + // PSUBSW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe9) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBSW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe9) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSUBSW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xe9) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBSW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xe9) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSUBSW") + } + return p +} + +// PSUBUSB performs "Subtract Packed Unsigned Byte Integers with Unsigned Saturation". +// +// Mnemonic : PSUBUSB +// Supported forms : (4 forms) +// +// * PSUBUSB mm, mm [MMX] +// * PSUBUSB m64, mm [MMX] +// * PSUBUSB xmm, xmm [SSE2] +// * PSUBUSB m128, xmm [SSE2] +// +func (self *Program) PSUBUSB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSUBUSB", 2, Operands { v0, v1 }) + // PSUBUSB mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd8) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBUSB m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd8) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSUBUSB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd8) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBUSB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd8) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSUBUSB") + } + return p +} + +// PSUBUSW performs "Subtract Packed Unsigned Word Integers with Unsigned Saturation". 
+// +// Mnemonic : PSUBUSW +// Supported forms : (4 forms) +// +// * PSUBUSW mm, mm [MMX] +// * PSUBUSW m64, mm [MMX] +// * PSUBUSW xmm, xmm [SSE2] +// * PSUBUSW m128, xmm [SSE2] +// +func (self *Program) PSUBUSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSUBUSW", 2, Operands { v0, v1 }) + // PSUBUSW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd9) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBUSW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd9) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSUBUSW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xd9) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBUSW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xd9) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSUBUSW") + } + return p +} + +// PSUBW performs "Subtract Packed Word Integers". +// +// Mnemonic : PSUBW +// Supported forms : (4 forms) +// +// * PSUBW mm, mm [MMX] +// * PSUBW m64, mm [MMX] +// * PSUBW xmm, xmm [SSE2] +// * PSUBW m128, xmm [SSE2] +// +func (self *Program) PSUBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSUBW", 2, Operands { v0, v1 }) + // PSUBW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf9) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf9) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PSUBW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xf9) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PSUBW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xf9) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PSUBW") + } + return p +} + +// PSWAPD performs "Packed Swap Doubleword". 
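+//
+// A usage sketch (assuming p is a *Program and MM0/MM1 are this package's MMX
+// register constants; note this instruction requires AMD's 3DNow!+ extension):
+//
+//     p.PSWAPD(MM1, MM0)  // copy MM1 into MM0 with its two 32-bit halves exchanged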
+// +// Mnemonic : PSWAPD +// Supported forms : (2 forms) +// +// * PSWAPD mm, mm [3dnow!+] +// * PSWAPD m64, mm [3dnow!+] +// +func (self *Program) PSWAPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PSWAPD", 2, Operands { v0, v1 }) + // PSWAPD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_3DNOW_PLUS) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + m.emit(0xbb) + }) + } + // PSWAPD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_3DNOW_PLUS) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + m.emit(0xbb) + }) + } + if p.len == 0 { + panic("invalid operands for PSWAPD") + } + return p +} + +// PTEST performs "Packed Logical Compare". +// +// Mnemonic : PTEST +// Supported forms : (2 forms) +// +// * PTEST xmm, xmm [SSE4.1] +// * PTEST m128, xmm [SSE4.1] +// +func (self *Program) PTEST(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PTEST", 2, Operands { v0, v1 }) + // PTEST xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x17) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PTEST m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0x17) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PTEST") + } + return p +} + +// PUNPCKHBW performs "Unpack and Interleave High-Order Bytes into Words". 
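+//
+// A usage sketch (assuming p is a *Program and XMM0/XMM1 are this package's
+// register constants):
+//
+//     p.PUNPCKHBW(XMM1, XMM0)  // interleave the high eight bytes of XMM0 (destination) with the high eight bytes of XMM1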
+// +// Mnemonic : PUNPCKHBW +// Supported forms : (4 forms) +// +// * PUNPCKHBW mm, mm [MMX] +// * PUNPCKHBW m64, mm [MMX] +// * PUNPCKHBW xmm, xmm [SSE2] +// * PUNPCKHBW m128, xmm [SSE2] +// +func (self *Program) PUNPCKHBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PUNPCKHBW", 2, Operands { v0, v1 }) + // PUNPCKHBW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x68) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKHBW m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x68) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PUNPCKHBW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x68) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKHBW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x68) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PUNPCKHBW") + } + return p +} + +// PUNPCKHDQ performs "Unpack and Interleave High-Order Doublewords into Quadwords". +// +// Mnemonic : PUNPCKHDQ +// Supported forms : (4 forms) +// +// * PUNPCKHDQ mm, mm [MMX] +// * PUNPCKHDQ m64, mm [MMX] +// * PUNPCKHDQ xmm, xmm [SSE2] +// * PUNPCKHDQ m128, xmm [SSE2] +// +func (self *Program) PUNPCKHDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PUNPCKHDQ", 2, Operands { v0, v1 }) + // PUNPCKHDQ mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x6a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKHDQ m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x6a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PUNPCKHDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x6a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKHDQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x6a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PUNPCKHDQ") + } + return p +} + +// PUNPCKHQDQ performs "Unpack and Interleave High-Order Quadwords into Double Quadwords". 
+// +// Mnemonic : PUNPCKHQDQ +// Supported forms : (2 forms) +// +// * PUNPCKHQDQ xmm, xmm [SSE2] +// * PUNPCKHQDQ m128, xmm [SSE2] +// +func (self *Program) PUNPCKHQDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PUNPCKHQDQ", 2, Operands { v0, v1 }) + // PUNPCKHQDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x6d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKHQDQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x6d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PUNPCKHQDQ") + } + return p +} + +// PUNPCKHWD performs "Unpack and Interleave High-Order Words into Doublewords". +// +// Mnemonic : PUNPCKHWD +// Supported forms : (4 forms) +// +// * PUNPCKHWD mm, mm [MMX] +// * PUNPCKHWD m64, mm [MMX] +// * PUNPCKHWD xmm, xmm [SSE2] +// * PUNPCKHWD m128, xmm [SSE2] +// +func (self *Program) PUNPCKHWD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PUNPCKHWD", 2, Operands { v0, v1 }) + // PUNPCKHWD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x69) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKHWD m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x69) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PUNPCKHWD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x69) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKHWD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x69) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PUNPCKHWD") + } + return p +} + +// PUNPCKLBW performs "Unpack and Interleave Low-Order Bytes into Words". 
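+//
+// A usage sketch (assuming p is a *Program and XMM0/XMM7 are this package's
+// register constants); paired with a zeroed source register this is the
+// classic byte-to-word widening idiom:
+//
+//     p.PXOR(XMM7, XMM7)       // XMM7 = 0
+//     p.PUNPCKLBW(XMM7, XMM0)  // zero-extend the low eight bytes of XMM0 into eight words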
+// +// Mnemonic : PUNPCKLBW +// Supported forms : (4 forms) +// +// * PUNPCKLBW mm, mm [MMX] +// * PUNPCKLBW m32, mm [MMX] +// * PUNPCKLBW xmm, xmm [SSE2] +// * PUNPCKLBW m128, xmm [SSE2] +// +func (self *Program) PUNPCKLBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PUNPCKLBW", 2, Operands { v0, v1 }) + // PUNPCKLBW mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x60) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKLBW m32, mm + if isM32(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x60) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PUNPCKLBW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x60) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKLBW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x60) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PUNPCKLBW") + } + return p +} + +// PUNPCKLDQ performs "Unpack and Interleave Low-Order Doublewords into Quadwords". +// +// Mnemonic : PUNPCKLDQ +// Supported forms : (4 forms) +// +// * PUNPCKLDQ mm, mm [MMX] +// * PUNPCKLDQ m32, mm [MMX] +// * PUNPCKLDQ xmm, xmm [SSE2] +// * PUNPCKLDQ m128, xmm [SSE2] +// +func (self *Program) PUNPCKLDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PUNPCKLDQ", 2, Operands { v0, v1 }) + // PUNPCKLDQ mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x62) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKLDQ m32, mm + if isM32(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x62) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PUNPCKLDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x62) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKLDQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x62) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PUNPCKLDQ") + } + return p +} + +// PUNPCKLQDQ performs "Unpack and Interleave Low-Order Quadwords into Double Quadwords". 
+// +// Mnemonic : PUNPCKLQDQ +// Supported forms : (2 forms) +// +// * PUNPCKLQDQ xmm, xmm [SSE2] +// * PUNPCKLQDQ m128, xmm [SSE2] +// +func (self *Program) PUNPCKLQDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PUNPCKLQDQ", 2, Operands { v0, v1 }) + // PUNPCKLQDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x6c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKLQDQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x6c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PUNPCKLQDQ") + } + return p +} + +// PUNPCKLWD performs "Unpack and Interleave Low-Order Words into Doublewords". +// +// Mnemonic : PUNPCKLWD +// Supported forms : (4 forms) +// +// * PUNPCKLWD mm, mm [MMX] +// * PUNPCKLWD m32, mm [MMX] +// * PUNPCKLWD xmm, xmm [SSE2] +// * PUNPCKLWD m128, xmm [SSE2] +// +func (self *Program) PUNPCKLWD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PUNPCKLWD", 2, Operands { v0, v1 }) + // PUNPCKLWD mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x61) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKLWD m32, mm + if isM32(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x61) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PUNPCKLWD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x61) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PUNPCKLWD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x61) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PUNPCKLWD") + } + return p +} + +// PUSHQ performs "Push Value Onto the Stack". 
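+//
+// A usage sketch (assuming p is a *Program, RAX is this package's register
+// constant, and a plain Go integer satisfies the immediate checks):
+//
+//     p.PUSHQ(RAX)  // push the 64-bit register
+//     p.PUSHQ(8)    // push a sign-extended immediate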
+// +// Mnemonic : PUSH +// Supported forms : (4 forms) +// +// * PUSHQ imm8 +// * PUSHQ imm32 +// * PUSHQ r64 +// * PUSHQ m64 +// +func (self *Program) PUSHQ(v0 interface{}) *Instruction { + p := self.alloc("PUSHQ", 1, Operands { v0 }) + // PUSHQ imm8 + if isImm8Ext(v0, 8) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x6a) + m.imm1(toImmAny(v[0])) + }) + } + // PUSHQ imm32 + if isImm32Ext(v0, 8) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x68) + m.imm4(toImmAny(v[0])) + }) + } + // PUSHQ r64 + if isReg64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0x50 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0xff) + m.emit(0xf0 | lcode(v[0])) + }) + } + // PUSHQ m64 + if isM64(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0xff) + m.mrsd(6, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PUSHQ") + } + return p +} + +// PUSHW performs "Push Value Onto the Stack". +// +// Mnemonic : PUSH +// Supported forms : (2 forms) +// +// * PUSHW r16 +// * PUSHW m16 +// +func (self *Program) PUSHW(v0 interface{}) *Instruction { + p := self.alloc("PUSHW", 1, Operands { v0 }) + // PUSHW r16 + if isReg16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0x50 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0xff) + m.emit(0xf0 | lcode(v[0])) + }) + } + // PUSHW m16 + if isM16(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[0]), false) + m.emit(0xff) + m.mrsd(6, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PUSHW") + } + return p +} + +// PXOR performs "Packed Bitwise Logical Exclusive OR". 
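+//
+// A usage sketch (assuming p is a *Program and XMM0 is this package's
+// register constant); XORing a register with itself is the usual zeroing
+// idiom:
+//
+//     p.PXOR(XMM0, XMM0)  // XMM0 = 0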
+// +// Mnemonic : PXOR +// Supported forms : (4 forms) +// +// * PXOR mm, mm [MMX] +// * PXOR m64, mm [MMX] +// * PXOR xmm, xmm [SSE2] +// * PXOR m128, xmm [SSE2] +// +func (self *Program) PXOR(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("PXOR", 2, Operands { v0, v1 }) + // PXOR mm, mm + if isMM(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xef) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PXOR m64, mm + if isM64(v0) && isMM(v1) { + self.require(ISA_MMX) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xef) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // PXOR xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xef) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // PXOR m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xef) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for PXOR") + } + return p +} + +// RCLB performs "Rotate Left through Carry Flag". +// +// Mnemonic : RCL +// Supported forms : (6 forms) +// +// * RCLB 1, r8 +// * RCLB imm8, r8 +// * RCLB cl, r8 +// * RCLB 1, m8 +// * RCLB imm8, m8 +// * RCLB cl, m8 +// +func (self *Program) RCLB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RCLB", 2, Operands { v0, v1 }) + // RCLB 1, r8 + if isConst1(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd0) + m.emit(0xd0 | lcode(v[1])) + }) + } + // RCLB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xc0) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RCLB cl, r8 + if v0 == CL && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd2) + m.emit(0xd0 | lcode(v[1])) + }) + } + // RCLB 1, m8 + if isConst1(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd0) + m.mrsd(2, addr(v[1]), 1) + }) + } + // RCLB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc0) + m.mrsd(2, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // RCLB cl, m8 + if v0 == CL && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd2) + m.mrsd(2, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RCLB") + } + return p +} + +// RCLL performs "Rotate Left through Carry Flag". 
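+//
+// A usage sketch (assuming p is a *Program and EAX/CL are this package's
+// register constants); the rotation is effectively 33 bits wide, with the
+// carry flag acting as the extra bit:
+//
+//     p.RCLL(1, EAX)   // rotate EAX left by one through the carry flag
+//     p.RCLL(CL, EAX)  // variable rotate count taken from CL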
+// +// Mnemonic : RCL +// Supported forms : (6 forms) +// +// * RCLL 1, r32 +// * RCLL imm8, r32 +// * RCLL cl, r32 +// * RCLL 1, m32 +// * RCLL imm8, m32 +// * RCLL cl, m32 +// +func (self *Program) RCLL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RCLL", 2, Operands { v0, v1 }) + // RCLL 1, r32 + if isConst1(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xd0 | lcode(v[1])) + }) + } + // RCLL imm8, r32 + if isImm8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RCLL cl, r32 + if v0 == CL && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xd0 | lcode(v[1])) + }) + } + // RCLL 1, m32 + if isConst1(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(2, addr(v[1]), 1) + }) + } + // RCLL imm8, m32 + if isImm8(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(2, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // RCLL cl, m32 + if v0 == CL && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(2, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RCLL") + } + return p +} + +// RCLQ performs "Rotate Left through Carry Flag". +// +// Mnemonic : RCL +// Supported forms : (6 forms) +// +// * RCLQ 1, r64 +// * RCLQ imm8, r64 +// * RCLQ cl, r64 +// * RCLQ 1, m64 +// * RCLQ imm8, m64 +// * RCLQ cl, m64 +// +func (self *Program) RCLQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RCLQ", 2, Operands { v0, v1 }) + // RCLQ 1, r64 + if isConst1(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd1) + m.emit(0xd0 | lcode(v[1])) + }) + } + // RCLQ imm8, r64 + if isImm8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xc1) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RCLQ cl, r64 + if v0 == CL && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd3) + m.emit(0xd0 | lcode(v[1])) + }) + } + // RCLQ 1, m64 + if isConst1(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd1) + m.mrsd(2, addr(v[1]), 1) + }) + } + // RCLQ imm8, m64 + if isImm8(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xc1) + m.mrsd(2, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // RCLQ cl, m64 + if v0 == CL && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd3) + m.mrsd(2, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RCLQ") + } + return p +} + +// RCLW performs "Rotate Left through Carry Flag". 
+// +// Mnemonic : RCL +// Supported forms : (6 forms) +// +// * RCLW 1, r16 +// * RCLW imm8, r16 +// * RCLW cl, r16 +// * RCLW 1, m16 +// * RCLW imm8, m16 +// * RCLW cl, m16 +// +func (self *Program) RCLW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RCLW", 2, Operands { v0, v1 }) + // RCLW 1, r16 + if isConst1(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xd0 | lcode(v[1])) + }) + } + // RCLW imm8, r16 + if isImm8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RCLW cl, r16 + if v0 == CL && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xd0 | lcode(v[1])) + }) + } + // RCLW 1, m16 + if isConst1(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(2, addr(v[1]), 1) + }) + } + // RCLW imm8, m16 + if isImm8(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(2, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // RCLW cl, m16 + if v0 == CL && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(2, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RCLW") + } + return p +} + +// RCPPS performs "Compute Approximate Reciprocals of Packed Single-Precision Floating-Point Values". +// +// Mnemonic : RCPPS +// Supported forms : (2 forms) +// +// * RCPPS xmm, xmm [SSE] +// * RCPPS m128, xmm [SSE] +// +func (self *Program) RCPPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RCPPS", 2, Operands { v0, v1 }) + // RCPPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x53) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // RCPPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x53) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RCPPS") + } + return p +} + +// RCPSS performs "Compute Approximate Reciprocal of Scalar Single-Precision Floating-Point Values". 
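+//
+// Sketch, assuming the package exports XMM register constants such as
+// XMM0/XMM1 (only general-purpose register names appear in this diff):
+//
+//     p.RCPSS(XMM1, XMM0)    // xmm0[31:0] = approx. 1/xmm1[31:0]; the upper bits of xmm0 are preserved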
+// +// Mnemonic : RCPSS +// Supported forms : (2 forms) +// +// * RCPSS xmm, xmm [SSE] +// * RCPSS m32, xmm [SSE] +// +func (self *Program) RCPSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RCPSS", 2, Operands { v0, v1 }) + // RCPSS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x53) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // RCPSS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x53) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RCPSS") + } + return p +} + +// RCRB performs "Rotate Right through Carry Flag". +// +// Mnemonic : RCR +// Supported forms : (6 forms) +// +// * RCRB 1, r8 +// * RCRB imm8, r8 +// * RCRB cl, r8 +// * RCRB 1, m8 +// * RCRB imm8, m8 +// * RCRB cl, m8 +// +func (self *Program) RCRB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RCRB", 2, Operands { v0, v1 }) + // RCRB 1, r8 + if isConst1(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd0) + m.emit(0xd8 | lcode(v[1])) + }) + } + // RCRB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xc0) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RCRB cl, r8 + if v0 == CL && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd2) + m.emit(0xd8 | lcode(v[1])) + }) + } + // RCRB 1, m8 + if isConst1(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd0) + m.mrsd(3, addr(v[1]), 1) + }) + } + // RCRB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc0) + m.mrsd(3, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // RCRB cl, m8 + if v0 == CL && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd2) + m.mrsd(3, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RCRB") + } + return p +} + +// RCRL performs "Rotate Right through Carry Flag". 
+// +// Mnemonic : RCR +// Supported forms : (6 forms) +// +// * RCRL 1, r32 +// * RCRL imm8, r32 +// * RCRL cl, r32 +// * RCRL 1, m32 +// * RCRL imm8, m32 +// * RCRL cl, m32 +// +func (self *Program) RCRL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RCRL", 2, Operands { v0, v1 }) + // RCRL 1, r32 + if isConst1(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xd8 | lcode(v[1])) + }) + } + // RCRL imm8, r32 + if isImm8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RCRL cl, r32 + if v0 == CL && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xd8 | lcode(v[1])) + }) + } + // RCRL 1, m32 + if isConst1(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(3, addr(v[1]), 1) + }) + } + // RCRL imm8, m32 + if isImm8(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(3, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // RCRL cl, m32 + if v0 == CL && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(3, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RCRL") + } + return p +} + +// RCRQ performs "Rotate Right through Carry Flag". +// +// Mnemonic : RCR +// Supported forms : (6 forms) +// +// * RCRQ 1, r64 +// * RCRQ imm8, r64 +// * RCRQ cl, r64 +// * RCRQ 1, m64 +// * RCRQ imm8, m64 +// * RCRQ cl, m64 +// +func (self *Program) RCRQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RCRQ", 2, Operands { v0, v1 }) + // RCRQ 1, r64 + if isConst1(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd1) + m.emit(0xd8 | lcode(v[1])) + }) + } + // RCRQ imm8, r64 + if isImm8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xc1) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RCRQ cl, r64 + if v0 == CL && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd3) + m.emit(0xd8 | lcode(v[1])) + }) + } + // RCRQ 1, m64 + if isConst1(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd1) + m.mrsd(3, addr(v[1]), 1) + }) + } + // RCRQ imm8, m64 + if isImm8(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xc1) + m.mrsd(3, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // RCRQ cl, m64 + if v0 == CL && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd3) + m.mrsd(3, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RCRQ") + } + return p +} + +// RCRW performs "Rotate Right through Carry Flag". 
+// +// Mnemonic : RCR +// Supported forms : (6 forms) +// +// * RCRW 1, r16 +// * RCRW imm8, r16 +// * RCRW cl, r16 +// * RCRW 1, m16 +// * RCRW imm8, m16 +// * RCRW cl, m16 +// +func (self *Program) RCRW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RCRW", 2, Operands { v0, v1 }) + // RCRW 1, r16 + if isConst1(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xd8 | lcode(v[1])) + }) + } + // RCRW imm8, r16 + if isImm8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RCRW cl, r16 + if v0 == CL && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xd8 | lcode(v[1])) + }) + } + // RCRW 1, m16 + if isConst1(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(3, addr(v[1]), 1) + }) + } + // RCRW imm8, m16 + if isImm8(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(3, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // RCRW cl, m16 + if v0 == CL && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(3, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RCRW") + } + return p +} + +// RDRAND performs "Read Random Number". +// +// Mnemonic : RDRAND +// Supported forms : (3 forms) +// +// * RDRAND r16 [RDRAND] +// * RDRAND r32 [RDRAND] +// * RDRAND r64 [RDRAND] +// +func (self *Program) RDRAND(v0 interface{}) *Instruction { + p := self.alloc("RDRAND", 1, Operands { v0 }) + // RDRAND r16 + if isReg16(v0) { + self.require(ISA_RDRAND) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0x0f) + m.emit(0xc7) + m.emit(0xf0 | lcode(v[0])) + }) + } + // RDRAND r32 + if isReg32(v0) { + self.require(ISA_RDRAND) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0x0f) + m.emit(0xc7) + m.emit(0xf0 | lcode(v[0])) + }) + } + // RDRAND r64 + if isReg64(v0) { + self.require(ISA_RDRAND) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xc7) + m.emit(0xf0 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for RDRAND") + } + return p +} + +// RDSEED performs "Read Random SEED". 
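+//
+// Sketch (p is a *Program from this package):
+//
+//     p.RDSEED(RAX)    // rdseed rax: CF=1 means a valid seed was returned, CF=0 means retry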
+// +// Mnemonic : RDSEED +// Supported forms : (3 forms) +// +// * RDSEED r16 [RDSEED] +// * RDSEED r32 [RDSEED] +// * RDSEED r64 [RDSEED] +// +func (self *Program) RDSEED(v0 interface{}) *Instruction { + p := self.alloc("RDSEED", 1, Operands { v0 }) + // RDSEED r16 + if isReg16(v0) { + self.require(ISA_RDSEED) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0x0f) + m.emit(0xc7) + m.emit(0xf8 | lcode(v[0])) + }) + } + // RDSEED r32 + if isReg32(v0) { + self.require(ISA_RDSEED) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0x0f) + m.emit(0xc7) + m.emit(0xf8 | lcode(v[0])) + }) + } + // RDSEED r64 + if isReg64(v0) { + self.require(ISA_RDSEED) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xc7) + m.emit(0xf8 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for RDSEED") + } + return p +} + +// RDTSC performs "Read Time-Stamp Counter". +// +// Mnemonic : RDTSC +// Supported forms : (1 form) +// +// * RDTSC [RDTSC] +// +func (self *Program) RDTSC() *Instruction { + p := self.alloc("RDTSC", 0, Operands { }) + // RDTSC + self.require(ISA_RDTSC) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x31) + }) + return p +} + +// RDTSCP performs "Read Time-Stamp Counter and Processor ID". +// +// Mnemonic : RDTSCP +// Supported forms : (1 form) +// +// * RDTSCP [RDTSCP] +// +func (self *Program) RDTSCP() *Instruction { + p := self.alloc("RDTSCP", 0, Operands { }) + // RDTSCP + self.require(ISA_RDTSCP) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x01) + m.emit(0xf9) + }) + return p +} + +// RET performs "Return from Procedure". +// +// Mnemonic : RET +// Supported forms : (2 forms) +// +// * RET +// * RET imm16 +// +func (self *Program) RET(vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("RET", 0, Operands { }) + case 1 : p = self.alloc("RET", 1, Operands { vv[0] }) + default : panic("instruction RET takes 0 or 1 operands") + } + // RET + if len(vv) == 0 { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc3) + }) + } + // RET imm16 + if len(vv) == 1 && isImm16(vv[0]) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc2) + m.imm2(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for RET") + } + return p +} + +// ROLB performs "Rotate Left". 
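+//
+// Sketch (p is a *Program); the count operand comes first, as in the forms
+// listed below:
+//
+//     p.ROLB(CL, AL)    // rol al, cl: rotate AL left by the count held in CL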
+// +// Mnemonic : ROL +// Supported forms : (6 forms) +// +// * ROLB 1, r8 +// * ROLB imm8, r8 +// * ROLB cl, r8 +// * ROLB 1, m8 +// * ROLB imm8, m8 +// * ROLB cl, m8 +// +func (self *Program) ROLB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ROLB", 2, Operands { v0, v1 }) + // ROLB 1, r8 + if isConst1(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd0) + m.emit(0xc0 | lcode(v[1])) + }) + } + // ROLB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xc0) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ROLB cl, r8 + if v0 == CL && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd2) + m.emit(0xc0 | lcode(v[1])) + }) + } + // ROLB 1, m8 + if isConst1(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd0) + m.mrsd(0, addr(v[1]), 1) + }) + } + // ROLB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc0) + m.mrsd(0, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ROLB cl, m8 + if v0 == CL && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd2) + m.mrsd(0, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ROLB") + } + return p +} + +// ROLL performs "Rotate Left". +// +// Mnemonic : ROL +// Supported forms : (6 forms) +// +// * ROLL 1, r32 +// * ROLL imm8, r32 +// * ROLL cl, r32 +// * ROLL 1, m32 +// * ROLL imm8, m32 +// * ROLL cl, m32 +// +func (self *Program) ROLL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ROLL", 2, Operands { v0, v1 }) + // ROLL 1, r32 + if isConst1(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xc0 | lcode(v[1])) + }) + } + // ROLL imm8, r32 + if isImm8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ROLL cl, r32 + if v0 == CL && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xc0 | lcode(v[1])) + }) + } + // ROLL 1, m32 + if isConst1(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(0, addr(v[1]), 1) + }) + } + // ROLL imm8, m32 + if isImm8(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(0, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ROLL cl, m32 + if v0 == CL && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(0, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ROLL") + } + return p +} + +// ROLQ performs "Rotate Left". 
+// +// Mnemonic : ROL +// Supported forms : (6 forms) +// +// * ROLQ 1, r64 +// * ROLQ imm8, r64 +// * ROLQ cl, r64 +// * ROLQ 1, m64 +// * ROLQ imm8, m64 +// * ROLQ cl, m64 +// +func (self *Program) ROLQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ROLQ", 2, Operands { v0, v1 }) + // ROLQ 1, r64 + if isConst1(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd1) + m.emit(0xc0 | lcode(v[1])) + }) + } + // ROLQ imm8, r64 + if isImm8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xc1) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ROLQ cl, r64 + if v0 == CL && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd3) + m.emit(0xc0 | lcode(v[1])) + }) + } + // ROLQ 1, m64 + if isConst1(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd1) + m.mrsd(0, addr(v[1]), 1) + }) + } + // ROLQ imm8, m64 + if isImm8(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xc1) + m.mrsd(0, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ROLQ cl, m64 + if v0 == CL && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd3) + m.mrsd(0, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ROLQ") + } + return p +} + +// ROLW performs "Rotate Left". +// +// Mnemonic : ROL +// Supported forms : (6 forms) +// +// * ROLW 1, r16 +// * ROLW imm8, r16 +// * ROLW cl, r16 +// * ROLW 1, m16 +// * ROLW imm8, m16 +// * ROLW cl, m16 +// +func (self *Program) ROLW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("ROLW", 2, Operands { v0, v1 }) + // ROLW 1, r16 + if isConst1(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xc0 | lcode(v[1])) + }) + } + // ROLW imm8, r16 + if isImm8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ROLW cl, r16 + if v0 == CL && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xc0 | lcode(v[1])) + }) + } + // ROLW 1, m16 + if isConst1(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(0, addr(v[1]), 1) + }) + } + // ROLW imm8, m16 + if isImm8(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(0, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // ROLW cl, m16 + if v0 == CL && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(0, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for ROLW") + } + return p +} + +// RORB performs "Rotate Right". 
+// +// Mnemonic : ROR +// Supported forms : (6 forms) +// +// * RORB 1, r8 +// * RORB imm8, r8 +// * RORB cl, r8 +// * RORB 1, m8 +// * RORB imm8, m8 +// * RORB cl, m8 +// +func (self *Program) RORB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RORB", 2, Operands { v0, v1 }) + // RORB 1, r8 + if isConst1(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd0) + m.emit(0xc8 | lcode(v[1])) + }) + } + // RORB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xc0) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RORB cl, r8 + if v0 == CL && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd2) + m.emit(0xc8 | lcode(v[1])) + }) + } + // RORB 1, m8 + if isConst1(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd0) + m.mrsd(1, addr(v[1]), 1) + }) + } + // RORB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc0) + m.mrsd(1, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // RORB cl, m8 + if v0 == CL && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd2) + m.mrsd(1, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RORB") + } + return p +} + +// RORL performs "Rotate Right". +// +// Mnemonic : ROR +// Supported forms : (6 forms) +// +// * RORL 1, r32 +// * RORL imm8, r32 +// * RORL cl, r32 +// * RORL 1, m32 +// * RORL imm8, m32 +// * RORL cl, m32 +// +func (self *Program) RORL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RORL", 2, Operands { v0, v1 }) + // RORL 1, r32 + if isConst1(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xc8 | lcode(v[1])) + }) + } + // RORL imm8, r32 + if isImm8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RORL cl, r32 + if v0 == CL && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xc8 | lcode(v[1])) + }) + } + // RORL 1, m32 + if isConst1(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(1, addr(v[1]), 1) + }) + } + // RORL imm8, m32 + if isImm8(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(1, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // RORL cl, m32 + if v0 == CL && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(1, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RORL") + } + return p +} + +// RORQ performs "Rotate Right". 
+// +// Mnemonic : ROR +// Supported forms : (6 forms) +// +// * RORQ 1, r64 +// * RORQ imm8, r64 +// * RORQ cl, r64 +// * RORQ 1, m64 +// * RORQ imm8, m64 +// * RORQ cl, m64 +// +func (self *Program) RORQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RORQ", 2, Operands { v0, v1 }) + // RORQ 1, r64 + if isConst1(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd1) + m.emit(0xc8 | lcode(v[1])) + }) + } + // RORQ imm8, r64 + if isImm8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xc1) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RORQ cl, r64 + if v0 == CL && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd3) + m.emit(0xc8 | lcode(v[1])) + }) + } + // RORQ 1, m64 + if isConst1(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd1) + m.mrsd(1, addr(v[1]), 1) + }) + } + // RORQ imm8, m64 + if isImm8(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xc1) + m.mrsd(1, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // RORQ cl, m64 + if v0 == CL && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd3) + m.mrsd(1, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RORQ") + } + return p +} + +// RORW performs "Rotate Right". +// +// Mnemonic : ROR +// Supported forms : (6 forms) +// +// * RORW 1, r16 +// * RORW imm8, r16 +// * RORW cl, r16 +// * RORW 1, m16 +// * RORW imm8, m16 +// * RORW cl, m16 +// +func (self *Program) RORW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RORW", 2, Operands { v0, v1 }) + // RORW 1, r16 + if isConst1(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xc8 | lcode(v[1])) + }) + } + // RORW imm8, r16 + if isImm8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RORW cl, r16 + if v0 == CL && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xc8 | lcode(v[1])) + }) + } + // RORW 1, m16 + if isConst1(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(1, addr(v[1]), 1) + }) + } + // RORW imm8, m16 + if isImm8(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(1, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // RORW cl, m16 + if v0 == CL && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(1, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RORW") + } + return p +} + +// RORXL performs "Rotate Right Logical Without Affecting Flags". 
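+//
+// Sketch (ECX is assumed to be exported alongside EAX):
+//
+//     p.RORXL(8, EAX, ECX)    // rorx ecx, eax, 8: ECX = EAX rotated right by 8, EFLAGS untouched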
+// +// Mnemonic : RORX +// Supported forms : (2 forms) +// +// * RORXL imm8, r32, r32 [BMI2] +// * RORXL imm8, m32, r32 [BMI2] +// +func (self *Program) RORXL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("RORXL", 3, Operands { v0, v1, v2 }) + // RORXL imm8, r32, r32 + if isImm8(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7b) + m.emit(0xf0) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RORXL imm8, m32, r32 + if isImm8(v0) && isM32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x03, hcode(v[2]), addr(v[1]), 0) + m.emit(0xf0) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for RORXL") + } + return p +} + +// RORXQ performs "Rotate Right Logical Without Affecting Flags". +// +// Mnemonic : RORX +// Supported forms : (2 forms) +// +// * RORXQ imm8, r64, r64 [BMI2] +// * RORXQ imm8, m64, r64 [BMI2] +// +func (self *Program) RORXQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("RORXQ", 3, Operands { v0, v1, v2 }) + // RORXQ imm8, r64, r64 + if isImm8(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xfb) + m.emit(0xf0) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // RORXQ imm8, m64, r64 + if isImm8(v0) && isM64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x83, hcode(v[2]), addr(v[1]), 0) + m.emit(0xf0) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for RORXQ") + } + return p +} + +// ROUNDPD performs "Round Packed Double Precision Floating-Point Values". +// +// Mnemonic : ROUNDPD +// Supported forms : (2 forms) +// +// * ROUNDPD imm8, xmm, xmm [SSE4.1] +// * ROUNDPD imm8, m128, xmm [SSE4.1] +// +func (self *Program) ROUNDPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("ROUNDPD", 3, Operands { v0, v1, v2 }) + // ROUNDPD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x09) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ROUNDPD imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x09) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for ROUNDPD") + } + return p +} + +// ROUNDPS performs "Round Packed Single Precision Floating-Point Values". 
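+//
+// Sketch (XMM names assumed). The imm8 selects the rounding mode; 0x3
+// truncates toward zero:
+//
+//     p.ROUNDPS(0x3, XMM1, XMM0)    // roundps xmm0, xmm1, 0x3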
+// +// Mnemonic : ROUNDPS +// Supported forms : (2 forms) +// +// * ROUNDPS imm8, xmm, xmm [SSE4.1] +// * ROUNDPS imm8, m128, xmm [SSE4.1] +// +func (self *Program) ROUNDPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("ROUNDPS", 3, Operands { v0, v1, v2 }) + // ROUNDPS imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x08) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ROUNDPS imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x08) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for ROUNDPS") + } + return p +} + +// ROUNDSD performs "Round Scalar Double Precision Floating-Point Values". +// +// Mnemonic : ROUNDSD +// Supported forms : (2 forms) +// +// * ROUNDSD imm8, xmm, xmm [SSE4.1] +// * ROUNDSD imm8, m64, xmm [SSE4.1] +// +func (self *Program) ROUNDSD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("ROUNDSD", 3, Operands { v0, v1, v2 }) + // ROUNDSD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x0b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ROUNDSD imm8, m64, xmm + if isImm8(v0) && isM64(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x0b) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for ROUNDSD") + } + return p +} + +// ROUNDSS performs "Round Scalar Single Precision Floating-Point Values". +// +// Mnemonic : ROUNDSS +// Supported forms : (2 forms) +// +// * ROUNDSS imm8, xmm, xmm [SSE4.1] +// * ROUNDSS imm8, m32, xmm [SSE4.1] +// +func (self *Program) ROUNDSS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("ROUNDSS", 3, Operands { v0, v1, v2 }) + // ROUNDSS imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x0a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // ROUNDSS imm8, m32, xmm + if isImm8(v0) && isM32(v1) && isXMM(v2) { + self.require(ISA_SSE4_1) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0x0a) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for ROUNDSS") + } + return p +} + +// RSQRTPS performs "Compute Reciprocals of Square Roots of Packed Single-Precision Floating-Point Values". 
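+//
+// Sketch (XMM names assumed). The hardware approximation has a relative
+// error of at most 1.5 * 2^-12, so callers often refine the result with one
+// Newton-Raphson step:
+//
+//     p.RSQRTPS(XMM0, XMM0)    // per lane: xmm0 = approx. 1/sqrt(xmm0)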
+// +// Mnemonic : RSQRTPS +// Supported forms : (2 forms) +// +// * RSQRTPS xmm, xmm [SSE] +// * RSQRTPS m128, xmm [SSE] +// +func (self *Program) RSQRTPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RSQRTPS", 2, Operands { v0, v1 }) + // RSQRTPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x52) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // RSQRTPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x52) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RSQRTPS") + } + return p +} + +// RSQRTSS performs "Compute Reciprocal of Square Root of Scalar Single-Precision Floating-Point Value". +// +// Mnemonic : RSQRTSS +// Supported forms : (2 forms) +// +// * RSQRTSS xmm, xmm [SSE] +// * RSQRTSS m32, xmm [SSE] +// +func (self *Program) RSQRTSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("RSQRTSS", 2, Operands { v0, v1 }) + // RSQRTSS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x52) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // RSQRTSS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x52) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for RSQRTSS") + } + return p +} + +// SALB performs "Arithmetic Shift Left". 
+// +// Mnemonic : SAL +// Supported forms : (6 forms) +// +// * SALB 1, r8 +// * SALB imm8, r8 +// * SALB cl, r8 +// * SALB 1, m8 +// * SALB imm8, m8 +// * SALB cl, m8 +// +func (self *Program) SALB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SALB", 2, Operands { v0, v1 }) + // SALB 1, r8 + if isConst1(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd0) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SALB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xc0) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SALB cl, r8 + if v0 == CL && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd2) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SALB 1, m8 + if isConst1(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd0) + m.mrsd(4, addr(v[1]), 1) + }) + } + // SALB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc0) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SALB cl, m8 + if v0 == CL && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd2) + m.mrsd(4, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SALB") + } + return p +} + +// SALL performs "Arithmetic Shift Left". +// +// Mnemonic : SAL +// Supported forms : (6 forms) +// +// * SALL 1, r32 +// * SALL imm8, r32 +// * SALL cl, r32 +// * SALL 1, m32 +// * SALL imm8, m32 +// * SALL cl, m32 +// +func (self *Program) SALL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SALL", 2, Operands { v0, v1 }) + // SALL 1, r32 + if isConst1(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SALL imm8, r32 + if isImm8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SALL cl, r32 + if v0 == CL && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SALL 1, m32 + if isConst1(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(4, addr(v[1]), 1) + }) + } + // SALL imm8, m32 + if isImm8(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SALL cl, m32 + if v0 == CL && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(4, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SALL") + } + return p +} + +// SALQ performs "Arithmetic Shift Left". 
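+//
+// Sketch (p is a *Program). A left shift by k multiplies by 2^k, so this
+// scales RAX by 8:
+//
+//     p.SALQ(3, RAX)    // sal rax, 3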
+// +// Mnemonic : SAL +// Supported forms : (6 forms) +// +// * SALQ 1, r64 +// * SALQ imm8, r64 +// * SALQ cl, r64 +// * SALQ 1, m64 +// * SALQ imm8, m64 +// * SALQ cl, m64 +// +func (self *Program) SALQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SALQ", 2, Operands { v0, v1 }) + // SALQ 1, r64 + if isConst1(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd1) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SALQ imm8, r64 + if isImm8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xc1) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SALQ cl, r64 + if v0 == CL && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd3) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SALQ 1, m64 + if isConst1(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd1) + m.mrsd(4, addr(v[1]), 1) + }) + } + // SALQ imm8, m64 + if isImm8(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xc1) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SALQ cl, m64 + if v0 == CL && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd3) + m.mrsd(4, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SALQ") + } + return p +} + +// SALW performs "Arithmetic Shift Left". +// +// Mnemonic : SAL +// Supported forms : (6 forms) +// +// * SALW 1, r16 +// * SALW imm8, r16 +// * SALW cl, r16 +// * SALW 1, m16 +// * SALW imm8, m16 +// * SALW cl, m16 +// +func (self *Program) SALW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SALW", 2, Operands { v0, v1 }) + // SALW 1, r16 + if isConst1(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SALW imm8, r16 + if isImm8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SALW cl, r16 + if v0 == CL && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SALW 1, m16 + if isConst1(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(4, addr(v[1]), 1) + }) + } + // SALW imm8, m16 + if isImm8(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SALW cl, m16 + if v0 == CL && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(4, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SALW") + } + return p +} + +// SARB performs "Arithmetic Shift Right". 
+// +// Mnemonic : SAR +// Supported forms : (6 forms) +// +// * SARB 1, r8 +// * SARB imm8, r8 +// * SARB cl, r8 +// * SARB 1, m8 +// * SARB imm8, m8 +// * SARB cl, m8 +// +func (self *Program) SARB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SARB", 2, Operands { v0, v1 }) + // SARB 1, r8 + if isConst1(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd0) + m.emit(0xf8 | lcode(v[1])) + }) + } + // SARB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xc0) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SARB cl, r8 + if v0 == CL && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd2) + m.emit(0xf8 | lcode(v[1])) + }) + } + // SARB 1, m8 + if isConst1(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd0) + m.mrsd(7, addr(v[1]), 1) + }) + } + // SARB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc0) + m.mrsd(7, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SARB cl, m8 + if v0 == CL && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd2) + m.mrsd(7, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SARB") + } + return p +} + +// SARL performs "Arithmetic Shift Right". +// +// Mnemonic : SAR +// Supported forms : (6 forms) +// +// * SARL 1, r32 +// * SARL imm8, r32 +// * SARL cl, r32 +// * SARL 1, m32 +// * SARL imm8, m32 +// * SARL cl, m32 +// +func (self *Program) SARL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SARL", 2, Operands { v0, v1 }) + // SARL 1, r32 + if isConst1(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xf8 | lcode(v[1])) + }) + } + // SARL imm8, r32 + if isImm8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SARL cl, r32 + if v0 == CL && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xf8 | lcode(v[1])) + }) + } + // SARL 1, m32 + if isConst1(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(7, addr(v[1]), 1) + }) + } + // SARL imm8, m32 + if isImm8(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(7, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SARL cl, m32 + if v0 == CL && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(7, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SARL") + } + return p +} + +// SARQ performs "Arithmetic Shift Right". 
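+//
+// Sketch (p is a *Program). The arithmetic shift replicates the sign bit,
+// so this divides the signed value in RAX by 4, rounding toward negative
+// infinity:
+//
+//     p.SARQ(2, RAX)    // sar rax, 2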
+// +// Mnemonic : SAR +// Supported forms : (6 forms) +// +// * SARQ 1, r64 +// * SARQ imm8, r64 +// * SARQ cl, r64 +// * SARQ 1, m64 +// * SARQ imm8, m64 +// * SARQ cl, m64 +// +func (self *Program) SARQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SARQ", 2, Operands { v0, v1 }) + // SARQ 1, r64 + if isConst1(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd1) + m.emit(0xf8 | lcode(v[1])) + }) + } + // SARQ imm8, r64 + if isImm8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xc1) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SARQ cl, r64 + if v0 == CL && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd3) + m.emit(0xf8 | lcode(v[1])) + }) + } + // SARQ 1, m64 + if isConst1(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd1) + m.mrsd(7, addr(v[1]), 1) + }) + } + // SARQ imm8, m64 + if isImm8(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xc1) + m.mrsd(7, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SARQ cl, m64 + if v0 == CL && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd3) + m.mrsd(7, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SARQ") + } + return p +} + +// SARW performs "Arithmetic Shift Right". +// +// Mnemonic : SAR +// Supported forms : (6 forms) +// +// * SARW 1, r16 +// * SARW imm8, r16 +// * SARW cl, r16 +// * SARW 1, m16 +// * SARW imm8, m16 +// * SARW cl, m16 +// +func (self *Program) SARW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SARW", 2, Operands { v0, v1 }) + // SARW 1, r16 + if isConst1(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xf8 | lcode(v[1])) + }) + } + // SARW imm8, r16 + if isImm8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SARW cl, r16 + if v0 == CL && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xf8 | lcode(v[1])) + }) + } + // SARW 1, m16 + if isConst1(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(7, addr(v[1]), 1) + }) + } + // SARW imm8, m16 + if isImm8(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(7, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SARW cl, m16 + if v0 == CL && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(7, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SARW") + } + return p +} + +// SARXL performs "Arithmetic Shift Right Without Affecting Flags". 
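+//
+// Sketch (ECX/EDX assumed to be exported alongside EAX). Unlike SARL/SARQ,
+// the count comes from a general register and EFLAGS are left unmodified:
+//
+//     p.SARXL(ECX, EAX, EDX)    // sarx edx, eax, ecx: EDX = EAX >> (ECX & 31), sign-filled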
+// +// Mnemonic : SARX +// Supported forms : (2 forms) +// +// * SARXL r32, r32, r32 [BMI2] +// * SARXL r32, m32, r32 [BMI2] +// +func (self *Program) SARXL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SARXL", 3, Operands { v0, v1, v2 }) + // SARXL r32, r32, r32 + if isReg32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7a ^ (hlcode(v[0]) << 3)) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // SARXL r32, m32, r32 + if isReg32(v0) && isM32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x02, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0xf7) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SARXL") + } + return p +} + +// SARXQ performs "Arithmetic Shift Right Without Affecting Flags". +// +// Mnemonic : SARX +// Supported forms : (2 forms) +// +// * SARXQ r64, r64, r64 [BMI2] +// * SARXQ r64, m64, r64 [BMI2] +// +func (self *Program) SARXQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SARXQ", 3, Operands { v0, v1, v2 }) + // SARXQ r64, r64, r64 + if isReg64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xfa ^ (hlcode(v[0]) << 3)) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // SARXQ r64, m64, r64 + if isReg64(v0) && isM64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x82, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0xf7) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SARXQ") + } + return p +} + +// SBBB performs "Subtract with Borrow". 
+// +// Mnemonic : SBB +// Supported forms : (6 forms) +// +// * SBBB imm8, al +// * SBBB imm8, r8 +// * SBBB r8, r8 +// * SBBB m8, r8 +// * SBBB imm8, m8 +// * SBBB r8, m8 +// +func (self *Program) SBBB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SBBB", 2, Operands { v0, v1 }) + // SBBB imm8, al + if isImm8(v0) && v1 == AL { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x1c) + m.imm1(toImmAny(v[0])) + }) + } + // SBBB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0x80) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SBBB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x18) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x1a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SBBB m8, r8 + if isM8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1])) + m.emit(0x1a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // SBBB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x80) + m.mrsd(3, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SBBB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x18) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SBBB") + } + return p +} + +// SBBL performs "Subtract with Borrow". 
+// +// Mnemonic : SBB +// Supported forms : (8 forms) +// +// * SBBL imm32, eax +// * SBBL imm8, r32 +// * SBBL imm32, r32 +// * SBBL r32, r32 +// * SBBL m32, r32 +// * SBBL imm8, m32 +// * SBBL imm32, m32 +// * SBBL r32, m32 +// +func (self *Program) SBBL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SBBL", 2, Operands { v0, v1 }) + // SBBL imm32, eax + if isImm32(v0) && v1 == EAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x1d) + m.imm4(toImmAny(v[0])) + }) + } + // SBBL imm8, r32 + if isImm8Ext(v0, 4) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SBBL imm32, r32 + if isImm32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xd8 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // SBBL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x19) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x1b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SBBL m32, r32 + if isM32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x1b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // SBBL imm8, m32 + if isImm8Ext(v0, 4) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(3, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SBBL imm32, m32 + if isImm32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(3, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // SBBL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x19) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SBBL") + } + return p +} + +// SBBQ performs "Subtract with Borrow". 
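+//
+// Sketch (p is a *Program). A classic idiom: once CF holds a borrow,
+// subtracting a register from itself with borrow materializes the flag:
+//
+//     p.SBBQ(RAX, RAX)    // sbb rax, rax: RAX = 0 if CF was clear, -1 if it was set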
+// +// Mnemonic : SBB +// Supported forms : (8 forms) +// +// * SBBQ imm32, rax +// * SBBQ imm8, r64 +// * SBBQ imm32, r64 +// * SBBQ r64, r64 +// * SBBQ m64, r64 +// * SBBQ imm8, m64 +// * SBBQ imm32, m64 +// * SBBQ r64, m64 +// +func (self *Program) SBBQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SBBQ", 2, Operands { v0, v1 }) + // SBBQ imm32, rax + if isImm32(v0) && v1 == RAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48) + m.emit(0x1d) + m.imm4(toImmAny(v[0])) + }) + } + // SBBQ imm8, r64 + if isImm8Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x83) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SBBQ imm32, r64 + if isImm32Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x81) + m.emit(0xd8 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // SBBQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x19) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x1b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SBBQ m64, r64 + if isM64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x1b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // SBBQ imm8, m64 + if isImm8Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x83) + m.mrsd(3, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SBBQ imm32, m64 + if isImm32Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x81) + m.mrsd(3, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // SBBQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x19) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SBBQ") + } + return p +} + +// SBBW performs "Subtract with Borrow". 
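+//
+// A hand-written usage sketch (not generator output), assuming plain
+// integer literals are accepted by the immediate operand checks:
+//
+//     p.SBBW(1, AX)    // ax = ax - 1 - CF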
+// +// Mnemonic : SBB +// Supported forms : (8 forms) +// +// * SBBW imm16, ax +// * SBBW imm8, r16 +// * SBBW imm16, r16 +// * SBBW r16, r16 +// * SBBW m16, r16 +// * SBBW imm8, m16 +// * SBBW imm16, m16 +// * SBBW r16, m16 +// +func (self *Program) SBBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SBBW", 2, Operands { v0, v1 }) + // SBBW imm16, ax + if isImm16(v0) && v1 == AX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x1d) + m.imm2(toImmAny(v[0])) + }) + } + // SBBW imm8, r16 + if isImm8Ext(v0, 2) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SBBW imm16, r16 + if isImm16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xd8 | lcode(v[1])) + m.imm2(toImmAny(v[0])) + }) + } + // SBBW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x19) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x1b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SBBW m16, r16 + if isM16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x1b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // SBBW imm8, m16 + if isImm8Ext(v0, 2) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(3, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SBBW imm16, m16 + if isImm16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(3, addr(v[1]), 1) + m.imm2(toImmAny(v[0])) + }) + } + // SBBW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x19) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SBBW") + } + return p +} + +// SETA performs "Set byte if above (CF == 0 and ZF == 0)". +// +// Mnemonic : SETA +// Supported forms : (2 forms) +// +// * SETA r8 +// * SETA m8 +// +func (self *Program) SETA(v0 interface{}) *Instruction { + p := self.alloc("SETA", 1, Operands { v0 }) + // SETA r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x97) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETA m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x97) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETA") + } + return p +} + +// SETAE performs "Set byte if above or equal (CF == 0)". 
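+//
+// A hand-written usage sketch (not generator output): the SETcc family
+// materializes a flag condition into a byte register or memory operand,
+// typically right after a comparison. CMPL is assumed to be this
+// package's 32-bit compare, defined elsewhere in the file.
+//
+//     p.CMPL(ECX, EAX)    // compute eax - ecx and set the flags
+//     p.SETAE(AL)         // al = 1 if eax >= ecx (unsigned), else 0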
+// +// Mnemonic : SETAE +// Supported forms : (2 forms) +// +// * SETAE r8 +// * SETAE m8 +// +func (self *Program) SETAE(v0 interface{}) *Instruction { + p := self.alloc("SETAE", 1, Operands { v0 }) + // SETAE r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x93) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETAE m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x93) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETAE") + } + return p +} + +// SETB performs "Set byte if below (CF == 1)". +// +// Mnemonic : SETB +// Supported forms : (2 forms) +// +// * SETB r8 +// * SETB m8 +// +func (self *Program) SETB(v0 interface{}) *Instruction { + p := self.alloc("SETB", 1, Operands { v0 }) + // SETB r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x92) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETB m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x92) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETB") + } + return p +} + +// SETBE performs "Set byte if below or equal (CF == 1 or ZF == 1)". +// +// Mnemonic : SETBE +// Supported forms : (2 forms) +// +// * SETBE r8 +// * SETBE m8 +// +func (self *Program) SETBE(v0 interface{}) *Instruction { + p := self.alloc("SETBE", 1, Operands { v0 }) + // SETBE r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x96) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETBE m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x96) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETBE") + } + return p +} + +// SETC performs "Set byte if carry (CF == 1)". +// +// Mnemonic : SETC +// Supported forms : (2 forms) +// +// * SETC r8 +// * SETC m8 +// +func (self *Program) SETC(v0 interface{}) *Instruction { + p := self.alloc("SETC", 1, Operands { v0 }) + // SETC r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x92) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETC m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x92) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETC") + } + return p +} + +// SETE performs "Set byte if equal (ZF == 1)". 
+// +// Mnemonic : SETE +// Supported forms : (2 forms) +// +// * SETE r8 +// * SETE m8 +// +func (self *Program) SETE(v0 interface{}) *Instruction { + p := self.alloc("SETE", 1, Operands { v0 }) + // SETE r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x94) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETE m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x94) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETE") + } + return p +} + +// SETG performs "Set byte if greater (ZF == 0 and SF == OF)". +// +// Mnemonic : SETG +// Supported forms : (2 forms) +// +// * SETG r8 +// * SETG m8 +// +func (self *Program) SETG(v0 interface{}) *Instruction { + p := self.alloc("SETG", 1, Operands { v0 }) + // SETG r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x9f) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETG m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x9f) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETG") + } + return p +} + +// SETGE performs "Set byte if greater or equal (SF == OF)". +// +// Mnemonic : SETGE +// Supported forms : (2 forms) +// +// * SETGE r8 +// * SETGE m8 +// +func (self *Program) SETGE(v0 interface{}) *Instruction { + p := self.alloc("SETGE", 1, Operands { v0 }) + // SETGE r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x9d) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETGE m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x9d) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETGE") + } + return p +} + +// SETL performs "Set byte if less (SF != OF)". +// +// Mnemonic : SETL +// Supported forms : (2 forms) +// +// * SETL r8 +// * SETL m8 +// +func (self *Program) SETL(v0 interface{}) *Instruction { + p := self.alloc("SETL", 1, Operands { v0 }) + // SETL r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETL m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x9c) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETL") + } + return p +} + +// SETLE performs "Set byte if less or equal (ZF == 1 or SF != OF)". 
+// +// Mnemonic : SETLE +// Supported forms : (2 forms) +// +// * SETLE r8 +// * SETLE m8 +// +func (self *Program) SETLE(v0 interface{}) *Instruction { + p := self.alloc("SETLE", 1, Operands { v0 }) + // SETLE r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETLE m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x9e) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETLE") + } + return p +} + +// SETNA performs "Set byte if not above (CF == 1 or ZF == 1)". +// +// Mnemonic : SETNA +// Supported forms : (2 forms) +// +// * SETNA r8 +// * SETNA m8 +// +func (self *Program) SETNA(v0 interface{}) *Instruction { + p := self.alloc("SETNA", 1, Operands { v0 }) + // SETNA r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x96) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNA m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x96) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNA") + } + return p +} + +// SETNAE performs "Set byte if not above or equal (CF == 1)". +// +// Mnemonic : SETNAE +// Supported forms : (2 forms) +// +// * SETNAE r8 +// * SETNAE m8 +// +func (self *Program) SETNAE(v0 interface{}) *Instruction { + p := self.alloc("SETNAE", 1, Operands { v0 }) + // SETNAE r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x92) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNAE m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x92) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNAE") + } + return p +} + +// SETNB performs "Set byte if not below (CF == 0)". +// +// Mnemonic : SETNB +// Supported forms : (2 forms) +// +// * SETNB r8 +// * SETNB m8 +// +func (self *Program) SETNB(v0 interface{}) *Instruction { + p := self.alloc("SETNB", 1, Operands { v0 }) + // SETNB r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x93) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNB m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x93) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNB") + } + return p +} + +// SETNBE performs "Set byte if not below or equal (CF == 0 and ZF == 0)". 
+// +// Mnemonic : SETNBE +// Supported forms : (2 forms) +// +// * SETNBE r8 +// * SETNBE m8 +// +func (self *Program) SETNBE(v0 interface{}) *Instruction { + p := self.alloc("SETNBE", 1, Operands { v0 }) + // SETNBE r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x97) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNBE m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x97) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNBE") + } + return p +} + +// SETNC performs "Set byte if not carry (CF == 0)". +// +// Mnemonic : SETNC +// Supported forms : (2 forms) +// +// * SETNC r8 +// * SETNC m8 +// +func (self *Program) SETNC(v0 interface{}) *Instruction { + p := self.alloc("SETNC", 1, Operands { v0 }) + // SETNC r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x93) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNC m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x93) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNC") + } + return p +} + +// SETNE performs "Set byte if not equal (ZF == 0)". +// +// Mnemonic : SETNE +// Supported forms : (2 forms) +// +// * SETNE r8 +// * SETNE m8 +// +func (self *Program) SETNE(v0 interface{}) *Instruction { + p := self.alloc("SETNE", 1, Operands { v0 }) + // SETNE r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x95) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNE m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x95) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNE") + } + return p +} + +// SETNG performs "Set byte if not greater (ZF == 1 or SF != OF)". +// +// Mnemonic : SETNG +// Supported forms : (2 forms) +// +// * SETNG r8 +// * SETNG m8 +// +func (self *Program) SETNG(v0 interface{}) *Instruction { + p := self.alloc("SETNG", 1, Operands { v0 }) + // SETNG r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNG m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x9e) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNG") + } + return p +} + +// SETNGE performs "Set byte if not greater or equal (SF != OF)". 
+// +// Mnemonic : SETNGE +// Supported forms : (2 forms) +// +// * SETNGE r8 +// * SETNGE m8 +// +func (self *Program) SETNGE(v0 interface{}) *Instruction { + p := self.alloc("SETNGE", 1, Operands { v0 }) + // SETNGE r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNGE m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x9c) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNGE") + } + return p +} + +// SETNL performs "Set byte if not less (SF == OF)". +// +// Mnemonic : SETNL +// Supported forms : (2 forms) +// +// * SETNL r8 +// * SETNL m8 +// +func (self *Program) SETNL(v0 interface{}) *Instruction { + p := self.alloc("SETNL", 1, Operands { v0 }) + // SETNL r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x9d) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNL m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x9d) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNL") + } + return p +} + +// SETNLE performs "Set byte if not less or equal (ZF == 0 and SF == OF)". +// +// Mnemonic : SETNLE +// Supported forms : (2 forms) +// +// * SETNLE r8 +// * SETNLE m8 +// +func (self *Program) SETNLE(v0 interface{}) *Instruction { + p := self.alloc("SETNLE", 1, Operands { v0 }) + // SETNLE r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x9f) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNLE m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x9f) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNLE") + } + return p +} + +// SETNO performs "Set byte if not overflow (OF == 0)". +// +// Mnemonic : SETNO +// Supported forms : (2 forms) +// +// * SETNO r8 +// * SETNO m8 +// +func (self *Program) SETNO(v0 interface{}) *Instruction { + p := self.alloc("SETNO", 1, Operands { v0 }) + // SETNO r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x91) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNO m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x91) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNO") + } + return p +} + +// SETNP performs "Set byte if not parity (PF == 0)". 
+// +// Mnemonic : SETNP +// Supported forms : (2 forms) +// +// * SETNP r8 +// * SETNP m8 +// +func (self *Program) SETNP(v0 interface{}) *Instruction { + p := self.alloc("SETNP", 1, Operands { v0 }) + // SETNP r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x9b) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNP m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x9b) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNP") + } + return p +} + +// SETNS performs "Set byte if not sign (SF == 0)". +// +// Mnemonic : SETNS +// Supported forms : (2 forms) +// +// * SETNS r8 +// * SETNS m8 +// +func (self *Program) SETNS(v0 interface{}) *Instruction { + p := self.alloc("SETNS", 1, Operands { v0 }) + // SETNS r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x99) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNS m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x99) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNS") + } + return p +} + +// SETNZ performs "Set byte if not zero (ZF == 0)". +// +// Mnemonic : SETNZ +// Supported forms : (2 forms) +// +// * SETNZ r8 +// * SETNZ m8 +// +func (self *Program) SETNZ(v0 interface{}) *Instruction { + p := self.alloc("SETNZ", 1, Operands { v0 }) + // SETNZ r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x95) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETNZ m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x95) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETNZ") + } + return p +} + +// SETO performs "Set byte if overflow (OF == 1)". +// +// Mnemonic : SETO +// Supported forms : (2 forms) +// +// * SETO r8 +// * SETO m8 +// +func (self *Program) SETO(v0 interface{}) *Instruction { + p := self.alloc("SETO", 1, Operands { v0 }) + // SETO r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x90) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETO m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x90) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETO") + } + return p +} + +// SETP performs "Set byte if parity (PF == 1)". 
+// +// Mnemonic : SETP +// Supported forms : (2 forms) +// +// * SETP r8 +// * SETP m8 +// +func (self *Program) SETP(v0 interface{}) *Instruction { + p := self.alloc("SETP", 1, Operands { v0 }) + // SETP r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETP m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x9a) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETP") + } + return p +} + +// SETPE performs "Set byte if parity even (PF == 1)". +// +// Mnemonic : SETPE +// Supported forms : (2 forms) +// +// * SETPE r8 +// * SETPE m8 +// +func (self *Program) SETPE(v0 interface{}) *Instruction { + p := self.alloc("SETPE", 1, Operands { v0 }) + // SETPE r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETPE m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x9a) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETPE") + } + return p +} + +// SETPO performs "Set byte if parity odd (PF == 0)". +// +// Mnemonic : SETPO +// Supported forms : (2 forms) +// +// * SETPO r8 +// * SETPO m8 +// +func (self *Program) SETPO(v0 interface{}) *Instruction { + p := self.alloc("SETPO", 1, Operands { v0 }) + // SETPO r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x9b) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETPO m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x9b) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETPO") + } + return p +} + +// SETS performs "Set byte if sign (SF == 1)". +// +// Mnemonic : SETS +// Supported forms : (2 forms) +// +// * SETS r8 +// * SETS m8 +// +func (self *Program) SETS(v0 interface{}) *Instruction { + p := self.alloc("SETS", 1, Operands { v0 }) + // SETS r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x98) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETS m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x98) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETS") + } + return p +} + +// SETZ performs "Set byte if zero (ZF == 1)". 
+// +// Mnemonic : SETZ +// Supported forms : (2 forms) +// +// * SETZ r8 +// * SETZ m8 +// +func (self *Program) SETZ(v0 interface{}) *Instruction { + p := self.alloc("SETZ", 1, Operands { v0 }) + // SETZ r8 + if isReg8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0x94) + m.emit(0xc0 | lcode(v[0])) + }) + } + // SETZ m8 + if isM8(v0) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0x94) + m.mrsd(0, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SETZ") + } + return p +} + +// SFENCE performs "Store Fence". +// +// Mnemonic : SFENCE +// Supported forms : (1 form) +// +// * SFENCE [MMX+] +// +func (self *Program) SFENCE() *Instruction { + p := self.alloc("SFENCE", 0, Operands { }) + // SFENCE + self.require(ISA_MMX_PLUS) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0xae) + m.emit(0xf8) + }) + return p +} + +// SHA1MSG1 performs "Perform an Intermediate Calculation for the Next Four SHA1 Message Doublewords". +// +// Mnemonic : SHA1MSG1 +// Supported forms : (2 forms) +// +// * SHA1MSG1 xmm, xmm [SHA] +// * SHA1MSG1 m128, xmm [SHA] +// +func (self *Program) SHA1MSG1(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHA1MSG1", 2, Operands { v0, v1 }) + // SHA1MSG1 xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xc9) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SHA1MSG1 m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xc9) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHA1MSG1") + } + return p +} + +// SHA1MSG2 performs "Perform a Final Calculation for the Next Four SHA1 Message Doublewords". +// +// Mnemonic : SHA1MSG2 +// Supported forms : (2 forms) +// +// * SHA1MSG2 xmm, xmm [SHA] +// * SHA1MSG2 m128, xmm [SHA] +// +func (self *Program) SHA1MSG2(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHA1MSG2", 2, Operands { v0, v1 }) + // SHA1MSG2 xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xca) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SHA1MSG2 m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xca) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHA1MSG2") + } + return p +} + +// SHA1NEXTE performs "Calculate SHA1 State Variable E after Four Rounds". 
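+//
+// A hand-written usage sketch (not generator output): in a four-round
+// SHA-1 step, the rotated state variable E held in the destination is
+// folded into the next four message dwords from the source. The XMM
+// register constants are assumed to be predeclared by this package.
+//
+//     p.SHA1NEXTE(XMM1, XMM0)    // fold E (from xmm0) into the message
+//                                // dwords in xmm1; result in xmm0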
+// +// Mnemonic : SHA1NEXTE +// Supported forms : (2 forms) +// +// * SHA1NEXTE xmm, xmm [SHA] +// * SHA1NEXTE m128, xmm [SHA] +// +func (self *Program) SHA1NEXTE(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHA1NEXTE", 2, Operands { v0, v1 }) + // SHA1NEXTE xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xc8) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SHA1NEXTE m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xc8) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHA1NEXTE") + } + return p +} + +// SHA1RNDS4 performs "Perform Four Rounds of SHA1 Operation". +// +// Mnemonic : SHA1RNDS4 +// Supported forms : (2 forms) +// +// * SHA1RNDS4 imm8, xmm, xmm [SHA] +// * SHA1RNDS4 imm8, m128, xmm [SHA] +// +func (self *Program) SHA1RNDS4(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHA1RNDS4", 3, Operands { v0, v1, v2 }) + // SHA1RNDS4 imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0xcc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SHA1RNDS4 imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x3a) + m.emit(0xcc) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for SHA1RNDS4") + } + return p +} + +// SHA256MSG1 performs "Perform an Intermediate Calculation for the Next Four SHA256 Message Doublewords". +// +// Mnemonic : SHA256MSG1 +// Supported forms : (2 forms) +// +// * SHA256MSG1 xmm, xmm [SHA] +// * SHA256MSG1 m128, xmm [SHA] +// +func (self *Program) SHA256MSG1(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHA256MSG1", 2, Operands { v0, v1 }) + // SHA256MSG1 xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xcc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SHA256MSG1 m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xcc) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHA256MSG1") + } + return p +} + +// SHA256MSG2 performs "Perform a Final Calculation for the Next Four SHA256 Message Doublewords". 
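+//
+// A hand-written usage sketch (not generator output): SHA256MSG1 and
+// SHA256MSG2 are used as a pair to update the SHA-256 message schedule,
+// with SHA256MSG2 performing the final step. The XMM register constants
+// are assumed to be predeclared by this package.
+//
+//     p.SHA256MSG1(XMM4, XMM3)    // intermediate schedule calculation
+//     p.SHA256MSG2(XMM5, XMM3)    // finish the next four schedule dwords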
+// +// Mnemonic : SHA256MSG2 +// Supported forms : (2 forms) +// +// * SHA256MSG2 xmm, xmm [SHA] +// * SHA256MSG2 m128, xmm [SHA] +// +func (self *Program) SHA256MSG2(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHA256MSG2", 2, Operands { v0, v1 }) + // SHA256MSG2 xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xcd) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SHA256MSG2 m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xcd) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHA256MSG2") + } + return p +} + +// SHA256RNDS2 performs "Perform Two Rounds of SHA256 Operation". +// +// Mnemonic : SHA256RNDS2 +// Supported forms : (2 forms) +// +// * SHA256RNDS2 xmm0, xmm, xmm [SHA] +// * SHA256RNDS2 xmm0, m128, xmm [SHA] +// +func (self *Program) SHA256RNDS2(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHA256RNDS2", 3, Operands { v0, v1, v2 }) + // SHA256RNDS2 xmm0, xmm, xmm + if v0 == XMM0 && isXMM(v1) && isXMM(v2) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xcb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // SHA256RNDS2 xmm0, m128, xmm + if v0 == XMM0 && isM128(v1) && isXMM(v2) { + self.require(ISA_SHA) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0x38) + m.emit(0xcb) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHA256RNDS2") + } + return p +} + +// SHLB performs "Logical Shift Left". 
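+//
+// A hand-written usage sketch (not generator output): the constant-1 form
+// selects the shorter D0 /4 encoding, assuming the literal 1 passes the
+// isConst1 operand check.
+//
+//     p.SHLB(1, AL)     // al <<= 1
+//     p.SHLB(CL, AL)    // al <<= cl & 31 (count taken from CL)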
+// +// Mnemonic : SHL +// Supported forms : (6 forms) +// +// * SHLB 1, r8 +// * SHLB imm8, r8 +// * SHLB cl, r8 +// * SHLB 1, m8 +// * SHLB imm8, m8 +// * SHLB cl, m8 +// +func (self *Program) SHLB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHLB", 2, Operands { v0, v1 }) + // SHLB 1, r8 + if isConst1(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd0) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SHLB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xc0) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SHLB cl, r8 + if v0 == CL && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd2) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SHLB 1, m8 + if isConst1(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd0) + m.mrsd(4, addr(v[1]), 1) + }) + } + // SHLB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc0) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHLB cl, m8 + if v0 == CL && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd2) + m.mrsd(4, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHLB") + } + return p +} + +// SHLDL performs "Integer Double Precision Shift Left". +// +// Mnemonic : SHLD +// Supported forms : (4 forms) +// +// * SHLDL imm8, r32, r32 +// * SHLDL cl, r32, r32 +// * SHLDL imm8, r32, m32 +// * SHLDL cl, r32, m32 +// +func (self *Program) SHLDL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHLDL", 3, Operands { v0, v1, v2 }) + // SHLDL imm8, r32, r32 + if isImm8(v0) && isReg32(v1) && isReg32(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[2], false) + m.emit(0x0f) + m.emit(0xa4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // SHLDL cl, r32, r32 + if v0 == CL && isReg32(v1) && isReg32(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[2], false) + m.emit(0x0f) + m.emit(0xa5) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + }) + } + // SHLDL imm8, r32, m32 + if isImm8(v0) && isReg32(v1) && isM32(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[2]), false) + m.emit(0x0f) + m.emit(0xa4) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHLDL cl, r32, m32 + if v0 == CL && isReg32(v1) && isM32(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[2]), false) + m.emit(0x0f) + m.emit(0xa5) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHLDL") + } + return p +} + +// SHLDQ performs "Integer Double Precision Shift Left". 
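+//
+// A hand-written usage sketch (not generator output): SHLD shifts the
+// destination left while filling the vacated low bits from the high bits
+// of the second operand, which is how a value spanning two registers is
+// shifted. RAX and RDX are assumed predeclared register constants.
+//
+//     p.SHLDQ(8, RAX, RDX)    // rdx = rdx<<8 | rax>>56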
+// +// Mnemonic : SHLD +// Supported forms : (4 forms) +// +// * SHLDQ imm8, r64, r64 +// * SHLDQ cl, r64, r64 +// * SHLDQ imm8, r64, m64 +// * SHLDQ cl, r64, m64 +// +func (self *Program) SHLDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHLDQ", 3, Operands { v0, v1, v2 }) + // SHLDQ imm8, r64, r64 + if isImm8(v0) && isReg64(v1) && isReg64(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[2])) + m.emit(0x0f) + m.emit(0xa4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // SHLDQ cl, r64, r64 + if v0 == CL && isReg64(v1) && isReg64(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[2])) + m.emit(0x0f) + m.emit(0xa5) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + }) + } + // SHLDQ imm8, r64, m64 + if isImm8(v0) && isReg64(v1) && isM64(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[2])) + m.emit(0x0f) + m.emit(0xa4) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHLDQ cl, r64, m64 + if v0 == CL && isReg64(v1) && isM64(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[2])) + m.emit(0x0f) + m.emit(0xa5) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHLDQ") + } + return p +} + +// SHLDW performs "Integer Double Precision Shift Left". +// +// Mnemonic : SHLD +// Supported forms : (4 forms) +// +// * SHLDW imm8, r16, r16 +// * SHLDW cl, r16, r16 +// * SHLDW imm8, r16, m16 +// * SHLDW cl, r16, m16 +// +func (self *Program) SHLDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHLDW", 3, Operands { v0, v1, v2 }) + // SHLDW imm8, r16, r16 + if isImm8(v0) && isReg16(v1) && isReg16(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[2], false) + m.emit(0x0f) + m.emit(0xa4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // SHLDW cl, r16, r16 + if v0 == CL && isReg16(v1) && isReg16(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[2], false) + m.emit(0x0f) + m.emit(0xa5) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + }) + } + // SHLDW imm8, r16, m16 + if isImm8(v0) && isReg16(v1) && isM16(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[2]), false) + m.emit(0x0f) + m.emit(0xa4) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHLDW cl, r16, m16 + if v0 == CL && isReg16(v1) && isM16(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[2]), false) + m.emit(0x0f) + m.emit(0xa5) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHLDW") + } + return p +} + +// SHLL performs "Logical Shift Left". 
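+//
+// A hand-written usage sketch (not generator output): the CL form shifts
+// by a runtime count, which the hardware requires to be in CL.
+//
+//     p.SHLL(CL, EAX)    // eax <<= cl & 31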
+// +// Mnemonic : SHL +// Supported forms : (6 forms) +// +// * SHLL 1, r32 +// * SHLL imm8, r32 +// * SHLL cl, r32 +// * SHLL 1, m32 +// * SHLL imm8, m32 +// * SHLL cl, m32 +// +func (self *Program) SHLL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHLL", 2, Operands { v0, v1 }) + // SHLL 1, r32 + if isConst1(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SHLL imm8, r32 + if isImm8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SHLL cl, r32 + if v0 == CL && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SHLL 1, m32 + if isConst1(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(4, addr(v[1]), 1) + }) + } + // SHLL imm8, m32 + if isImm8(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHLL cl, m32 + if v0 == CL && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(4, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHLL") + } + return p +} + +// SHLQ performs "Logical Shift Left". +// +// Mnemonic : SHL +// Supported forms : (6 forms) +// +// * SHLQ 1, r64 +// * SHLQ imm8, r64 +// * SHLQ cl, r64 +// * SHLQ 1, m64 +// * SHLQ imm8, m64 +// * SHLQ cl, m64 +// +func (self *Program) SHLQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHLQ", 2, Operands { v0, v1 }) + // SHLQ 1, r64 + if isConst1(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd1) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SHLQ imm8, r64 + if isImm8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xc1) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SHLQ cl, r64 + if v0 == CL && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd3) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SHLQ 1, m64 + if isConst1(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd1) + m.mrsd(4, addr(v[1]), 1) + }) + } + // SHLQ imm8, m64 + if isImm8(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xc1) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHLQ cl, m64 + if v0 == CL && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd3) + m.mrsd(4, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHLQ") + } + return p +} + +// SHLW performs "Logical Shift Left". 
+// +// Mnemonic : SHL +// Supported forms : (6 forms) +// +// * SHLW 1, r16 +// * SHLW imm8, r16 +// * SHLW cl, r16 +// * SHLW 1, m16 +// * SHLW imm8, m16 +// * SHLW cl, m16 +// +func (self *Program) SHLW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHLW", 2, Operands { v0, v1 }) + // SHLW 1, r16 + if isConst1(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SHLW imm8, r16 + if isImm8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SHLW cl, r16 + if v0 == CL && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xe0 | lcode(v[1])) + }) + } + // SHLW 1, m16 + if isConst1(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(4, addr(v[1]), 1) + }) + } + // SHLW imm8, m16 + if isImm8(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(4, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHLW cl, m16 + if v0 == CL && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(4, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHLW") + } + return p +} + +// SHLXL performs "Logical Shift Left Without Affecting Flags". +// +// Mnemonic : SHLX +// Supported forms : (2 forms) +// +// * SHLXL r32, r32, r32 [BMI2] +// * SHLXL r32, m32, r32 [BMI2] +// +func (self *Program) SHLXL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHLXL", 3, Operands { v0, v1, v2 }) + // SHLXL r32, r32, r32 + if isReg32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[0]) << 3)) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // SHLXL r32, m32, r32 + if isReg32(v0) && isM32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0xf7) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHLXL") + } + return p +} + +// SHLXQ performs "Logical Shift Left Without Affecting Flags". 
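+//
+// A hand-written usage sketch (not generator output): unlike SHLQ, the
+// BMI2 form takes the shift count from an arbitrary register and leaves
+// the flags untouched; the operand order is (count, source, destination).
+// RCX and RDX are assumed predeclared register constants.
+//
+//     p.SHLXQ(RCX, RAX, RDX)    // rdx = rax << (rcx & 63), flags unchanged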
+// +// Mnemonic : SHLX +// Supported forms : (2 forms) +// +// * SHLXQ r64, r64, r64 [BMI2] +// * SHLXQ r64, m64, r64 [BMI2] +// +func (self *Program) SHLXQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHLXQ", 3, Operands { v0, v1, v2 }) + // SHLXQ r64, r64, r64 + if isReg64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xf9 ^ (hlcode(v[0]) << 3)) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // SHLXQ r64, m64, r64 + if isReg64(v0) && isM64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0xf7) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHLXQ") + } + return p +} + +// SHRB performs "Logical Shift Right". +// +// Mnemonic : SHR +// Supported forms : (6 forms) +// +// * SHRB 1, r8 +// * SHRB imm8, r8 +// * SHRB cl, r8 +// * SHRB 1, m8 +// * SHRB imm8, m8 +// * SHRB cl, m8 +// +func (self *Program) SHRB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHRB", 2, Operands { v0, v1 }) + // SHRB 1, r8 + if isConst1(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd0) + m.emit(0xe8 | lcode(v[1])) + }) + } + // SHRB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xc0) + m.emit(0xe8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SHRB cl, r8 + if v0 == CL && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xd2) + m.emit(0xe8 | lcode(v[1])) + }) + } + // SHRB 1, m8 + if isConst1(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd0) + m.mrsd(5, addr(v[1]), 1) + }) + } + // SHRB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc0) + m.mrsd(5, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHRB cl, m8 + if v0 == CL && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd2) + m.mrsd(5, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHRB") + } + return p +} + +// SHRDL performs "Integer Double Precision Shift Right". 
+// +// Mnemonic : SHRD +// Supported forms : (4 forms) +// +// * SHRDL imm8, r32, r32 +// * SHRDL cl, r32, r32 +// * SHRDL imm8, r32, m32 +// * SHRDL cl, r32, m32 +// +func (self *Program) SHRDL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHRDL", 3, Operands { v0, v1, v2 }) + // SHRDL imm8, r32, r32 + if isImm8(v0) && isReg32(v1) && isReg32(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[2], false) + m.emit(0x0f) + m.emit(0xac) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // SHRDL cl, r32, r32 + if v0 == CL && isReg32(v1) && isReg32(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[2], false) + m.emit(0x0f) + m.emit(0xad) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + }) + } + // SHRDL imm8, r32, m32 + if isImm8(v0) && isReg32(v1) && isM32(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[2]), false) + m.emit(0x0f) + m.emit(0xac) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHRDL cl, r32, m32 + if v0 == CL && isReg32(v1) && isM32(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[2]), false) + m.emit(0x0f) + m.emit(0xad) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHRDL") + } + return p +} + +// SHRDQ performs "Integer Double Precision Shift Right". +// +// Mnemonic : SHRD +// Supported forms : (4 forms) +// +// * SHRDQ imm8, r64, r64 +// * SHRDQ cl, r64, r64 +// * SHRDQ imm8, r64, m64 +// * SHRDQ cl, r64, m64 +// +func (self *Program) SHRDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHRDQ", 3, Operands { v0, v1, v2 }) + // SHRDQ imm8, r64, r64 + if isImm8(v0) && isReg64(v1) && isReg64(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[2])) + m.emit(0x0f) + m.emit(0xac) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // SHRDQ cl, r64, r64 + if v0 == CL && isReg64(v1) && isReg64(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[2])) + m.emit(0x0f) + m.emit(0xad) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + }) + } + // SHRDQ imm8, r64, m64 + if isImm8(v0) && isReg64(v1) && isM64(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[2])) + m.emit(0x0f) + m.emit(0xac) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHRDQ cl, r64, m64 + if v0 == CL && isReg64(v1) && isM64(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[2])) + m.emit(0x0f) + m.emit(0xad) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHRDQ") + } + return p +} + +// SHRDW performs "Integer Double Precision Shift Right". 
+// +// Mnemonic : SHRD +// Supported forms : (4 forms) +// +// * SHRDW imm8, r16, r16 +// * SHRDW cl, r16, r16 +// * SHRDW imm8, r16, m16 +// * SHRDW cl, r16, m16 +// +func (self *Program) SHRDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHRDW", 3, Operands { v0, v1, v2 }) + // SHRDW imm8, r16, r16 + if isImm8(v0) && isReg16(v1) && isReg16(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[2], false) + m.emit(0x0f) + m.emit(0xac) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // SHRDW cl, r16, r16 + if v0 == CL && isReg16(v1) && isReg16(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[2], false) + m.emit(0x0f) + m.emit(0xad) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + }) + } + // SHRDW imm8, r16, m16 + if isImm8(v0) && isReg16(v1) && isM16(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[2]), false) + m.emit(0x0f) + m.emit(0xac) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHRDW cl, r16, m16 + if v0 == CL && isReg16(v1) && isM16(v2) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[2]), false) + m.emit(0x0f) + m.emit(0xad) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHRDW") + } + return p +} + +// SHRL performs "Logical Shift Right". +// +// Mnemonic : SHR +// Supported forms : (6 forms) +// +// * SHRL 1, r32 +// * SHRL imm8, r32 +// * SHRL cl, r32 +// * SHRL 1, m32 +// * SHRL imm8, m32 +// * SHRL cl, m32 +// +func (self *Program) SHRL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHRL", 2, Operands { v0, v1 }) + // SHRL 1, r32 + if isConst1(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xe8 | lcode(v[1])) + }) + } + // SHRL imm8, r32 + if isImm8(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xe8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SHRL cl, r32 + if v0 == CL && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xe8 | lcode(v[1])) + }) + } + // SHRL 1, m32 + if isConst1(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(5, addr(v[1]), 1) + }) + } + // SHRL imm8, m32 + if isImm8(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(5, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHRL cl, m32 + if v0 == CL && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(5, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHRL") + } + return p +} + +// SHRQ performs "Logical Shift Right". 
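+//
+// A hand-written usage sketch (not generator output): a logical right
+// shift by a constant is the usual unsigned division by a power of two.
+//
+//     p.SHRQ(3, RAX)    // rax >>= 3, i.e. unsigned divide by 8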
+// +// Mnemonic : SHR +// Supported forms : (6 forms) +// +// * SHRQ 1, r64 +// * SHRQ imm8, r64 +// * SHRQ cl, r64 +// * SHRQ 1, m64 +// * SHRQ imm8, m64 +// * SHRQ cl, m64 +// +func (self *Program) SHRQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHRQ", 2, Operands { v0, v1 }) + // SHRQ 1, r64 + if isConst1(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd1) + m.emit(0xe8 | lcode(v[1])) + }) + } + // SHRQ imm8, r64 + if isImm8(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xc1) + m.emit(0xe8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SHRQ cl, r64 + if v0 == CL && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xd3) + m.emit(0xe8 | lcode(v[1])) + }) + } + // SHRQ 1, m64 + if isConst1(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd1) + m.mrsd(5, addr(v[1]), 1) + }) + } + // SHRQ imm8, m64 + if isImm8(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xc1) + m.mrsd(5, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHRQ cl, m64 + if v0 == CL && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xd3) + m.mrsd(5, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHRQ") + } + return p +} + +// SHRW performs "Logical Shift Right". +// +// Mnemonic : SHR +// Supported forms : (6 forms) +// +// * SHRW 1, r16 +// * SHRW imm8, r16 +// * SHRW cl, r16 +// * SHRW 1, m16 +// * SHRW imm8, m16 +// * SHRW cl, m16 +// +func (self *Program) SHRW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SHRW", 2, Operands { v0, v1 }) + // SHRW 1, r16 + if isConst1(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd1) + m.emit(0xe8 | lcode(v[1])) + }) + } + // SHRW imm8, r16 + if isImm8(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xc1) + m.emit(0xe8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SHRW cl, r16 + if v0 == CL && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xd3) + m.emit(0xe8 | lcode(v[1])) + }) + } + // SHRW 1, m16 + if isConst1(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd1) + m.mrsd(5, addr(v[1]), 1) + }) + } + // SHRW imm8, m16 + if isImm8(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xc1) + m.mrsd(5, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SHRW cl, m16 + if v0 == CL && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xd3) + m.mrsd(5, addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHRW") + } + return p +} + +// SHRXL performs "Logical Shift Right Without Affecting Flags". 
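+//
+// Illustrative sketch (assumes a *Program p and 32-bit register constants
+// such as EAX, EDX and ECX); emitting this form also marks the program as
+// requiring BMI2 via self.require(ISA_BMI2):
+//
+//     p.SHRXL(ECX, EDX, EAX)    // EAX = EDX >> (ECX & 31), flags untouched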
+// +// Mnemonic : SHRX +// Supported forms : (2 forms) +// +// * SHRXL r32, r32, r32 [BMI2] +// * SHRXL r32, m32, r32 [BMI2] +// +func (self *Program) SHRXL(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHRXL", 3, Operands { v0, v1, v2 }) + // SHRXL r32, r32, r32 + if isReg32(v0) && isReg32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7b ^ (hlcode(v[0]) << 3)) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // SHRXL r32, m32, r32 + if isReg32(v0) && isM32(v1) && isReg32(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x03, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0xf7) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHRXL") + } + return p +} + +// SHRXQ performs "Logical Shift Right Without Affecting Flags". +// +// Mnemonic : SHRX +// Supported forms : (2 forms) +// +// * SHRXQ r64, r64, r64 [BMI2] +// * SHRXQ r64, m64, r64 [BMI2] +// +func (self *Program) SHRXQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHRXQ", 3, Operands { v0, v1, v2 }) + // SHRXQ r64, r64, r64 + if isReg64(v0) && isReg64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xfb ^ (hlcode(v[0]) << 3)) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // SHRXQ r64, m64, r64 + if isReg64(v0) && isM64(v1) && isReg64(v2) { + self.require(ISA_BMI2) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x83, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0xf7) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SHRXQ") + } + return p +} + +// SHUFPD performs "Shuffle Packed Double-Precision Floating-Point Values". +// +// Mnemonic : SHUFPD +// Supported forms : (2 forms) +// +// * SHUFPD imm8, xmm, xmm [SSE2] +// * SHUFPD imm8, m128, xmm [SSE2] +// +func (self *Program) SHUFPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHUFPD", 3, Operands { v0, v1, v2 }) + // SHUFPD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SHUFPD imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xc6) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for SHUFPD") + } + return p +} + +// SHUFPS performs "Shuffle Packed Single-Precision Floating-Point Values". 
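+//
+// Illustrative sketch (assumes a *Program p and XMM register constants
+// such as XMM0):
+//
+//     p.SHUFPS(0x1b, XMM0, XMM0)    // imm8 0x1b with the same source and
+//                                   // destination reverses the four lanes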
+// +// Mnemonic : SHUFPS +// Supported forms : (2 forms) +// +// * SHUFPS imm8, xmm, xmm [SSE] +// * SHUFPS imm8, m128, xmm [SSE] +// +func (self *Program) SHUFPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("SHUFPS", 3, Operands { v0, v1, v2 }) + // SHUFPS imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), v[1], false) + m.emit(0x0f) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SHUFPS imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[2]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xc6) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for SHUFPS") + } + return p +} + +// SQRTPD performs "Compute Square Roots of Packed Double-Precision Floating-Point Values". +// +// Mnemonic : SQRTPD +// Supported forms : (2 forms) +// +// * SQRTPD xmm, xmm [SSE2] +// * SQRTPD m128, xmm [SSE2] +// +func (self *Program) SQRTPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SQRTPD", 2, Operands { v0, v1 }) + // SQRTPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x51) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SQRTPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x51) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SQRTPD") + } + return p +} + +// SQRTPS performs "Compute Square Roots of Packed Single-Precision Floating-Point Values". +// +// Mnemonic : SQRTPS +// Supported forms : (2 forms) +// +// * SQRTPS xmm, xmm [SSE] +// * SQRTPS m128, xmm [SSE] +// +func (self *Program) SQRTPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SQRTPS", 2, Operands { v0, v1 }) + // SQRTPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x51) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SQRTPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x51) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SQRTPS") + } + return p +} + +// SQRTSD performs "Compute Square Root of Scalar Double-Precision Floating-Point Value". 
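+//
+// Illustrative sketch (assumes a *Program p and XMM register constants
+// such as XMM0 and XMM1):
+//
+//     p.SQRTSD(XMM1, XMM0)    // low double of XMM0 = sqrt(low double of
+//                             // XMM1); upper bits of XMM0 are unchanged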
+// +// Mnemonic : SQRTSD +// Supported forms : (2 forms) +// +// * SQRTSD xmm, xmm [SSE2] +// * SQRTSD m64, xmm [SSE2] +// +func (self *Program) SQRTSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SQRTSD", 2, Operands { v0, v1 }) + // SQRTSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x51) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SQRTSD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x51) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SQRTSD") + } + return p +} + +// SQRTSS performs "Compute Square Root of Scalar Single-Precision Floating-Point Value". +// +// Mnemonic : SQRTSS +// Supported forms : (2 forms) +// +// * SQRTSS xmm, xmm [SSE] +// * SQRTSS m32, xmm [SSE] +// +func (self *Program) SQRTSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SQRTSS", 2, Operands { v0, v1 }) + // SQRTSS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x51) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SQRTSS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x51) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SQRTSS") + } + return p +} + +// STC performs "Set Carry Flag". +// +// Mnemonic : STC +// Supported forms : (1 form) +// +// * STC +// +func (self *Program) STC() *Instruction { + p := self.alloc("STC", 0, Operands { }) + // STC + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf9) + }) + return p +} + +// STD performs "Set Direction Flag". +// +// Mnemonic : STD +// Supported forms : (1 form) +// +// * STD +// +func (self *Program) STD() *Instruction { + p := self.alloc("STD", 0, Operands { }) + // STD + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xfd) + }) + return p +} + +// STMXCSR performs "Store MXCSR Register State". +// +// Mnemonic : STMXCSR +// Supported forms : (1 form) +// +// * STMXCSR m32 [SSE] +// +func (self *Program) STMXCSR(v0 interface{}) *Instruction { + p := self.alloc("STMXCSR", 1, Operands { v0 }) + // STMXCSR m32 + if isM32(v0) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[0]), false) + m.emit(0x0f) + m.emit(0xae) + m.mrsd(3, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for STMXCSR") + } + return p +} + +// SUBB performs "Subtract". 
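+//
+// Illustrative sketch (assumes a *Program p; AL is a register constant
+// from this package):
+//
+//     p.SUBB(1, AL)    // AL -= 1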
+// +// Mnemonic : SUB +// Supported forms : (6 forms) +// +// * SUBB imm8, al +// * SUBB imm8, r8 +// * SUBB r8, r8 +// * SUBB m8, r8 +// * SUBB imm8, m8 +// * SUBB r8, m8 +// +func (self *Program) SUBB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SUBB", 2, Operands { v0, v1 }) + // SUBB imm8, al + if isImm8(v0) && v1 == AL { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x2c) + m.imm1(toImmAny(v[0])) + }) + } + // SUBB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0x80) + m.emit(0xe8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SUBB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x28) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SUBB m8, r8 + if isM8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1])) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // SUBB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x80) + m.mrsd(5, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SUBB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x28) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SUBB") + } + return p +} + +// SUBL performs "Subtract". 
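+//
+// Illustrative sketch (assumes a *Program p; EAX is a register constant
+// from this package):
+//
+//     p.SUBL(8, EAX)    // EAX -= 8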
+// +// Mnemonic : SUB +// Supported forms : (8 forms) +// +// * SUBL imm32, eax +// * SUBL imm8, r32 +// * SUBL imm32, r32 +// * SUBL r32, r32 +// * SUBL m32, r32 +// * SUBL imm8, m32 +// * SUBL imm32, m32 +// * SUBL r32, m32 +// +func (self *Program) SUBL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SUBL", 2, Operands { v0, v1 }) + // SUBL imm32, eax + if isImm32(v0) && v1 == EAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x2d) + m.imm4(toImmAny(v[0])) + }) + } + // SUBL imm8, r32 + if isImm8Ext(v0, 4) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xe8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SUBL imm32, r32 + if isImm32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xe8 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // SUBL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x2b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SUBL m32, r32 + if isM32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x2b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // SUBL imm8, m32 + if isImm8Ext(v0, 4) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(5, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SUBL imm32, m32 + if isImm32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(5, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // SUBL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SUBL") + } + return p +} + +// SUBPD performs "Subtract Packed Double-Precision Floating-Point Values". +// +// Mnemonic : SUBPD +// Supported forms : (2 forms) +// +// * SUBPD xmm, xmm [SSE2] +// * SUBPD m128, xmm [SSE2] +// +func (self *Program) SUBPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SUBPD", 2, Operands { v0, v1 }) + // SUBPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SUBPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SUBPD") + } + return p +} + +// SUBPS performs "Subtract Packed Single-Precision Floating-Point Values". 
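+//
+// Illustrative sketch (assumes a *Program p and XMM register constants):
+//
+//     p.SUBPS(XMM1, XMM0)    // four packed singles: XMM0 -= XMM1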
+// +// Mnemonic : SUBPS +// Supported forms : (2 forms) +// +// * SUBPS xmm, xmm [SSE] +// * SUBPS m128, xmm [SSE] +// +func (self *Program) SUBPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SUBPS", 2, Operands { v0, v1 }) + // SUBPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SUBPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SUBPS") + } + return p +} + +// SUBQ performs "Subtract". +// +// Mnemonic : SUB +// Supported forms : (8 forms) +// +// * SUBQ imm32, rax +// * SUBQ imm8, r64 +// * SUBQ imm32, r64 +// * SUBQ r64, r64 +// * SUBQ m64, r64 +// * SUBQ imm8, m64 +// * SUBQ imm32, m64 +// * SUBQ r64, m64 +// +func (self *Program) SUBQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SUBQ", 2, Operands { v0, v1 }) + // SUBQ imm32, rax + if isImm32(v0) && v1 == RAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48) + m.emit(0x2d) + m.imm4(toImmAny(v[0])) + }) + } + // SUBQ imm8, r64 + if isImm8Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x83) + m.emit(0xe8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SUBQ imm32, r64 + if isImm32Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x81) + m.emit(0xe8 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // SUBQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x2b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SUBQ m64, r64 + if isM64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x2b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // SUBQ imm8, m64 + if isImm8Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x83) + m.mrsd(5, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SUBQ imm32, m64 + if isImm32Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x81) + m.mrsd(5, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // SUBQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SUBQ") + } + return p +} + +// SUBSD performs "Subtract Scalar Double-Precision Floating-Point Values". 
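+//
+// Illustrative sketch (assumes a *Program p and XMM register constants):
+//
+//     p.SUBSD(XMM1, XMM0)    // low double only: XMM0 -= XMM1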
+// +// Mnemonic : SUBSD +// Supported forms : (2 forms) +// +// * SUBSD xmm, xmm [SSE2] +// * SUBSD m64, xmm [SSE2] +// +func (self *Program) SUBSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SUBSD", 2, Operands { v0, v1 }) + // SUBSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SUBSD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf2) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SUBSD") + } + return p +} + +// SUBSS performs "Subtract Scalar Single-Precision Floating-Point Values". +// +// Mnemonic : SUBSS +// Supported forms : (2 forms) +// +// * SUBSS xmm, xmm [SSE] +// * SUBSS m32, xmm [SSE] +// +func (self *Program) SUBSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SUBSS", 2, Operands { v0, v1 }) + // SUBSS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SUBSS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x5c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SUBSS") + } + return p +} + +// SUBW performs "Subtract". 
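+//
+// Illustrative sketch (assumes a *Program p; AX is a register constant
+// from this package):
+//
+//     p.SUBW(1, AX)    // AX -= 1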
+// +// Mnemonic : SUB +// Supported forms : (8 forms) +// +// * SUBW imm16, ax +// * SUBW imm8, r16 +// * SUBW imm16, r16 +// * SUBW r16, r16 +// * SUBW m16, r16 +// * SUBW imm8, m16 +// * SUBW imm16, m16 +// * SUBW r16, m16 +// +func (self *Program) SUBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("SUBW", 2, Operands { v0, v1 }) + // SUBW imm16, ax + if isImm16(v0) && v1 == AX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0x2d) + m.imm2(toImmAny(v[0])) + }) + } + // SUBW imm8, r16 + if isImm8Ext(v0, 2) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xe8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // SUBW imm16, r16 + if isImm16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xe8 | lcode(v[1])) + m.imm2(toImmAny(v[0])) + }) + } + // SUBW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x2b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // SUBW m16, r16 + if isM16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x2b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // SUBW imm8, m16 + if isImm8Ext(v0, 2) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(5, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // SUBW imm16, m16 + if isImm16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(5, addr(v[1]), 1) + m.imm2(toImmAny(v[0])) + }) + } + // SUBW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for SUBW") + } + return p +} + +// SYSCALL performs "Fast System Call". +// +// Mnemonic : SYSCALL +// Supported forms : (1 form) +// +// * SYSCALL +// +func (self *Program) SYSCALL() *Instruction { + p := self.alloc("SYSCALL", 0, Operands { }) + // SYSCALL + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x05) + }) + return p +} + +// T1MSKC performs "Inverse Mask From Trailing Ones". 
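+//
+// Illustrative sketch (assumes a *Program p and register constants such
+// as EAX and ECX); emitting it also marks the program as requiring AMD TBM:
+//
+//     p.T1MSKC(EAX, ECX)    // ECX = inverse mask of the trailing one
+//                           // bits of EAX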
+// +// Mnemonic : T1MSKC +// Supported forms : (4 forms) +// +// * T1MSKC r32, r32 [TBM] +// * T1MSKC m32, r32 [TBM] +// * T1MSKC r64, r64 [TBM] +// * T1MSKC m64, r64 [TBM] +// +func (self *Program) T1MSKC(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("T1MSKC", 2, Operands { v0, v1 }) + // T1MSKC r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0x78 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xf8 | lcode(v[0])) + }) + } + // T1MSKC m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(7, addr(v[0]), 1) + }) + } + // T1MSKC r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xf8 | lcode(v[0])) + }) + } + // T1MSKC m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(7, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for T1MSKC") + } + return p +} + +// TESTB performs "Logical Compare". +// +// Mnemonic : TEST +// Supported forms : (5 forms) +// +// * TESTB imm8, al +// * TESTB imm8, r8 +// * TESTB r8, r8 +// * TESTB imm8, m8 +// * TESTB r8, m8 +// +func (self *Program) TESTB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("TESTB", 2, Operands { v0, v1 }) + // TESTB imm8, al + if isImm8(v0) && v1 == AL { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xa8) + m.imm1(toImmAny(v[0])) + }) + } + // TESTB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // TESTB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x84) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // TESTB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xf6) + m.mrsd(0, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // TESTB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x84) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for TESTB") + } + return p +} + +// TESTL performs "Logical Compare". 
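+//
+// Illustrative sketch (assumes a *Program p; EAX is a register constant
+// from this package):
+//
+//     p.TESTL(0xff, EAX)    // set flags from EAX & 0xff without storing
+//                           // the result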
+// +// Mnemonic : TEST +// Supported forms : (5 forms) +// +// * TESTL imm32, eax +// * TESTL imm32, r32 +// * TESTL r32, r32 +// * TESTL imm32, m32 +// * TESTL r32, m32 +// +func (self *Program) TESTL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("TESTL", 2, Operands { v0, v1 }) + // TESTL imm32, eax + if isImm32(v0) && v1 == EAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xa9) + m.imm4(toImmAny(v[0])) + }) + } + // TESTL imm32, r32 + if isImm32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // TESTL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x85) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // TESTL imm32, m32 + if isImm32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0xf7) + m.mrsd(0, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // TESTL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x85) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for TESTL") + } + return p +} + +// TESTQ performs "Logical Compare". +// +// Mnemonic : TEST +// Supported forms : (5 forms) +// +// * TESTQ imm32, rax +// * TESTQ imm32, r64 +// * TESTQ r64, r64 +// * TESTQ imm32, m64 +// * TESTQ r64, m64 +// +func (self *Program) TESTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("TESTQ", 2, Operands { v0, v1 }) + // TESTQ imm32, rax + if isImm32(v0) && v1 == RAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48) + m.emit(0xa9) + m.imm4(toImmAny(v[0])) + }) + } + // TESTQ imm32, r64 + if isImm32Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // TESTQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x85) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // TESTQ imm32, m64 + if isImm32Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0xf7) + m.mrsd(0, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // TESTQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x85) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for TESTQ") + } + return p +} + +// TESTW performs "Logical Compare". 
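+//
+// Illustrative sketch (assumes a *Program p; AX is a register constant
+// from this package):
+//
+//     p.TESTW(AX, AX)    // the usual zero test: ZF is set iff AX == 0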
+// +// Mnemonic : TEST +// Supported forms : (5 forms) +// +// * TESTW imm16, ax +// * TESTW imm16, r16 +// * TESTW r16, r16 +// * TESTW imm16, m16 +// * TESTW r16, m16 +// +func (self *Program) TESTW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("TESTW", 2, Operands { v0, v1 }) + // TESTW imm16, ax + if isImm16(v0) && v1 == AX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0xa9) + m.imm2(toImmAny(v[0])) + }) + } + // TESTW imm16, r16 + if isImm16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[1])) + m.imm2(toImmAny(v[0])) + }) + } + // TESTW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x85) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // TESTW imm16, m16 + if isImm16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, addr(v[1]), false) + m.emit(0xf7) + m.mrsd(0, addr(v[1]), 1) + m.imm2(toImmAny(v[0])) + }) + } + // TESTW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x85) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for TESTW") + } + return p +} + +// TZCNTL performs "Count the Number of Trailing Zero Bits". +// +// Mnemonic : TZCNT +// Supported forms : (2 forms) +// +// * TZCNTL r32, r32 [BMI] +// * TZCNTL m32, r32 [BMI] +// +func (self *Program) TZCNTL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("TZCNTL", 2, Operands { v0, v1 }) + // TZCNTL r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // TZCNTL m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xbc) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for TZCNTL") + } + return p +} + +// TZCNTQ performs "Count the Number of Trailing Zero Bits". 
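+//
+// Illustrative sketch (assumes a *Program p and 64-bit register constants
+// such as RAX and RCX); emitting it also marks the program as requiring BMI:
+//
+//     p.TZCNTQ(RAX, RCX)    // RCX = number of trailing zero bits in RAX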
+// +// Mnemonic : TZCNT +// Supported forms : (2 forms) +// +// * TZCNTQ r64, r64 [BMI] +// * TZCNTQ m64, r64 [BMI] +// +func (self *Program) TZCNTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("TZCNTQ", 2, Operands { v0, v1 }) + // TZCNTQ r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x0f) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // TZCNTQ m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xf3) + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x0f) + m.emit(0xbc) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for TZCNTQ") + } + return p +} + +// TZCNTW performs "Count the Number of Trailing Zero Bits". +// +// Mnemonic : TZCNT +// Supported forms : (2 forms) +// +// * TZCNTW r16, r16 [BMI] +// * TZCNTW m16, r16 [BMI] +// +func (self *Program) TZCNTW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("TZCNTW", 2, Operands { v0, v1 }) + // TZCNTW r16, r16 + if isReg16(v0) && isReg16(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0xf3) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // TZCNTW m16, r16 + if isM16(v0) && isReg16(v1) { + self.require(ISA_BMI) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.emit(0xf3) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0xbc) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for TZCNTW") + } + return p +} + +// TZMSK performs "Mask From Trailing Zeros". +// +// Mnemonic : TZMSK +// Supported forms : (4 forms) +// +// * TZMSK r32, r32 [TBM] +// * TZMSK m32, r32 [TBM] +// * TZMSK r64, r64 [TBM] +// * TZMSK m64, r64 [TBM] +// +func (self *Program) TZMSK(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("TZMSK", 2, Operands { v0, v1 }) + // TZMSK r32, r32 + if isReg32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0x78 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xe0 | lcode(v[0])) + }) + } + // TZMSK m32, r32 + if isM32(v0) && isReg32(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(4, addr(v[0]), 1) + }) + } + // TZMSK r64, r64 + if isReg64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xe0 | lcode(v[0])) + }) + } + // TZMSK m64, r64 + if isM64(v0) && isReg64(v1) { + self.require(ISA_TBM) + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, 0, addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(4, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for TZMSK") + } + return p +} + +// UCOMISD performs "Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS". 
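+//
+// Illustrative sketch (assumes a *Program p and XMM register constants):
+//
+//     p.UCOMISD(XMM1, XMM0)    // compare the low doubles of XMM0 and
+//                              // XMM1, setting ZF/PF/CF in EFLAGS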
+// +// Mnemonic : UCOMISD +// Supported forms : (2 forms) +// +// * UCOMISD xmm, xmm [SSE2] +// * UCOMISD m64, xmm [SSE2] +// +func (self *Program) UCOMISD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("UCOMISD", 2, Operands { v0, v1 }) + // UCOMISD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // UCOMISD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for UCOMISD") + } + return p +} + +// UCOMISS performs "Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS". +// +// Mnemonic : UCOMISS +// Supported forms : (2 forms) +// +// * UCOMISS xmm, xmm [SSE] +// * UCOMISS m32, xmm [SSE] +// +func (self *Program) UCOMISS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("UCOMISS", 2, Operands { v0, v1 }) + // UCOMISS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x2e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // UCOMISS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x2e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for UCOMISS") + } + return p +} + +// UD2 performs "Undefined Instruction". +// +// Mnemonic : UD2 +// Supported forms : (1 form) +// +// * UD2 +// +func (self *Program) UD2() *Instruction { + p := self.alloc("UD2", 0, Operands { }) + // UD2 + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x0b) + }) + return p +} + +// UNPCKHPD performs "Unpack and Interleave High Packed Double-Precision Floating-Point Values". +// +// Mnemonic : UNPCKHPD +// Supported forms : (2 forms) +// +// * UNPCKHPD xmm, xmm [SSE2] +// * UNPCKHPD m128, xmm [SSE2] +// +func (self *Program) UNPCKHPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("UNPCKHPD", 2, Operands { v0, v1 }) + // UNPCKHPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x15) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // UNPCKHPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x15) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for UNPCKHPD") + } + return p +} + +// UNPCKHPS performs "Unpack and Interleave High Packed Single-Precision Floating-Point Values". 
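+//
+// Illustrative sketch (assumes a *Program p and XMM register constants):
+//
+//     p.UNPCKHPS(XMM1, XMM0)    // interleave the two high 32-bit lanes
+//                               // of XMM0 and XMM1 into XMM0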
+// +// Mnemonic : UNPCKHPS +// Supported forms : (2 forms) +// +// * UNPCKHPS xmm, xmm [SSE] +// * UNPCKHPS m128, xmm [SSE] +// +func (self *Program) UNPCKHPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("UNPCKHPS", 2, Operands { v0, v1 }) + // UNPCKHPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x15) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // UNPCKHPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x15) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for UNPCKHPS") + } + return p +} + +// UNPCKLPD performs "Unpack and Interleave Low Packed Double-Precision Floating-Point Values". +// +// Mnemonic : UNPCKLPD +// Supported forms : (2 forms) +// +// * UNPCKLPD xmm, xmm [SSE2] +// * UNPCKLPD m128, xmm [SSE2] +// +func (self *Program) UNPCKLPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("UNPCKLPD", 2, Operands { v0, v1 }) + // UNPCKLPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x14) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // UNPCKLPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x14) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for UNPCKLPD") + } + return p +} + +// UNPCKLPS performs "Unpack and Interleave Low Packed Single-Precision Floating-Point Values". +// +// Mnemonic : UNPCKLPS +// Supported forms : (2 forms) +// +// * UNPCKLPS xmm, xmm [SSE] +// * UNPCKLPS m128, xmm [SSE] +// +func (self *Program) UNPCKLPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("UNPCKLPS", 2, Operands { v0, v1 }) + // UNPCKLPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x14) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // UNPCKLPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x14) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for UNPCKLPS") + } + return p +} + +// VADDPD performs "Add Packed Double-Precision Floating-Point Values". 
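+//
+// Illustrative sketch (assumes a *Program p and XMM register constants);
+// as elsewhere in this file, sources come first and the destination last:
+//
+//     p.VADDPD(XMM2, XMM1, XMM0)    // packed doubles: XMM0 = XMM1 + XMM2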
+// +// Mnemonic : VADDPD +// Supported forms : (11 forms) +// +// * VADDPD xmm, xmm, xmm [AVX] +// * VADDPD m128, xmm, xmm [AVX] +// * VADDPD ymm, ymm, ymm [AVX] +// * VADDPD m256, ymm, ymm [AVX] +// * VADDPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VADDPD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VADDPD zmm, zmm, zmm{k}{z} [AVX512F] +// * VADDPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VADDPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VADDPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VADDPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VADDPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VADDPD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VADDPD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VADDPD takes 3 or 4 operands") + } + // VADDPD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDPD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VADDPD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDPD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VADDPD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VADDPD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x58) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VADDPD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDPD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && 
isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VADDPD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDPD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VADDPD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VADDPD") + } + return p +} + +// VADDPS performs "Add Packed Single-Precision Floating-Point Values". 
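+//
+// Illustrative sketch (assumes a *Program p and YMM register constants
+// such as YMM0):
+//
+//     p.VADDPS(YMM2, YMM1, YMM0)    // packed singles: YMM0 = YMM1 + YMM2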
+// +// Mnemonic : VADDPS +// Supported forms : (11 forms) +// +// * VADDPS xmm, xmm, xmm [AVX] +// * VADDPS m128, xmm, xmm [AVX] +// * VADDPS ymm, ymm, ymm [AVX] +// * VADDPS m256, ymm, ymm [AVX] +// * VADDPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VADDPS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VADDPS zmm, zmm, zmm{k}{z} [AVX512F] +// * VADDPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VADDPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VADDPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VADDPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VADDPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VADDPS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VADDPS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VADDPS takes 3 or 4 operands") + } + // VADDPS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDPS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VADDPS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDPS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VADDPS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VADDPS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x58) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VADDPS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDPS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && 
isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VADDPS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDPS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VADDPS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VADDPS") + } + return p +} + +// VADDSD performs "Add Scalar Double-Precision Floating-Point Values". 
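+//
+// Illustrative sketch (assumes a *Program p and XMM register constants):
+//
+//     p.VADDSD(XMM2, XMM1, XMM0)    // low double: XMM0 = XMM1 + XMM2;
+//                                   // upper bits are copied from XMM1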
+// +// Mnemonic : VADDSD +// Supported forms : (5 forms) +// +// * VADDSD xmm, xmm, xmm [AVX] +// * VADDSD m64, xmm, xmm [AVX] +// * VADDSD m64, xmm, xmm{k}{z} [AVX512F] +// * VADDSD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VADDSD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VADDSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VADDSD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VADDSD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VADDSD takes 3 or 4 operands") + } + // VADDSD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDSD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VADDSD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VADDSD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xff ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x58) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VADDSD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VADDSD") + } + return p +} + +// VADDSS performs "Add Scalar Single-Precision Floating-Point Values". 
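+//
+// Illustrative sketch (assumes a *Program p and XMM register constants):
+//
+//     p.VADDSS(XMM2, XMM1, XMM0)    // low single: XMM0 = XMM1 + XMM2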
+// +// Mnemonic : VADDSS +// Supported forms : (5 forms) +// +// * VADDSS xmm, xmm, xmm [AVX] +// * VADDSS m32, xmm, xmm [AVX] +// * VADDSS m32, xmm, xmm{k}{z} [AVX512F] +// * VADDSS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VADDSS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VADDSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VADDSS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VADDSS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VADDSS takes 3 or 4 operands") + } + // VADDSS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDSS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VADDSS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x58) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VADDSS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7e ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x58) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VADDSS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x58) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VADDSS") + } + return p +} + +// VADDSUBPD performs "Packed Double-FP Add/Subtract". 
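+//
+// Illustrative sketch (assuming a *Program value p and the package's YMM
+// register constants):
+//
+//     p.VADDSUBPD(YMM0, YMM1, YMM2)   // even lanes: ymm1 - ymm0; odd lanes: ymm1 + ymm0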
+// +// Mnemonic : VADDSUBPD +// Supported forms : (4 forms) +// +// * VADDSUBPD xmm, xmm, xmm [AVX] +// * VADDSUBPD m128, xmm, xmm [AVX] +// * VADDSUBPD ymm, ymm, ymm [AVX] +// * VADDSUBPD m256, ymm, ymm [AVX] +// +func (self *Program) VADDSUBPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VADDSUBPD", 3, Operands { v0, v1, v2 }) + // VADDSUBPD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd0) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDSUBPD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd0) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VADDSUBPD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd0) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDSUBPD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd0) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VADDSUBPD") + } + return p +} + +// VADDSUBPS performs "Packed Single-FP Add/Subtract". +// +// Mnemonic : VADDSUBPS +// Supported forms : (4 forms) +// +// * VADDSUBPS xmm, xmm, xmm [AVX] +// * VADDSUBPS m128, xmm, xmm [AVX] +// * VADDSUBPS ymm, ymm, ymm [AVX] +// * VADDSUBPS m256, ymm, ymm [AVX] +// +func (self *Program) VADDSUBPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VADDSUBPS", 3, Operands { v0, v1, v2 }) + // VADDSUBPS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd0) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDSUBPS m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd0) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VADDSUBPS ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd0) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VADDSUBPS m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd0) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VADDSUBPS") + } + return p +} + +// VAESDEC performs "Perform One Round of an AES Decryption Flow". 
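+//
+// Illustrative sketch (assuming a *Program value p and XMM register
+// constants; the round key comes first, the state second, the result last):
+//
+//     p.VAESDEC(XMM0, XMM1, XMM2)   // xmm2 = one decryption round of state xmm1 with round key xmm0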
+// +// Mnemonic : VAESDEC +// Supported forms : (2 forms) +// +// * VAESDEC xmm, xmm, xmm [AES,AVX] +// * VAESDEC m128, xmm, xmm [AES,AVX] +// +func (self *Program) VAESDEC(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VAESDEC", 3, Operands { v0, v1, v2 }) + // VAESDEC xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX | ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xde) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VAESDEC m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX | ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xde) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VAESDEC") + } + return p +} + +// VAESDECLAST performs "Perform Last Round of an AES Decryption Flow". +// +// Mnemonic : VAESDECLAST +// Supported forms : (2 forms) +// +// * VAESDECLAST xmm, xmm, xmm [AES,AVX] +// * VAESDECLAST m128, xmm, xmm [AES,AVX] +// +func (self *Program) VAESDECLAST(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VAESDECLAST", 3, Operands { v0, v1, v2 }) + // VAESDECLAST xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX | ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VAESDECLAST m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX | ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xdf) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VAESDECLAST") + } + return p +} + +// VAESENC performs "Perform One Round of an AES Encryption Flow". +// +// Mnemonic : VAESENC +// Supported forms : (2 forms) +// +// * VAESENC xmm, xmm, xmm [AES,AVX] +// * VAESENC m128, xmm, xmm [AES,AVX] +// +func (self *Program) VAESENC(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VAESENC", 3, Operands { v0, v1, v2 }) + // VAESENC xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX | ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xdc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VAESENC m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX | ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xdc) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VAESENC") + } + return p +} + +// VAESENCLAST performs "Perform Last Round of an AES Encryption Flow". 
+// +// Mnemonic : VAESENCLAST +// Supported forms : (2 forms) +// +// * VAESENCLAST xmm, xmm, xmm [AES,AVX] +// * VAESENCLAST m128, xmm, xmm [AES,AVX] +// +func (self *Program) VAESENCLAST(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VAESENCLAST", 3, Operands { v0, v1, v2 }) + // VAESENCLAST xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX | ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xdd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VAESENCLAST m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX | ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xdd) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VAESENCLAST") + } + return p +} + +// VAESIMC performs "Perform the AES InvMixColumn Transformation". +// +// Mnemonic : VAESIMC +// Supported forms : (2 forms) +// +// * VAESIMC xmm, xmm [AES,AVX] +// * VAESIMC m128, xmm [AES,AVX] +// +func (self *Program) VAESIMC(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VAESIMC", 2, Operands { v0, v1 }) + // VAESIMC xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX | ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VAESIMC m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX | ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0xdb) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VAESIMC") + } + return p +} + +// VAESKEYGENASSIST performs "AES Round Key Generation Assist". +// +// Mnemonic : VAESKEYGENASSIST +// Supported forms : (2 forms) +// +// * VAESKEYGENASSIST imm8, xmm, xmm [AES,AVX] +// * VAESKEYGENASSIST imm8, m128, xmm [AES,AVX] +// +func (self *Program) VAESKEYGENASSIST(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VAESKEYGENASSIST", 3, Operands { v0, v1, v2 }) + // VAESKEYGENASSIST imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX | ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VAESKEYGENASSIST imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AVX | ISA_AES) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0) + m.emit(0xdf) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VAESKEYGENASSIST") + } + return p +} + +// VALIGND performs "Align Doubleword Vectors". 
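+//
+// Illustrative sketch (assuming a *Program value p, ZMM register constants,
+// and that a plain Go integer is accepted as the imm8 operand):
+//
+//     p.VALIGND(3, ZMM0, ZMM1, ZMM2)   // zmm2 = low 16 dwords of the concatenation zmm1:zmm0, shifted right by 3 dwords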
+// +// Mnemonic : VALIGND +// Supported forms : (6 forms) +// +// * VALIGND imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VALIGND imm8, zmm, zmm, zmm{k}{z} [AVX512F] +// * VALIGND imm8, m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VALIGND imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VALIGND imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VALIGND imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VALIGND(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VALIGND", 4, Operands { v0, v1, v2, v3 }) + // VALIGND imm8, m512/m32bcst, zmm, zmm{k}{z} + if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x03) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VALIGND imm8, zmm, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x03) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VALIGND imm8, m128/m32bcst, xmm, xmm{k}{z} + if isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x03) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VALIGND imm8, xmm, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x03) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VALIGND imm8, m256/m32bcst, ymm, ymm{k}{z} + if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x03) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VALIGND imm8, ymm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x03) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VALIGND") + } + return p +} + +// VALIGNQ performs "Align Quadword Vectors". 
+// +// Mnemonic : VALIGNQ +// Supported forms : (6 forms) +// +// * VALIGNQ imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VALIGNQ imm8, zmm, zmm, zmm{k}{z} [AVX512F] +// * VALIGNQ imm8, m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VALIGNQ imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VALIGNQ imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VALIGNQ imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VALIGNQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VALIGNQ", 4, Operands { v0, v1, v2, v3 }) + // VALIGNQ imm8, m512/m64bcst, zmm, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x03) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VALIGNQ imm8, zmm, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x03) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VALIGNQ imm8, m128/m64bcst, xmm, xmm{k}{z} + if isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x03) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VALIGNQ imm8, xmm, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x03) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VALIGNQ imm8, m256/m64bcst, ymm, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x03) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VALIGNQ imm8, ymm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x03) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VALIGNQ") + } + return p +} + +// VANDNPD performs "Bitwise Logical AND NOT 
of Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VANDNPD +// Supported forms : (10 forms) +// +// * VANDNPD xmm, xmm, xmm [AVX] +// * VANDNPD m128, xmm, xmm [AVX] +// * VANDNPD ymm, ymm, ymm [AVX] +// * VANDNPD m256, ymm, ymm [AVX] +// * VANDNPD m512/m64bcst, zmm, zmm{k}{z} [AVX512DQ] +// * VANDNPD zmm, zmm, zmm{k}{z} [AVX512DQ] +// * VANDNPD m128/m64bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VANDNPD xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VANDNPD m256/m64bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VANDNPD ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VANDNPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VANDNPD", 3, Operands { v0, v1, v2 }) + // VANDNPD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x55) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDNPD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x55) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VANDNPD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x55) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDNPD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x55) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VANDNPD m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x55) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VANDNPD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x55) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDNPD m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x55) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VANDNPD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x55) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + 
}) + } + // VANDNPD m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x55) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VANDNPD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x55) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VANDNPD") + } + return p +} + +// VANDNPS performs "Bitwise Logical AND NOT of Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VANDNPS +// Supported forms : (10 forms) +// +// * VANDNPS xmm, xmm, xmm [AVX] +// * VANDNPS m128, xmm, xmm [AVX] +// * VANDNPS ymm, ymm, ymm [AVX] +// * VANDNPS m256, ymm, ymm [AVX] +// * VANDNPS m512/m32bcst, zmm, zmm{k}{z} [AVX512DQ] +// * VANDNPS zmm, zmm, zmm{k}{z} [AVX512DQ] +// * VANDNPS m128/m32bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VANDNPS xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VANDNPS m256/m32bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VANDNPS ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VANDNPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VANDNPS", 3, Operands { v0, v1, v2 }) + // VANDNPS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x55) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDNPS m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x55) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VANDNPS ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x55) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDNPS m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x55) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VANDNPS m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x55) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VANDNPS zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + 
m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x55) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDNPS m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x55) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VANDNPS xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x55) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDNPS m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x55) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VANDNPS ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x55) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VANDNPS") + } + return p +} + +// VANDPD performs "Bitwise Logical AND of Packed Double-Precision Floating-Point Values". 
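+//
+// Illustrative sketch (assuming a *Program value p and XMM register
+// constants; the two sources are interchangeable for a bitwise AND):
+//
+//     p.VANDPD(XMM0, XMM1, XMM2)   // xmm2 = xmm1 & xmm0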
+// +// Mnemonic : VANDPD +// Supported forms : (10 forms) +// +// * VANDPD xmm, xmm, xmm [AVX] +// * VANDPD m128, xmm, xmm [AVX] +// * VANDPD ymm, ymm, ymm [AVX] +// * VANDPD m256, ymm, ymm [AVX] +// * VANDPD m512/m64bcst, zmm, zmm{k}{z} [AVX512DQ] +// * VANDPD zmm, zmm, zmm{k}{z} [AVX512DQ] +// * VANDPD m128/m64bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VANDPD xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VANDPD m256/m64bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VANDPD ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VANDPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VANDPD", 3, Operands { v0, v1, v2 }) + // VANDPD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x54) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDPD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x54) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VANDPD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x54) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDPD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x54) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VANDPD m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x54) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VANDPD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x54) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDPD m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x54) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VANDPD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x54) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDPD m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) 
&& isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x54) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VANDPD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x54) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VANDPD") + } + return p +} + +// VANDPS performs "Bitwise Logical AND of Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VANDPS +// Supported forms : (10 forms) +// +// * VANDPS xmm, xmm, xmm [AVX] +// * VANDPS m128, xmm, xmm [AVX] +// * VANDPS ymm, ymm, ymm [AVX] +// * VANDPS m256, ymm, ymm [AVX] +// * VANDPS m512/m32bcst, zmm, zmm{k}{z} [AVX512DQ] +// * VANDPS zmm, zmm, zmm{k}{z} [AVX512DQ] +// * VANDPS m128/m32bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VANDPS xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VANDPS m256/m32bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VANDPS ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VANDPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VANDPS", 3, Operands { v0, v1, v2 }) + // VANDPS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x54) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDPS m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x54) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VANDPS ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x54) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDPS m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x54) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VANDPS m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x54) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VANDPS zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x54) + m.emit(0xc0 | 
lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDPS m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x54) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VANDPS xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x54) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VANDPS m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x54) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VANDPS ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x54) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VANDPS") + } + return p +} + +// VBLENDMPD performs "Blend Packed Double-Precision Floating-Point Vectors Using an OpMask Control". 
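+//
+// Illustrative sketch (assuming a *Program value p and ZMM register
+// constants; the opmask decoration on the destination is omitted here, and
+// with no mask attached every lane is simply taken from the first source):
+//
+//     p.VBLENDMPD(ZMM0, ZMM1, ZMM2)   // with mask k on zmm2: lane i = k[i] ? zmm0[i] : zmm1[i]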
+// +// Mnemonic : VBLENDMPD +// Supported forms : (6 forms) +// +// * VBLENDMPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VBLENDMPD zmm, zmm, zmm{k}{z} [AVX512F] +// * VBLENDMPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VBLENDMPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VBLENDMPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VBLENDMPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VBLENDMPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VBLENDMPD", 3, Operands { v0, v1, v2 }) + // VBLENDMPD m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x65) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VBLENDMPD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x65) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VBLENDMPD m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x65) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VBLENDMPD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x65) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VBLENDMPD m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x65) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VBLENDMPD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x65) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VBLENDMPD") + } + return p +} + +// VBLENDMPS performs "Blend Packed Single-Precision Floating-Point Vectors Using an OpMask Control". 
+// +// Mnemonic : VBLENDMPS +// Supported forms : (6 forms) +// +// * VBLENDMPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VBLENDMPS zmm, zmm, zmm{k}{z} [AVX512F] +// * VBLENDMPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VBLENDMPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VBLENDMPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VBLENDMPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VBLENDMPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VBLENDMPS", 3, Operands { v0, v1, v2 }) + // VBLENDMPS m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x65) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VBLENDMPS zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x65) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VBLENDMPS m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x65) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VBLENDMPS xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x65) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VBLENDMPS m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x65) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VBLENDMPS ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x65) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VBLENDMPS") + } + return p +} + +// VBLENDPD performs "Blend Packed Double Precision Floating-Point Values". 
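+//
+// Illustrative sketch (assuming a *Program value p, XMM register constants,
+// and a plain Go integer for the imm8 lane selector):
+//
+//     p.VBLENDPD(0b10, XMM0, XMM1, XMM2)   // xmm2: lane 1 from xmm0 (imm bit set), lane 0 from xmm1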
+// +// Mnemonic : VBLENDPD +// Supported forms : (4 forms) +// +// * VBLENDPD imm8, xmm, xmm, xmm [AVX] +// * VBLENDPD imm8, m128, xmm, xmm [AVX] +// * VBLENDPD imm8, ymm, ymm, ymm [AVX] +// * VBLENDPD imm8, m256, ymm, ymm [AVX] +// +func (self *Program) VBLENDPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VBLENDPD", 4, Operands { v0, v1, v2, v3 }) + // VBLENDPD imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x0d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VBLENDPD imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x0d) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VBLENDPD imm8, ymm, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x0d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VBLENDPD imm8, m256, ymm, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x0d) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VBLENDPD") + } + return p +} + +// VBLENDPS performs " Blend Packed Single Precision Floating-Point Values". 
+// +// Mnemonic : VBLENDPS +// Supported forms : (4 forms) +// +// * VBLENDPS imm8, xmm, xmm, xmm [AVX] +// * VBLENDPS imm8, m128, xmm, xmm [AVX] +// * VBLENDPS imm8, ymm, ymm, ymm [AVX] +// * VBLENDPS imm8, m256, ymm, ymm [AVX] +// +func (self *Program) VBLENDPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VBLENDPS", 4, Operands { v0, v1, v2, v3 }) + // VBLENDPS imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x0c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VBLENDPS imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x0c) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VBLENDPS imm8, ymm, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x0c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VBLENDPS imm8, m256, ymm, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x0c) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VBLENDPS") + } + return p +} + +// VBLENDVPD performs " Variable Blend Packed Double Precision Floating-Point Values". 
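+//
+// Illustrative sketch (assuming a *Program value p and XMM register
+// constants; the selector register comes first, the destination last):
+//
+//     p.VBLENDVPD(XMM0, XMM1, XMM2, XMM3)   // xmm3 lane i = sign bit of xmm0[i] set ? xmm1[i] : xmm2[i]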
+// +// Mnemonic : VBLENDVPD +// Supported forms : (4 forms) +// +// * VBLENDVPD xmm, xmm, xmm, xmm [AVX] +// * VBLENDVPD xmm, m128, xmm, xmm [AVX] +// * VBLENDVPD ymm, ymm, ymm, ymm [AVX] +// * VBLENDVPD ymm, m256, ymm, ymm [AVX] +// +func (self *Program) VBLENDVPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VBLENDVPD", 4, Operands { v0, v1, v2, v3 }) + // VBLENDVPD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x4b) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VBLENDVPD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x4b) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VBLENDVPD ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x4b) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VBLENDVPD ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x4b) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VBLENDVPD") + } + return p +} + +// VBLENDVPS performs " Variable Blend Packed Single Precision Floating-Point Values". 
+// +// Mnemonic : VBLENDVPS +// Supported forms : (4 forms) +// +// * VBLENDVPS xmm, xmm, xmm, xmm [AVX] +// * VBLENDVPS xmm, m128, xmm, xmm [AVX] +// * VBLENDVPS ymm, ymm, ymm, ymm [AVX] +// * VBLENDVPS ymm, m256, ymm, ymm [AVX] +// +func (self *Program) VBLENDVPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VBLENDVPS", 4, Operands { v0, v1, v2, v3 }) + // VBLENDVPS xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x4a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VBLENDVPS xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x4a) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VBLENDVPS ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x4a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VBLENDVPS ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x4a) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VBLENDVPS") + } + return p +} + +// VBROADCASTF128 performs "Broadcast 128 Bit of Floating-Point Data". +// +// Mnemonic : VBROADCASTF128 +// Supported forms : (1 form) +// +// * VBROADCASTF128 m128, ymm [AVX] +// +func (self *Program) VBROADCASTF128(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTF128", 2, Operands { v0, v1 }) + // VBROADCASTF128 m128, ymm + if isM128(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x1a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTF128") + } + return p +} + +// VBROADCASTF32X2 performs "Broadcast Two Single-Precision Floating-Point Elements". 
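+//
+// Illustrative sketch (assuming a *Program value p and that plain XMM/ZMM
+// register constants satisfy the unmasked {k}{z} operand checks):
+//
+//     p.VBROADCASTF32X2(XMM4, ZMM5)   // replicate the low two floats of xmm4 across all of zmm5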
+// +// Mnemonic : VBROADCASTF32X2 +// Supported forms : (4 forms) +// +// * VBROADCASTF32X2 xmm, zmm{k}{z} [AVX512DQ] +// * VBROADCASTF32X2 m64, zmm{k}{z} [AVX512DQ] +// * VBROADCASTF32X2 xmm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VBROADCASTF32X2 m64, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VBROADCASTF32X2(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTF32X2", 2, Operands { v0, v1 }) + // VBROADCASTF32X2 xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x19) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VBROADCASTF32X2 m64, zmm{k}{z} + if isM64(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x19) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VBROADCASTF32X2 xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x19) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VBROADCASTF32X2 m64, ymm{k}{z} + if isM64(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x19) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTF32X2") + } + return p +} + +// VBROADCASTF32X4 performs "Broadcast Four Single-Precision Floating-Point Elements". +// +// Mnemonic : VBROADCASTF32X4 +// Supported forms : (2 forms) +// +// * VBROADCASTF32X4 m128, zmm{k}{z} [AVX512F] +// * VBROADCASTF32X4 m128, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VBROADCASTF32X4(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTF32X4", 2, Operands { v0, v1 }) + // VBROADCASTF32X4 m128, zmm{k}{z} + if isM128(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x1a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VBROADCASTF32X4 m128, ymm{k}{z} + if isM128(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x1a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTF32X4") + } + return p +} + +// VBROADCASTF32X8 performs "Broadcast Eight Single-Precision Floating-Point Elements". 
+// +// Mnemonic : VBROADCASTF32X8 +// Supported forms : (1 form) +// +// * VBROADCASTF32X8 m256, zmm{k}{z} [AVX512DQ] +// +func (self *Program) VBROADCASTF32X8(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTF32X8", 2, Operands { v0, v1 }) + // VBROADCASTF32X8 m256, zmm{k}{z} + if isM256(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x1b) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTF32X8") + } + return p +} + +// VBROADCASTF64X2 performs "Broadcast Two Double-Precision Floating-Point Elements". +// +// Mnemonic : VBROADCASTF64X2 +// Supported forms : (2 forms) +// +// * VBROADCASTF64X2 m128, zmm{k}{z} [AVX512DQ] +// * VBROADCASTF64X2 m128, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VBROADCASTF64X2(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTF64X2", 2, Operands { v0, v1 }) + // VBROADCASTF64X2 m128, zmm{k}{z} + if isM128(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x1a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VBROADCASTF64X2 m128, ymm{k}{z} + if isM128(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x1a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTF64X2") + } + return p +} + +// VBROADCASTF64X4 performs "Broadcast Four Double-Precision Floating-Point Elements". +// +// Mnemonic : VBROADCASTF64X4 +// Supported forms : (1 form) +// +// * VBROADCASTF64X4 m256, zmm{k}{z} [AVX512F] +// +func (self *Program) VBROADCASTF64X4(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTF64X4", 2, Operands { v0, v1 }) + // VBROADCASTF64X4 m256, zmm{k}{z} + if isM256(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x1b) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTF64X4") + } + return p +} + +// VBROADCASTI128 performs "Broadcast 128 Bits of Integer Data". +// +// Mnemonic : VBROADCASTI128 +// Supported forms : (1 form) +// +// * VBROADCASTI128 m128, ymm [AVX2] +// +func (self *Program) VBROADCASTI128(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTI128", 2, Operands { v0, v1 }) + // VBROADCASTI128 m128, ymm + if isM128(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTI128") + } + return p +} + +// VBROADCASTI32X2 performs "Broadcast Two Doubleword Elements". 
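+//
+// Illustrative sketch (assuming a *Program value p and that plain registers
+// satisfy the unmasked {k}{z} operand checks for the xmm-to-ymm form):
+//
+//     p.VBROADCASTI32X2(XMM0, YMM1)   // replicate the low two dwords of xmm0 across ymm1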
+// +// Mnemonic : VBROADCASTI32X2 +// Supported forms : (6 forms) +// +// * VBROADCASTI32X2 xmm, zmm{k}{z} [AVX512DQ] +// * VBROADCASTI32X2 m64, zmm{k}{z} [AVX512DQ] +// * VBROADCASTI32X2 xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VBROADCASTI32X2 xmm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VBROADCASTI32X2 m64, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VBROADCASTI32X2 m64, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VBROADCASTI32X2(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTI32X2", 2, Operands { v0, v1 }) + // VBROADCASTI32X2 xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x59) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VBROADCASTI32X2 m64, zmm{k}{z} + if isM64(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x59) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VBROADCASTI32X2 xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x59) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VBROADCASTI32X2 xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x59) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VBROADCASTI32X2 m64, xmm{k}{z} + if isM64(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x59) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VBROADCASTI32X2 m64, ymm{k}{z} + if isM64(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x59) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTI32X2") + } + return p +} + +// VBROADCASTI32X4 performs "Broadcast Four Doubleword Elements". 
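+//
+// Only memory sources are encodable for this instruction: a register source
+// matches none of the guards below, so the call panics with "invalid
+// operands for VBROADCASTI32X4". A sketch, assuming the package's Ptr
+// memory-operand helper:
+//
+//     p.VBROADCASTI32X4(Ptr(RSI, 0), ZMM2)    // repeat the 16-byte block at [RSI] four times across ZMM2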
+// +// Mnemonic : VBROADCASTI32X4 +// Supported forms : (2 forms) +// +// * VBROADCASTI32X4 m128, zmm{k}{z} [AVX512F] +// * VBROADCASTI32X4 m128, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VBROADCASTI32X4(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTI32X4", 2, Operands { v0, v1 }) + // VBROADCASTI32X4 m128, zmm{k}{z} + if isM128(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VBROADCASTI32X4 m128, ymm{k}{z} + if isM128(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTI32X4") + } + return p +} + +// VBROADCASTI32X8 performs "Broadcast Eight Doubleword Elements". +// +// Mnemonic : VBROADCASTI32X8 +// Supported forms : (1 form) +// +// * VBROADCASTI32X8 m256, zmm{k}{z} [AVX512DQ] +// +func (self *Program) VBROADCASTI32X8(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTI32X8", 2, Operands { v0, v1 }) + // VBROADCASTI32X8 m256, zmm{k}{z} + if isM256(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTI32X8") + } + return p +} + +// VBROADCASTI64X2 performs "Broadcast Two Quadword Elements". +// +// Mnemonic : VBROADCASTI64X2 +// Supported forms : (2 forms) +// +// * VBROADCASTI64X2 m128, zmm{k}{z} [AVX512DQ] +// * VBROADCASTI64X2 m128, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VBROADCASTI64X2(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTI64X2", 2, Operands { v0, v1 }) + // VBROADCASTI64X2 m128, zmm{k}{z} + if isM128(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VBROADCASTI64X2 m128, ymm{k}{z} + if isM128(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTI64X2") + } + return p +} + +// VBROADCASTI64X4 performs "Broadcast Four Quadword Elements". 
+// +// Mnemonic : VBROADCASTI64X4 +// Supported forms : (1 form) +// +// * VBROADCASTI64X4 m256, zmm{k}{z} [AVX512F] +// +func (self *Program) VBROADCASTI64X4(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTI64X4", 2, Operands { v0, v1 }) + // VBROADCASTI64X4 m256, zmm{k}{z} + if isM256(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTI64X4") + } + return p +} + +// VBROADCASTSD performs "Broadcast Double-Precision Floating-Point Element". +// +// Mnemonic : VBROADCASTSD +// Supported forms : (6 forms) +// +// * VBROADCASTSD m64, ymm [AVX] +// * VBROADCASTSD xmm, ymm [AVX2] +// * VBROADCASTSD xmm, zmm{k}{z} [AVX512F] +// * VBROADCASTSD m64, zmm{k}{z} [AVX512F] +// * VBROADCASTSD xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VBROADCASTSD m64, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VBROADCASTSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTSD", 2, Operands { v0, v1 }) + // VBROADCASTSD m64, ymm + if isM64(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x19) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VBROADCASTSD xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x19) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VBROADCASTSD xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x19) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VBROADCASTSD m64, zmm{k}{z} + if isM64(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x19) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VBROADCASTSD xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x19) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VBROADCASTSD m64, ymm{k}{z} + if isM64(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x19) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTSD") + } + return p +} + +// VBROADCASTSS performs "Broadcast Single-Precision Floating-Point Element". 
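+//
+// A minimal usage sketch on a *Program p, using the AVX2 register form:
+//
+//     p.VBROADCASTSS(XMM0, YMM1)    // copy lane 0 of XMM0 into all 8 single-precision lanes of YMM1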
+// +// Mnemonic : VBROADCASTSS +// Supported forms : (8 forms) +// +// * VBROADCASTSS m32, xmm [AVX] +// * VBROADCASTSS m32, ymm [AVX] +// * VBROADCASTSS xmm, xmm [AVX2] +// * VBROADCASTSS xmm, ymm [AVX2] +// * VBROADCASTSS xmm, zmm{k}{z} [AVX512F] +// * VBROADCASTSS m32, zmm{k}{z} [AVX512F] +// * VBROADCASTSS xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VBROADCASTSS m32, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VBROADCASTSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VBROADCASTSS", 2, Operands { v0, v1 }) + // VBROADCASTSS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x18) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VBROADCASTSS m32, ymm + if isM32(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x18) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VBROADCASTSS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x18) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VBROADCASTSS xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x18) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VBROADCASTSS xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x18) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VBROADCASTSS m32, zmm{k}{z} + if isM32(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x18) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VBROADCASTSS xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x18) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VBROADCASTSS m32, ymm{k}{z} + if isM32(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x18) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VBROADCASTSS") + } + return p +} + +// VCMPPD performs "Compare Packed Double-Precision Floating-Point Values". 
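+//
+// Operands follow this package's reversed (AT&T-style) order: the predicate
+// immediate comes first and the destination last; the optional trailing
+// operand exists only for the EVEX {sae} form. A sketch:
+//
+//     p.VCMPPD(0x01, XMM2, XMM1, XMM0)    // XMM0[i] = all-ones if XMM1[i] < XMM2[i] (0x01 = LT_OS), else zero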
+// +// Mnemonic : VCMPPD +// Supported forms : (11 forms) +// +// * VCMPPD imm8, xmm, xmm, xmm [AVX] +// * VCMPPD imm8, m128, xmm, xmm [AVX] +// * VCMPPD imm8, ymm, ymm, ymm [AVX] +// * VCMPPD imm8, m256, ymm, ymm [AVX] +// * VCMPPD imm8, m512/m64bcst, zmm, k{k} [AVX512F] +// * VCMPPD imm8, {sae}, zmm, zmm, k{k} [AVX512F] +// * VCMPPD imm8, zmm, zmm, k{k} [AVX512F] +// * VCMPPD imm8, m128/m64bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VCMPPD imm8, xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VCMPPD imm8, m256/m64bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VCMPPD imm8, ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VCMPPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCMPPD", 4, Operands { v0, v1, v2, v3 }) + case 1 : p = self.alloc("VCMPPD", 5, Operands { v0, v1, v2, v3, vv[0] }) + default : panic("instruction VCMPPD takes 4 or 5 operands") + } + // VCMPPD imm8, xmm, xmm, xmm + if len(vv) == 0 && isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[3]), v[1], hlcode(v[2])) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPD imm8, m128, xmm, xmm + if len(vv) == 0 && isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPD imm8, ymm, ymm, ymm + if len(vv) == 0 && isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[3]), v[1], hlcode(v[2])) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPD imm8, m256, ymm, ymm + if len(vv) == 0 && isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPD imm8, m512/m64bcst, zmm, k{k} + if len(vv) == 0 && isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPD imm8, {sae}, zmm, zmm, k{k} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMM(v3) && isKk(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4))) + m.emit(0xfd ^ (hlcode(v[3]) << 3)) + m.emit((0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPD imm8, zmm, zmm, k{k} + if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | 
(ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPD imm8, m128/m64bcst, xmm, k{k} + if len(vv) == 0 && isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPD imm8, xmm, xmm, k{k} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPD imm8, m256/m64bcst, ymm, k{k} + if len(vv) == 0 && isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPD imm8, ymm, ymm, k{k} + if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCMPPD") + } + return p +} + +// VCMPPS performs "Compare Packed Single-Precision Floating-Point Values". 
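+//
+// The predicate immediate uses the same encoding as VCMPPD; the eight base
+// values inherited from CMPPS are 0x00 EQ_OQ, 0x01 LT_OS, 0x02 LE_OS,
+// 0x03 UNORD_Q, 0x04 NEQ_UQ, 0x05 NLT_US, 0x06 NLE_US and 0x07 ORD_Q.
+// A sketch:
+//
+//     p.VCMPPS(0x00, YMM2, YMM1, YMM0)    // YMM0[i] = all-ones where YMM1[i] == YMM2[i] (ordered, non-signalling)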
+// +// Mnemonic : VCMPPS +// Supported forms : (11 forms) +// +// * VCMPPS imm8, xmm, xmm, xmm [AVX] +// * VCMPPS imm8, m128, xmm, xmm [AVX] +// * VCMPPS imm8, ymm, ymm, ymm [AVX] +// * VCMPPS imm8, m256, ymm, ymm [AVX] +// * VCMPPS imm8, m512/m32bcst, zmm, k{k} [AVX512F] +// * VCMPPS imm8, {sae}, zmm, zmm, k{k} [AVX512F] +// * VCMPPS imm8, zmm, zmm, k{k} [AVX512F] +// * VCMPPS imm8, m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VCMPPS imm8, xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VCMPPS imm8, m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VCMPPS imm8, ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VCMPPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCMPPS", 4, Operands { v0, v1, v2, v3 }) + case 1 : p = self.alloc("VCMPPS", 5, Operands { v0, v1, v2, v3, vv[0] }) + default : panic("instruction VCMPPS takes 4 or 5 operands") + } + // VCMPPS imm8, xmm, xmm, xmm + if len(vv) == 0 && isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[3]), v[1], hlcode(v[2])) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPS imm8, m128, xmm, xmm + if len(vv) == 0 && isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPS imm8, ymm, ymm, ymm + if len(vv) == 0 && isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[3]), v[1], hlcode(v[2])) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPS imm8, m256, ymm, ymm + if len(vv) == 0 && isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPS imm8, m512/m32bcst, zmm, k{k} + if len(vv) == 0 && isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPS imm8, {sae}, zmm, zmm, k{k} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMM(v3) && isKk(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4))) + m.emit(0x7c ^ (hlcode(v[3]) << 3)) + m.emit((0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPS imm8, zmm, zmm, k{k} + if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | 
(ecode(v[3]) << 4))) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPS imm8, m128/m32bcst, xmm, k{k} + if len(vv) == 0 && isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPS imm8, xmm, xmm, k{k} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPS imm8, m256/m32bcst, ymm, k{k} + if len(vv) == 0 && isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPPS imm8, ymm, ymm, k{k} + if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCMPPS") + } + return p +} + +// VCMPSD performs "Compare Scalar Double-Precision Floating-Point Values". 
+// +// Mnemonic : VCMPSD +// Supported forms : (5 forms) +// +// * VCMPSD imm8, xmm, xmm, xmm [AVX] +// * VCMPSD imm8, m64, xmm, xmm [AVX] +// * VCMPSD imm8, m64, xmm, k{k} [AVX512F] +// * VCMPSD imm8, {sae}, xmm, xmm, k{k} [AVX512F] +// * VCMPSD imm8, xmm, xmm, k{k} [AVX512F] +// +func (self *Program) VCMPSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCMPSD", 4, Operands { v0, v1, v2, v3 }) + case 1 : p = self.alloc("VCMPSD", 5, Operands { v0, v1, v2, v3, vv[0] }) + default : panic("instruction VCMPSD takes 4 or 5 operands") + } + // VCMPSD imm8, xmm, xmm, xmm + if len(vv) == 0 && isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[3]), v[1], hlcode(v[2])) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPSD imm8, m64, xmm, xmm + if len(vv) == 0 && isImm8(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPSD imm8, m64, xmm, k{k} + if len(vv) == 0 && isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 8) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPSD imm8, {sae}, xmm, xmm, k{k} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isKk(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4))) + m.emit(0xff ^ (hlcode(v[3]) << 3)) + m.emit((0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPSD imm8, xmm, xmm, k{k} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xff ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCMPSD") + } + return p +} + +// VCMPSS performs "Compare Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VCMPSS +// Supported forms : (5 forms) +// +// * VCMPSS imm8, xmm, xmm, xmm [AVX] +// * VCMPSS imm8, m32, xmm, xmm [AVX] +// * VCMPSS imm8, m32, xmm, k{k} [AVX512F] +// * VCMPSS imm8, {sae}, xmm, xmm, k{k} [AVX512F] +// * VCMPSS imm8, xmm, xmm, k{k} [AVX512F] +// +func (self *Program) VCMPSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCMPSS", 4, Operands { v0, v1, v2, v3 }) + case 1 : p = self.alloc("VCMPSS", 5, Operands { v0, v1, v2, v3, vv[0] }) + default : panic("instruction VCMPSS takes 4 or 5 operands") + } + // VCMPSS imm8, xmm, xmm, xmm + if len(vv) == 0 && isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[3]), v[1], hlcode(v[2])) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPSS imm8, m32, xmm, xmm + if len(vv) == 0 && isImm8(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPSS imm8, m32, xmm, k{k} + if len(vv) == 0 && isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0xc2) + m.mrsd(lcode(v[3]), addr(v[1]), 4) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPSS imm8, {sae}, xmm, xmm, k{k} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isKk(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4))) + m.emit(0x7e ^ (hlcode(v[3]) << 3)) + m.emit((0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VCMPSS imm8, xmm, xmm, k{k} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7e ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCMPSS") + } + return p +} + +// VCOMISD performs "Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS". 
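+//
+// VCOMISD writes EFLAGS rather than a register: ZF/PF/CF are set from an
+// ordered compare of the low doubles (PF flags an unordered result), so the
+// unsigned-style JA/JAE/JB/JE family is the natural follow-up. A sketch:
+//
+//     p.VCOMISD(XMM1, XMM0)    // compare the low double of XMM0 against XMM1 and set ZF/PF/CF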
+// +// Mnemonic : VCOMISD +// Supported forms : (5 forms) +// +// * VCOMISD xmm, xmm [AVX] +// * VCOMISD m64, xmm [AVX] +// * VCOMISD m64, xmm [AVX512F] +// * VCOMISD {sae}, xmm, xmm [AVX512F] +// * VCOMISD xmm, xmm [AVX512F] +// +func (self *Program) VCOMISD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCOMISD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCOMISD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCOMISD takes 2 or 3 operands") + } + // VCOMISD xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), v[0], 0) + m.emit(0x2f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCOMISD m64, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCOMISD m64, xmm + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2f) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCOMISD {sae}, xmm, xmm + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit(0x18) + m.emit(0x2f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCOMISD xmm, xmm + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit(0x48) + m.emit(0x2f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCOMISD") + } + return p +} + +// VCOMISS performs "Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS". 
+// +// Mnemonic : VCOMISS +// Supported forms : (5 forms) +// +// * VCOMISS xmm, xmm [AVX] +// * VCOMISS m32, xmm [AVX] +// * VCOMISS m32, xmm [AVX512F] +// * VCOMISS {sae}, xmm, xmm [AVX512F] +// * VCOMISS xmm, xmm [AVX512F] +// +func (self *Program) VCOMISS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCOMISS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCOMISS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCOMISS takes 2 or 3 operands") + } + // VCOMISS xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), v[0], 0) + m.emit(0x2f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCOMISS m32, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCOMISS m32, xmm + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2f) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VCOMISS {sae}, xmm, xmm + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c) + m.emit(0x18) + m.emit(0x2f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCOMISS xmm, xmm + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit(0x48) + m.emit(0x2f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCOMISS") + } + return p +} + +// VCOMPRESSPD performs "Store Sparse Packed Double-Precision Floating-Point Values into Dense Memory/Register". 
+// +// Mnemonic : VCOMPRESSPD +// Supported forms : (6 forms) +// +// * VCOMPRESSPD zmm, zmm{k}{z} [AVX512F] +// * VCOMPRESSPD zmm, m512{k}{z} [AVX512F] +// * VCOMPRESSPD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCOMPRESSPD xmm, m128{k}{z} [AVX512F,AVX512VL] +// * VCOMPRESSPD ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VCOMPRESSPD ymm, m256{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCOMPRESSPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VCOMPRESSPD", 2, Operands { v0, v1 }) + // VCOMPRESSPD zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x8a) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VCOMPRESSPD zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x8a) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VCOMPRESSPD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x8a) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VCOMPRESSPD xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x8a) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VCOMPRESSPD ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x8a) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VCOMPRESSPD ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x8a) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VCOMPRESSPD") + } + return p +} + +// VCOMPRESSPS performs "Store Sparse Packed Single-Precision Floating-Point Values into Dense Memory/Register". 
+// +// Mnemonic : VCOMPRESSPS +// Supported forms : (6 forms) +// +// * VCOMPRESSPS zmm, zmm{k}{z} [AVX512F] +// * VCOMPRESSPS zmm, m512{k}{z} [AVX512F] +// * VCOMPRESSPS xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCOMPRESSPS xmm, m128{k}{z} [AVX512F,AVX512VL] +// * VCOMPRESSPS ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VCOMPRESSPS ymm, m256{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCOMPRESSPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VCOMPRESSPS", 2, Operands { v0, v1 }) + // VCOMPRESSPS zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x8a) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VCOMPRESSPS zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x8a) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VCOMPRESSPS xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x8a) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VCOMPRESSPS xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x8a) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VCOMPRESSPS ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x8a) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VCOMPRESSPS ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x8a) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VCOMPRESSPS") + } + return p +} + +// VCVTDQ2PD performs "Convert Packed Dword Integers to Packed Double-Precision FP Values". 
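+//
+// A minimal usage sketch on a *Program p, using the widening AVX form (the
+// conversion is exact, so no rounding is involved):
+//
+//     p.VCVTDQ2PD(XMM0, YMM1)    // convert the 4 dword integers in XMM0 to 4 doubles in YMM1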
+// +// Mnemonic : VCVTDQ2PD +// Supported forms : (10 forms) +// +// * VCVTDQ2PD xmm, xmm [AVX] +// * VCVTDQ2PD m64, xmm [AVX] +// * VCVTDQ2PD xmm, ymm [AVX] +// * VCVTDQ2PD m128, ymm [AVX] +// * VCVTDQ2PD m256/m32bcst, zmm{k}{z} [AVX512F] +// * VCVTDQ2PD ymm, zmm{k}{z} [AVX512F] +// * VCVTDQ2PD m64/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTDQ2PD m128/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VCVTDQ2PD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTDQ2PD xmm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTDQ2PD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VCVTDQ2PD", 2, Operands { v0, v1 }) + // VCVTDQ2PD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), v[0], 0) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTDQ2PD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), addr(v[0]), 0) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTDQ2PD xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[1]), v[0], 0) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTDQ2PD m128, ymm + if isM128(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[1]), addr(v[0]), 0) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTDQ2PD m256/m32bcst, zmm{k}{z} + if isM256M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTDQ2PD ymm, zmm{k}{z} + if isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTDQ2PD m64/m32bcst, xmm{k}{z} + if isM64M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTDQ2PD m128/m32bcst, ymm{k}{z} + if isM128M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTDQ2PD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTDQ2PD xmm, ymm{k}{z} + if isEVEXXMM(v0) && 
isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTDQ2PD") + } + return p +} + +// VCVTDQ2PS performs "Convert Packed Dword Integers to Packed Single-Precision FP Values". +// +// Mnemonic : VCVTDQ2PS +// Supported forms : (11 forms) +// +// * VCVTDQ2PS xmm, xmm [AVX] +// * VCVTDQ2PS m128, xmm [AVX] +// * VCVTDQ2PS ymm, ymm [AVX] +// * VCVTDQ2PS m256, ymm [AVX] +// * VCVTDQ2PS m512/m32bcst, zmm{k}{z} [AVX512F] +// * VCVTDQ2PS {er}, zmm, zmm{k}{z} [AVX512F] +// * VCVTDQ2PS zmm, zmm{k}{z} [AVX512F] +// * VCVTDQ2PS m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTDQ2PS m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VCVTDQ2PS xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTDQ2PS ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTDQ2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTDQ2PS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTDQ2PS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTDQ2PS takes 2 or 3 operands") + } + // VCVTDQ2PS xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), v[0], 0) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTDQ2PS m128, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), addr(v[0]), 0) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTDQ2PS ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), v[0], 0) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTDQ2PS m256, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), addr(v[0]), 0) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTDQ2PS m512/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTDQ2PS {er}, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTDQ2PS zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | 
(ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTDQ2PS m128/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTDQ2PS m256/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTDQ2PS xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTDQ2PS ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTDQ2PS") + } + return p +} + +// VCVTPD2DQ performs "Convert Packed Double-Precision FP Values to Packed Dword Integers". 
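+//
+// This is the narrowing counterpart of VCVTDQ2PD; the VEX forms round
+// according to MXCSR. A sketch:
+//
+//     p.VCVTPD2DQ(YMM0, XMM1)    // convert the 4 doubles in YMM0 to 4 dword integers in XMM1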
+// +// Mnemonic : VCVTPD2DQ +// Supported forms : (11 forms) +// +// * VCVTPD2DQ xmm, xmm [AVX] +// * VCVTPD2DQ ymm, xmm [AVX] +// * VCVTPD2DQ m128, xmm [AVX] +// * VCVTPD2DQ m256, xmm [AVX] +// * VCVTPD2DQ m512/m64bcst, ymm{k}{z} [AVX512F] +// * VCVTPD2DQ {er}, zmm, ymm{k}{z} [AVX512F] +// * VCVTPD2DQ zmm, ymm{k}{z} [AVX512F] +// * VCVTPD2DQ m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPD2DQ m256/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPD2DQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPD2DQ ymm, xmm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTPD2DQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTPD2DQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTPD2DQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTPD2DQ takes 2 or 3 operands") + } + // VCVTPD2DQ xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[1]), v[0], 0) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2DQ ymm, xmm + if len(vv) == 0 && isYMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[1]), v[0], 0) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2DQ m128, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[1]), addr(v[0]), 0) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTPD2DQ m256, xmm + if len(vv) == 0 && isM256(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[1]), addr(v[0]), 0) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTPD2DQ m512/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTPD2DQ {er}, zmm, ymm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isYMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTPD2DQ zmm, ymm{k}{z} + if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2DQ m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + 
} + // VCVTPD2DQ m256/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTPD2DQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2DQ ymm, xmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTPD2DQ") + } + return p +} + +// VCVTPD2PS performs "Convert Packed Double-Precision FP Values to Packed Single-Precision FP Values". +// +// Mnemonic : VCVTPD2PS +// Supported forms : (11 forms) +// +// * VCVTPD2PS xmm, xmm [AVX] +// * VCVTPD2PS ymm, xmm [AVX] +// * VCVTPD2PS m128, xmm [AVX] +// * VCVTPD2PS m256, xmm [AVX] +// * VCVTPD2PS m512/m64bcst, ymm{k}{z} [AVX512F] +// * VCVTPD2PS {er}, zmm, ymm{k}{z} [AVX512F] +// * VCVTPD2PS zmm, ymm{k}{z} [AVX512F] +// * VCVTPD2PS m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPD2PS m256/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPD2PS xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPD2PS ymm, xmm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTPD2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTPD2PS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTPD2PS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTPD2PS takes 2 or 3 operands") + } + // VCVTPD2PS xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), v[0], 0) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2PS ymm, xmm + if len(vv) == 0 && isYMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), v[0], 0) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2PS m128, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), addr(v[0]), 0) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTPD2PS m256, xmm + if len(vv) == 0 && isM256(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), addr(v[0]), 0) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTPD2PS m512/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m 
*_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTPD2PS {er}, zmm, ymm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isYMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTPD2PS zmm, ymm{k}{z} + if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2PS m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTPD2PS m256/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTPD2PS xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2PS ymm, xmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTPD2PS") + } + return p +} + +// VCVTPD2QQ performs "Convert Packed Double-Precision Floating-Point Values to Packed Quadword Integers". 
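+//
+// A sketch of this EVEX-only quadword conversion, assuming a plain register
+// is accepted where the {k}{z} decoration is optional (the {er} form instead
+// takes a rounding-mode operand first):
+//
+//     p.VCVTPD2QQ(ZMM0, ZMM1)    // convert the 8 doubles in ZMM0 to 8 signed qwords in ZMM1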
+// +// Mnemonic : VCVTPD2QQ +// Supported forms : (7 forms) +// +// * VCVTPD2QQ m512/m64bcst, zmm{k}{z} [AVX512DQ] +// * VCVTPD2QQ {er}, zmm, zmm{k}{z} [AVX512DQ] +// * VCVTPD2QQ zmm, zmm{k}{z} [AVX512DQ] +// * VCVTPD2QQ m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTPD2QQ m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTPD2QQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTPD2QQ ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VCVTPD2QQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTPD2QQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTPD2QQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTPD2QQ takes 2 or 3 operands") + } + // VCVTPD2QQ m512/m64bcst, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7b) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTPD2QQ {er}, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTPD2QQ zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2QQ m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7b) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTPD2QQ m256/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7b) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTPD2QQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2QQ ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7b) + m.emit(0xc0 | 
lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTPD2QQ") + } + return p +} + +// VCVTPD2UDQ performs "Convert Packed Double-Precision Floating-Point Values to Packed Unsigned Doubleword Integers". +// +// Mnemonic : VCVTPD2UDQ +// Supported forms : (7 forms) +// +// * VCVTPD2UDQ m512/m64bcst, ymm{k}{z} [AVX512F] +// * VCVTPD2UDQ {er}, zmm, ymm{k}{z} [AVX512F] +// * VCVTPD2UDQ zmm, ymm{k}{z} [AVX512F] +// * VCVTPD2UDQ m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPD2UDQ m256/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPD2UDQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPD2UDQ ymm, xmm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTPD2UDQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTPD2UDQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTPD2UDQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTPD2UDQ takes 2 or 3 operands") + } + // VCVTPD2UDQ m512/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x84, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTPD2UDQ {er}, zmm, ymm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isYMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfc) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x79) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTPD2UDQ zmm, ymm{k}{z} + if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfc) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2UDQ m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x84, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTPD2UDQ m256/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x84, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTPD2UDQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfc) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2UDQ ymm, xmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + 
p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfc) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTPD2UDQ") + } + return p +} + +// VCVTPD2UQQ performs "Convert Packed Double-Precision Floating-Point Values to Packed Unsigned Quadword Integers". +// +// Mnemonic : VCVTPD2UQQ +// Supported forms : (7 forms) +// +// * VCVTPD2UQQ m512/m64bcst, zmm{k}{z} [AVX512DQ] +// * VCVTPD2UQQ {er}, zmm, zmm{k}{z} [AVX512DQ] +// * VCVTPD2UQQ zmm, zmm{k}{z} [AVX512DQ] +// * VCVTPD2UQQ m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTPD2UQQ m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTPD2UQQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTPD2UQQ ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VCVTPD2UQQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTPD2UQQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTPD2UQQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTPD2UQQ takes 2 or 3 operands") + } + // VCVTPD2UQQ m512/m64bcst, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTPD2UQQ {er}, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x79) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTPD2UQQ zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2UQQ m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTPD2UQQ m256/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTPD2UQQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) 
<< 7) | kcode(v[1]) | 0x08) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPD2UQQ ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTPD2UQQ") + } + return p +} + +// VCVTPH2PS performs "Convert Half-Precision FP Values to Single-Precision FP Values". +// +// Mnemonic : VCVTPH2PS +// Supported forms : (11 forms) +// +// * VCVTPH2PS xmm, xmm [F16C] +// * VCVTPH2PS m64, xmm [F16C] +// * VCVTPH2PS xmm, ymm [F16C] +// * VCVTPH2PS m128, ymm [F16C] +// * VCVTPH2PS m256, zmm{k}{z} [AVX512F] +// * VCVTPH2PS {sae}, ymm, zmm{k}{z} [AVX512F] +// * VCVTPH2PS ymm, zmm{k}{z} [AVX512F] +// * VCVTPH2PS xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPH2PS xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VCVTPH2PS m64, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPH2PS m128, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTPH2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTPH2PS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTPH2PS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTPH2PS takes 2 or 3 operands") + } + // VCVTPH2PS xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_F16C) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x13) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPH2PS m64, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) { + self.require(ISA_F16C) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x13) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTPH2PS xmm, ymm + if len(vv) == 0 && isXMM(v0) && isYMM(v1) { + self.require(ISA_F16C) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x13) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPH2PS m128, ymm + if len(vv) == 0 && isM128(v0) && isYMM(v1) { + self.require(ISA_F16C) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x13) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTPH2PS m256, zmm{k}{z} + if len(vv) == 0 && isM256(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x13) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTPH2PS {sae}, ymm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXYMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0x13) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) 
+ } + // VCVTPH2PS ymm, zmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x13) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPH2PS xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x13) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPH2PS xmm, ymm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x13) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPH2PS m64, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x13) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTPH2PS m128, ymm{k}{z} + if len(vv) == 0 && isM128(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x13) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTPH2PS") + } + return p +} + +// VCVTPS2DQ performs "Convert Packed Single-Precision FP Values to Packed Dword Integers". 
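+// Usage sketch (illustrative only; register names are the package's own
+// constants): the VEX forms are plain two-operand calls, source first:
+//
+//     p.VCVTPS2DQ(XMM3, XMM1)    // xmm, xmm
+//     p.VCVTPS2DQ(YMM3, YMM1)    // ymm, ymm
+//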
+// +// Mnemonic : VCVTPS2DQ +// Supported forms : (11 forms) +// +// * VCVTPS2DQ xmm, xmm [AVX] +// * VCVTPS2DQ m128, xmm [AVX] +// * VCVTPS2DQ ymm, ymm [AVX] +// * VCVTPS2DQ m256, ymm [AVX] +// * VCVTPS2DQ m512/m32bcst, zmm{k}{z} [AVX512F] +// * VCVTPS2DQ {er}, zmm, zmm{k}{z} [AVX512F] +// * VCVTPS2DQ zmm, zmm{k}{z} [AVX512F] +// * VCVTPS2DQ m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPS2DQ m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VCVTPS2DQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPS2DQ ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTPS2DQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTPS2DQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTPS2DQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTPS2DQ takes 2 or 3 operands") + } + // VCVTPS2DQ xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), v[0], 0) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2DQ m128, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), addr(v[0]), 0) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTPS2DQ ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), v[0], 0) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2DQ m256, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), addr(v[0]), 0) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTPS2DQ m512/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTPS2DQ {er}, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTPS2DQ zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2DQ m128/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + 
} + // VCVTPS2DQ m256/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTPS2DQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2DQ ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTPS2DQ") + } + return p +} + +// VCVTPS2PD performs "Convert Packed Single-Precision FP Values to Packed Double-Precision FP Values". +// +// Mnemonic : VCVTPS2PD +// Supported forms : (11 forms) +// +// * VCVTPS2PD xmm, xmm [AVX] +// * VCVTPS2PD m64, xmm [AVX] +// * VCVTPS2PD xmm, ymm [AVX] +// * VCVTPS2PD m128, ymm [AVX] +// * VCVTPS2PD m256/m32bcst, zmm{k}{z} [AVX512F] +// * VCVTPS2PD {sae}, ymm, zmm{k}{z} [AVX512F] +// * VCVTPS2PD ymm, zmm{k}{z} [AVX512F] +// * VCVTPS2PD m64/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPS2PD m128/m32bcst, ymm{k}{z} [AVX512VL] +// * VCVTPS2PD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPS2PD xmm, ymm{k}{z} [AVX512VL] +// +func (self *Program) VCVTPS2PD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTPS2PD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTPS2PD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTPS2PD takes 2 or 3 operands") + } + // VCVTPS2PD xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), v[0], 0) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2PD m64, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), addr(v[0]), 0) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTPS2PD xmm, ymm + if len(vv) == 0 && isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), v[0], 0) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2PD m128, ymm + if len(vv) == 0 && isM128(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), addr(v[0]), 0) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTPS2PD m256/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v 
[]interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTPS2PD {sae}, ymm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXYMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTPS2PD ymm, zmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2PD m64/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM64M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTPS2PD m128/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTPS2PD xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2PD xmm, ymm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTPS2PD") + } + return p +} + +// VCVTPS2PH performs "Convert Single-Precision FP value to Half-Precision FP value". 
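+// Usage sketch (illustrative only): unlike the widening conversions above,
+// the immediate rounding selector comes first, then the wider source, then
+// the narrower destination:
+//
+//     p.VCVTPS2PH(0, YMM2, XMM1)    // imm8, ymm, xmm; imm8 selects the rounding control
+//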
+// +// Mnemonic : VCVTPS2PH +// Supported forms : (11 forms) +// +// * VCVTPS2PH imm8, xmm, xmm [F16C] +// * VCVTPS2PH imm8, ymm, xmm [F16C] +// * VCVTPS2PH imm8, xmm, m64 [F16C] +// * VCVTPS2PH imm8, ymm, m128 [F16C] +// * VCVTPS2PH imm8, zmm, m256{k}{z} [AVX512F] +// * VCVTPS2PH imm8, {sae}, zmm, ymm{k}{z} [AVX512F] +// * VCVTPS2PH imm8, zmm, ymm{k}{z} [AVX512F] +// * VCVTPS2PH imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPS2PH imm8, xmm, m64{k}{z} [AVX512F,AVX512VL] +// * VCVTPS2PH imm8, ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPS2PH imm8, ymm, m128{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTPS2PH(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTPS2PH", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VCVTPS2PH", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VCVTPS2PH takes 3 or 4 operands") + } + // VCVTPS2PH imm8, xmm, xmm + if len(vv) == 0 && isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_F16C) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0x79) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VCVTPS2PH imm8, ymm, xmm + if len(vv) == 0 && isImm8(v0) && isYMM(v1) && isXMM(v2) { + self.require(ISA_F16C) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0x7d) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VCVTPS2PH imm8, xmm, m64 + if len(vv) == 0 && isImm8(v0) && isXMM(v1) && isM64(v2) { + self.require(ISA_F16C) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[1]), addr(v[2]), 0) + m.emit(0x1d) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VCVTPS2PH imm8, ymm, m128 + if len(vv) == 0 && isImm8(v0) && isYMM(v1) && isM128(v2) { + self.require(ISA_F16C) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[1]), addr(v[2]), 0) + m.emit(0x1d) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VCVTPS2PH imm8, zmm, m256{k}{z} + if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isM256kz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x1d) + m.mrsd(lcode(v[1]), addr(v[2]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VCVTPS2PH imm8, {sae}, zmm, ymm{k}{z} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isYMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[3]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[3]) << 7) | kcode(v[3]) | 0x18) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[3])) + m.imm1(toImmAny(v[0])) + }) + } + // VCVTPS2PH imm8, zmm, ymm{k}{z} + if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | 
kcode(v[2]) | 0x48) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VCVTPS2PH imm8, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VCVTPS2PH imm8, xmm, m64{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isM64kz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x1d) + m.mrsd(lcode(v[1]), addr(v[2]), 8) + m.imm1(toImmAny(v[0])) + }) + } + // VCVTPS2PH imm8, ymm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VCVTPS2PH imm8, ymm, m128{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isM128kz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x1d) + m.mrsd(lcode(v[1]), addr(v[2]), 16) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTPS2PH") + } + return p +} + +// VCVTPS2QQ performs "Convert Packed Single Precision Floating-Point Values to Packed Signed Quadword Integer Values". 
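+// Usage sketch (illustrative only; er stands for any embedded-rounding
+// operand that isER accepts):
+//
+//     p.VCVTPS2QQ(YMM4, ZMM2)        // ymm, zmm{k}{z}
+//     p.VCVTPS2QQ(er, YMM4, ZMM2)    // {er}, ymm, zmm{k}{z}
+//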
+// +// Mnemonic : VCVTPS2QQ +// Supported forms : (7 forms) +// +// * VCVTPS2QQ m256/m32bcst, zmm{k}{z} [AVX512DQ] +// * VCVTPS2QQ {er}, ymm, zmm{k}{z} [AVX512DQ] +// * VCVTPS2QQ ymm, zmm{k}{z} [AVX512DQ] +// * VCVTPS2QQ m64/m32bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTPS2QQ m128/m32bcst, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTPS2QQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTPS2QQ xmm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VCVTPS2QQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTPS2QQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTPS2QQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTPS2QQ takes 2 or 3 operands") + } + // VCVTPS2QQ m256/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7b) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTPS2QQ {er}, ymm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXYMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTPS2QQ ymm, zmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2QQ m64/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM64M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7b) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTPS2QQ m128/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7b) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTPS2QQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2QQ xmm, ymm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7b) + m.emit(0xc0 | 
lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTPS2QQ") + } + return p +} + +// VCVTPS2UDQ performs "Convert Packed Single-Precision Floating-Point Values to Packed Unsigned Doubleword Integer Values". +// +// Mnemonic : VCVTPS2UDQ +// Supported forms : (7 forms) +// +// * VCVTPS2UDQ m512/m32bcst, zmm{k}{z} [AVX512F] +// * VCVTPS2UDQ {er}, zmm, zmm{k}{z} [AVX512F] +// * VCVTPS2UDQ zmm, zmm{k}{z} [AVX512F] +// * VCVTPS2UDQ m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPS2UDQ m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VCVTPS2UDQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTPS2UDQ ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTPS2UDQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTPS2UDQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTPS2UDQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTPS2UDQ takes 2 or 3 operands") + } + // VCVTPS2UDQ m512/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTPS2UDQ {er}, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x79) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTPS2UDQ zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2UDQ m128/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTPS2UDQ m256/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTPS2UDQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2UDQ ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = 
DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTPS2UDQ") + } + return p +} + +// VCVTPS2UQQ performs "Convert Packed Single Precision Floating-Point Values to Packed Unsigned Quadword Integer Values". +// +// Mnemonic : VCVTPS2UQQ +// Supported forms : (7 forms) +// +// * VCVTPS2UQQ m256/m32bcst, zmm{k}{z} [AVX512DQ] +// * VCVTPS2UQQ {er}, ymm, zmm{k}{z} [AVX512DQ] +// * VCVTPS2UQQ ymm, zmm{k}{z} [AVX512DQ] +// * VCVTPS2UQQ m64/m32bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTPS2UQQ m128/m32bcst, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTPS2UQQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTPS2UQQ xmm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VCVTPS2UQQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTPS2UQQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTPS2UQQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTPS2UQQ takes 2 or 3 operands") + } + // VCVTPS2UQQ m256/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTPS2UQQ {er}, ymm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXYMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x79) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTPS2UQQ ymm, zmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2UQQ m64/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM64M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTPS2UQQ m128/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTPS2UQQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) 
+ m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTPS2UQQ xmm, ymm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTPS2UQQ") + } + return p +} + +// VCVTQQ2PD performs "Convert Packed Quadword Integers to Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VCVTQQ2PD +// Supported forms : (7 forms) +// +// * VCVTQQ2PD m512/m64bcst, zmm{k}{z} [AVX512DQ] +// * VCVTQQ2PD {er}, zmm, zmm{k}{z} [AVX512DQ] +// * VCVTQQ2PD zmm, zmm{k}{z} [AVX512DQ] +// * VCVTQQ2PD m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTQQ2PD m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTQQ2PD xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTQQ2PD ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VCVTQQ2PD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTQQ2PD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTQQ2PD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTQQ2PD takes 2 or 3 operands") + } + // VCVTQQ2PD m512/m64bcst, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTQQ2PD {er}, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTQQ2PD zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTQQ2PD m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTQQ2PD m256/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTQQ2PD xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + 
self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTQQ2PD ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTQQ2PD") + } + return p +} + +// VCVTQQ2PS performs "Convert Packed Quadword Integers to Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VCVTQQ2PS +// Supported forms : (7 forms) +// +// * VCVTQQ2PS m512/m64bcst, ymm{k}{z} [AVX512DQ] +// * VCVTQQ2PS {er}, zmm, ymm{k}{z} [AVX512DQ] +// * VCVTQQ2PS zmm, ymm{k}{z} [AVX512DQ] +// * VCVTQQ2PS m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTQQ2PS m256/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTQQ2PS xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTQQ2PS ymm, xmm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VCVTQQ2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTQQ2PS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTQQ2PS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTQQ2PS takes 2 or 3 operands") + } + // VCVTQQ2PS m512/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x84, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTQQ2PS {er}, zmm, ymm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isYMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfc) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTQQ2PS zmm, ymm{k}{z} + if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfc) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTQQ2PS m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x84, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTQQ2PS m256/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 
0x84, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTQQ2PS xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfc) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTQQ2PS ymm, xmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfc) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTQQ2PS") + } + return p +} + +// VCVTSD2SI performs "Convert Scalar Double-Precision FP Value to Integer". +// +// Mnemonic : VCVTSD2SI +// Supported forms : (10 forms) +// +// * VCVTSD2SI xmm, r32 [AVX] +// * VCVTSD2SI m64, r32 [AVX] +// * VCVTSD2SI xmm, r64 [AVX] +// * VCVTSD2SI m64, r64 [AVX] +// * VCVTSD2SI m64, r32 [AVX512F] +// * VCVTSD2SI m64, r64 [AVX512F] +// * VCVTSD2SI {er}, xmm, r32 [AVX512F] +// * VCVTSD2SI {er}, xmm, r64 [AVX512F] +// * VCVTSD2SI xmm, r32 [AVX512F] +// * VCVTSD2SI xmm, r64 [AVX512F] +// +func (self *Program) VCVTSD2SI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTSD2SI", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTSD2SI", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTSD2SI takes 2 or 3 operands") + } + // VCVTSD2SI xmm, r32 + if len(vv) == 0 && isXMM(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[1]), v[0], 0) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTSD2SI m64, r32 + if len(vv) == 0 && isM64(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTSD2SI xmm, r64 + if len(vv) == 0 && isXMM(v0) && isReg64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfb) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTSD2SI m64, r64 + if len(vv) == 0 && isM64(v0) && isReg64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b1, 0x83, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTSD2SI m64, r32 + if len(vv) == 0 && isM64(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTSD2SI m64, r64 + if len(vv) == 0 && isM64(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTSD2SI {er}, xmm, r32 + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg32(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7f) + m.emit((vcode(v[0]) << 5) | 0x18) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTSD2SI {er}, xmm, r64 + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg64(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff) + m.emit((vcode(v[0]) << 5) | 0x18) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTSD2SI xmm, r32 + if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7f) + m.emit(0x48) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTSD2SI xmm, r64 + if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit(0x48) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTSD2SI") + } + return p +} + +// VCVTSD2SS performs "Convert Scalar Double-Precision FP Value to Scalar Single-Precision FP Value". 
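+// Usage sketch (illustrative only): the double-precision source comes
+// first, the merge source second, and the destination last:
+//
+//     p.VCVTSD2SS(XMM2, XMM1, XMM0)    // xmm, xmm, xmm
+//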
+// +// Mnemonic : VCVTSD2SS +// Supported forms : (5 forms) +// +// * VCVTSD2SS xmm, xmm, xmm [AVX] +// * VCVTSD2SS m64, xmm, xmm [AVX] +// * VCVTSD2SS m64, xmm, xmm{k}{z} [AVX512F] +// * VCVTSD2SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VCVTSD2SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VCVTSD2SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTSD2SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VCVTSD2SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VCVTSD2SS takes 3 or 4 operands") + } + // VCVTSD2SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VCVTSD2SS m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VCVTSD2SS m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x5a) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VCVTSD2SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xff ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VCVTSD2SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTSD2SS") + } + return p +} + +// VCVTSD2USI performs "Convert Scalar Double-Precision Floating-Point Value to Unsigned Doubleword Integer". 
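+// Usage sketch (illustrative only): the destination general-purpose
+// register picks between the r32 and r64 encodings:
+//
+//     p.VCVTSD2USI(XMM5, EAX)    // xmm, r32
+//     p.VCVTSD2USI(XMM5, RAX)    // xmm, r64
+//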
+// +// Mnemonic : VCVTSD2USI +// Supported forms : (6 forms) +// +// * VCVTSD2USI m64, r32 [AVX512F] +// * VCVTSD2USI m64, r64 [AVX512F] +// * VCVTSD2USI {er}, xmm, r32 [AVX512F] +// * VCVTSD2USI {er}, xmm, r64 [AVX512F] +// * VCVTSD2USI xmm, r32 [AVX512F] +// * VCVTSD2USI xmm, r64 [AVX512F] +// +func (self *Program) VCVTSD2USI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTSD2USI", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTSD2USI", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTSD2USI takes 2 or 3 operands") + } + // VCVTSD2USI m64, r32 + if len(vv) == 0 && isM64(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTSD2USI m64, r64 + if len(vv) == 0 && isM64(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTSD2USI {er}, xmm, r32 + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg32(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7f) + m.emit((vcode(v[0]) << 5) | 0x18) + m.emit(0x79) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTSD2USI {er}, xmm, r64 + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg64(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff) + m.emit((vcode(v[0]) << 5) | 0x18) + m.emit(0x79) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTSD2USI xmm, r32 + if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7f) + m.emit(0x48) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTSD2USI xmm, r64 + if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit(0x48) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTSD2USI") + } + return p +} + +// VCVTSI2SD performs "Convert Dword Integer to Scalar Double-Precision FP Value". 
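+//
+// Usage sketch, assuming an existing *Program value p and this package's
+// GPR/XMM register constants:
+//
+//	p.VCVTSI2SD(RAX, XMM1, XMM2)	// signed int64 in RAX -> f64 in XMM2, upper lane copied from XMM1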
+// +// Mnemonic : VCVTSI2SD +// Supported forms : (9 forms) +// +// * VCVTSI2SD r32, xmm, xmm [AVX] +// * VCVTSI2SD r64, xmm, xmm [AVX] +// * VCVTSI2SD m32, xmm, xmm [AVX] +// * VCVTSI2SD m64, xmm, xmm [AVX] +// * VCVTSI2SD r32, xmm, xmm [AVX512F] +// * VCVTSI2SD m32, xmm, xmm [AVX512F] +// * VCVTSI2SD m64, xmm, xmm [AVX512F] +// * VCVTSI2SD {er}, r64, xmm, xmm [AVX512F] +// * VCVTSI2SD r64, xmm, xmm [AVX512F] +// +func (self *Program) VCVTSI2SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTSI2SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VCVTSI2SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VCVTSI2SD takes 3 or 4 operands") + } + // VCVTSI2SD r32, xmm, xmm + if len(vv) == 0 && isReg32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VCVTSI2SD r64, xmm, xmm + if len(vv) == 0 && isReg64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfb ^ (hlcode(v[1]) << 3)) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VCVTSI2SD m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x2a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VCVTSI2SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b1, 0x83, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x2a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VCVTSI2SD r32, xmm, xmm + if len(vv) == 0 && isReg32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7f ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x00) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VCVTSI2SD m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0x2a) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VCVTSI2SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0x2a) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VCVTSI2SD {er}, r64, xmm, xmm + if len(vv) == 1 && isER(v0) && isReg64(v1) && isEVEXXMM(v2) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xff ^ (hlcode(v[2]) << 3)) + m.emit((vcode(v[0]) << 5) | 
(0x08 ^ (ecode(v[2]) << 3)) | 0x10) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VCVTSI2SD r64, xmm, xmm + if len(vv) == 0 && isReg64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTSI2SD") + } + return p +} + +// VCVTSI2SS performs "Convert Dword Integer to Scalar Single-Precision FP Value". +// +// Mnemonic : VCVTSI2SS +// Supported forms : (10 forms) +// +// * VCVTSI2SS r32, xmm, xmm [AVX] +// * VCVTSI2SS r64, xmm, xmm [AVX] +// * VCVTSI2SS m32, xmm, xmm [AVX] +// * VCVTSI2SS m64, xmm, xmm [AVX] +// * VCVTSI2SS m32, xmm, xmm [AVX512F] +// * VCVTSI2SS m64, xmm, xmm [AVX512F] +// * VCVTSI2SS {er}, r32, xmm, xmm [AVX512F] +// * VCVTSI2SS {er}, r64, xmm, xmm [AVX512F] +// * VCVTSI2SS r32, xmm, xmm [AVX512F] +// * VCVTSI2SS r64, xmm, xmm [AVX512F] +// +func (self *Program) VCVTSI2SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTSI2SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VCVTSI2SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VCVTSI2SS takes 3 or 4 operands") + } + // VCVTSI2SS r32, xmm, xmm + if len(vv) == 0 && isReg32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VCVTSI2SS r64, xmm, xmm + if len(vv) == 0 && isReg64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfa ^ (hlcode(v[1]) << 3)) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VCVTSI2SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x2a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VCVTSI2SS m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b1, 0x82, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x2a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VCVTSI2SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0x2a) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VCVTSI2SS m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0x2a) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VCVTSI2SS {er}, r32, xmm, xmm + if len(vv) == 1 && isER(v0) 
&& isReg32(v1) && isEVEXXMM(v2) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7e ^ (hlcode(v[2]) << 3)) + m.emit((vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | 0x10) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VCVTSI2SS {er}, r64, xmm, xmm + if len(vv) == 1 && isER(v0) && isReg64(v1) && isEVEXXMM(v2) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfe ^ (hlcode(v[2]) << 3)) + m.emit((vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | 0x10) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VCVTSI2SS r32, xmm, xmm + if len(vv) == 0 && isReg32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VCVTSI2SS r64, xmm, xmm + if len(vv) == 0 && isReg64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTSI2SS") + } + return p +} + +// VCVTSS2SD performs "Convert Scalar Single-Precision FP Value to Scalar Double-Precision FP Value". 
+// +// Mnemonic : VCVTSS2SD +// Supported forms : (5 forms) +// +// * VCVTSS2SD xmm, xmm, xmm [AVX] +// * VCVTSS2SD m32, xmm, xmm [AVX] +// * VCVTSS2SD m32, xmm, xmm{k}{z} [AVX512F] +// * VCVTSS2SD {sae}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VCVTSS2SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VCVTSS2SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTSS2SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VCVTSS2SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VCVTSS2SD takes 3 or 4 operands") + } + // VCVTSS2SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VCVTSS2SD m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VCVTSS2SD m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x5a) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VCVTSS2SD {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7e ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VCVTSS2SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTSS2SD") + } + return p +} + +// VCVTSS2SI performs "Convert Scalar Single-Precision FP Value to Dword Integer". 
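+//
+// Usage sketch, assuming an existing *Program value p:
+//
+//	p.VCVTSS2SI(XMM0, EAX)	// f32 in XMM0 -> signed int32 in EAX (current rounding mode)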
+// +// Mnemonic : VCVTSS2SI +// Supported forms : (10 forms) +// +// * VCVTSS2SI xmm, r32 [AVX] +// * VCVTSS2SI m32, r32 [AVX] +// * VCVTSS2SI xmm, r64 [AVX] +// * VCVTSS2SI m32, r64 [AVX] +// * VCVTSS2SI m32, r32 [AVX512F] +// * VCVTSS2SI m32, r64 [AVX512F] +// * VCVTSS2SI {er}, xmm, r32 [AVX512F] +// * VCVTSS2SI {er}, xmm, r64 [AVX512F] +// * VCVTSS2SI xmm, r32 [AVX512F] +// * VCVTSS2SI xmm, r64 [AVX512F] +// +func (self *Program) VCVTSS2SI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTSS2SI", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTSS2SI", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTSS2SI takes 2 or 3 operands") + } + // VCVTSS2SI xmm, r32 + if len(vv) == 0 && isXMM(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), v[0], 0) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTSS2SI m32, r32 + if len(vv) == 0 && isM32(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTSS2SI xmm, r64 + if len(vv) == 0 && isXMM(v0) && isReg64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfa) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTSS2SI m32, r64 + if len(vv) == 0 && isM32(v0) && isReg64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b1, 0x82, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTSS2SI m32, r32 + if len(vv) == 0 && isM32(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VCVTSS2SI m32, r64 + if len(vv) == 0 && isM32(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2d) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VCVTSS2SI {er}, xmm, r32 + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg32(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e) + m.emit((vcode(v[0]) << 5) | 0x18) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTSS2SI {er}, xmm, r64 + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg64(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe) + m.emit((vcode(v[0]) << 5) | 0x18) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTSS2SI xmm, r32 + if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ 
((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x48) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTSS2SI xmm, r64 + if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x48) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTSS2SI") + } + return p +} + +// VCVTSS2USI performs "Convert Scalar Single-Precision Floating-Point Value to Unsigned Doubleword Integer". +// +// Mnemonic : VCVTSS2USI +// Supported forms : (6 forms) +// +// * VCVTSS2USI m32, r32 [AVX512F] +// * VCVTSS2USI m32, r64 [AVX512F] +// * VCVTSS2USI {er}, xmm, r32 [AVX512F] +// * VCVTSS2USI {er}, xmm, r64 [AVX512F] +// * VCVTSS2USI xmm, r32 [AVX512F] +// * VCVTSS2USI xmm, r64 [AVX512F] +// +func (self *Program) VCVTSS2USI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTSS2USI", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTSS2USI", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTSS2USI takes 2 or 3 operands") + } + // VCVTSS2USI m32, r32 + if len(vv) == 0 && isM32(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VCVTSS2USI m32, r64 + if len(vv) == 0 && isM32(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VCVTSS2USI {er}, xmm, r32 + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg32(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e) + m.emit((vcode(v[0]) << 5) | 0x18) + m.emit(0x79) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTSS2USI {er}, xmm, r64 + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isReg64(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe) + m.emit((vcode(v[0]) << 5) | 0x18) + m.emit(0x79) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTSS2USI xmm, r32 + if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x48) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTSS2USI xmm, r64 + if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x48) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + 
} + if p.len == 0 { + panic("invalid operands for VCVTSS2USI") + } + return p +} + +// VCVTTPD2DQ performs "Convert with Truncation Packed Double-Precision FP Values to Packed Dword Integers". +// +// Mnemonic : VCVTTPD2DQ +// Supported forms : (11 forms) +// +// * VCVTTPD2DQ xmm, xmm [AVX] +// * VCVTTPD2DQ ymm, xmm [AVX] +// * VCVTTPD2DQ m128, xmm [AVX] +// * VCVTTPD2DQ m256, xmm [AVX] +// * VCVTTPD2DQ m512/m64bcst, ymm{k}{z} [AVX512F] +// * VCVTTPD2DQ {sae}, zmm, ymm{k}{z} [AVX512F] +// * VCVTTPD2DQ zmm, ymm{k}{z} [AVX512F] +// * VCVTTPD2DQ m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTTPD2DQ m256/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTTPD2DQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTTPD2DQ ymm, xmm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTTPD2DQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTTPD2DQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTTPD2DQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTTPD2DQ takes 2 or 3 operands") + } + // VCVTTPD2DQ xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), v[0], 0) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPD2DQ ymm, xmm + if len(vv) == 0 && isYMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), v[0], 0) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPD2DQ m128, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), addr(v[0]), 0) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTTPD2DQ m256, xmm + if len(vv) == 0 && isM256(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), addr(v[0]), 0) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTTPD2DQ m512/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTTPD2DQ {sae}, zmm, ymm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isYMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTPD2DQ zmm, ymm{k}{z} + if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPD2DQ m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + 
p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTTPD2DQ m256/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xe6) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTTPD2DQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPD2DQ ymm, xmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0xe6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTTPD2DQ") + } + return p +} + +// VCVTTPD2QQ performs "Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Quadword Integers". +// +// Mnemonic : VCVTTPD2QQ +// Supported forms : (7 forms) +// +// * VCVTTPD2QQ m512/m64bcst, zmm{k}{z} [AVX512DQ] +// * VCVTTPD2QQ {sae}, zmm, zmm{k}{z} [AVX512DQ] +// * VCVTTPD2QQ zmm, zmm{k}{z} [AVX512DQ] +// * VCVTTPD2QQ m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTTPD2QQ m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTTPD2QQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTTPD2QQ ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VCVTTPD2QQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTTPD2QQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTTPD2QQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTTPD2QQ takes 2 or 3 operands") + } + // VCVTTPD2QQ m512/m64bcst, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTTPD2QQ {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTPD2QQ zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 
0x48) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPD2QQ m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTTPD2QQ m256/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTTPD2QQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPD2QQ ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTTPD2QQ") + } + return p +} + +// VCVTTPD2UDQ performs "Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Unsigned Doubleword Integers". 
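+//
+// Usage sketch, assuming an existing *Program value p and that a bare
+// register satisfies the {k}{z} operand (the unmasked case):
+//
+//	p.VCVTTPD2UDQ(ZMM1, YMM2)	// truncate 8 packed f64 in ZMM1 to unsigned int32s in YMM2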
+// +// Mnemonic : VCVTTPD2UDQ +// Supported forms : (7 forms) +// +// * VCVTTPD2UDQ m512/m64bcst, ymm{k}{z} [AVX512F] +// * VCVTTPD2UDQ {sae}, zmm, ymm{k}{z} [AVX512F] +// * VCVTTPD2UDQ zmm, ymm{k}{z} [AVX512F] +// * VCVTTPD2UDQ m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTTPD2UDQ m256/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTTPD2UDQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTTPD2UDQ ymm, xmm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTTPD2UDQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTTPD2UDQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTTPD2UDQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTTPD2UDQ takes 2 or 3 operands") + } + // VCVTTPD2UDQ m512/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x84, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTTPD2UDQ {sae}, zmm, ymm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isYMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfc) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0x78) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTPD2UDQ zmm, ymm{k}{z} + if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfc) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPD2UDQ m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x84, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTTPD2UDQ m256/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x84, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTTPD2UDQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfc) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPD2UDQ ymm, xmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfc) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x78) + m.emit(0xc0 | 
lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTTPD2UDQ") + } + return p +} + +// VCVTTPD2UQQ performs "Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Unsigned Quadword Integers". +// +// Mnemonic : VCVTTPD2UQQ +// Supported forms : (7 forms) +// +// * VCVTTPD2UQQ m512/m64bcst, zmm{k}{z} [AVX512DQ] +// * VCVTTPD2UQQ {sae}, zmm, zmm{k}{z} [AVX512DQ] +// * VCVTTPD2UQQ zmm, zmm{k}{z} [AVX512DQ] +// * VCVTTPD2UQQ m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTTPD2UQQ m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTTPD2UQQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTTPD2UQQ ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VCVTTPD2UQQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTTPD2UQQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTTPD2UQQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTTPD2UQQ takes 2 or 3 operands") + } + // VCVTTPD2UQQ m512/m64bcst, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTTPD2UQQ {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0x78) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTPD2UQQ zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPD2UQQ m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTTPD2UQQ m256/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTTPD2UQQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPD2UQQ ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | 
ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTTPD2UQQ") + } + return p +} + +// VCVTTPS2DQ performs "Convert with Truncation Packed Single-Precision FP Values to Packed Dword Integers". +// +// Mnemonic : VCVTTPS2DQ +// Supported forms : (11 forms) +// +// * VCVTTPS2DQ xmm, xmm [AVX] +// * VCVTTPS2DQ m128, xmm [AVX] +// * VCVTTPS2DQ ymm, ymm [AVX] +// * VCVTTPS2DQ m256, ymm [AVX] +// * VCVTTPS2DQ m512/m32bcst, zmm{k}{z} [AVX512F] +// * VCVTTPS2DQ {sae}, zmm, zmm{k}{z} [AVX512F] +// * VCVTTPS2DQ zmm, zmm{k}{z} [AVX512F] +// * VCVTTPS2DQ m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTTPS2DQ m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VCVTTPS2DQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTTPS2DQ ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTTPS2DQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTTPS2DQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTTPS2DQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTTPS2DQ takes 2 or 3 operands") + } + // VCVTTPS2DQ xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), v[0], 0) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPS2DQ m128, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), addr(v[0]), 0) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTTPS2DQ ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[1]), v[0], 0) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPS2DQ m256, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[1]), addr(v[0]), 0) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTTPS2DQ m512/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTTPS2DQ {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTPS2DQ zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPS2DQ m128/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTTPS2DQ m256/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x5b) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTTPS2DQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPS2DQ ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x5b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTTPS2DQ") + } + return p +} + +// VCVTTPS2QQ performs "Convert with Truncation Packed Single Precision Floating-Point Values to Packed Signed Quadword Integer Values". 
+// +// Mnemonic : VCVTTPS2QQ +// Supported forms : (7 forms) +// +// * VCVTTPS2QQ m256/m32bcst, zmm{k}{z} [AVX512DQ] +// * VCVTTPS2QQ {sae}, ymm, zmm{k}{z} [AVX512DQ] +// * VCVTTPS2QQ ymm, zmm{k}{z} [AVX512DQ] +// * VCVTTPS2QQ m64/m32bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTTPS2QQ m128/m32bcst, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTTPS2QQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTTPS2QQ xmm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VCVTTPS2QQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTTPS2QQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTTPS2QQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTTPS2QQ takes 2 or 3 operands") + } + // VCVTTPS2QQ m256/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTTPS2QQ {sae}, ymm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXYMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTPS2QQ ymm, zmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPS2QQ m64/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM64M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTTPS2QQ m128/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTTPS2QQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPS2QQ xmm, ymm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7a) + m.emit(0xc0 | 
lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTTPS2QQ") + } + return p +} + +// VCVTTPS2UDQ performs "Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Unsigned Doubleword Integer Values". +// +// Mnemonic : VCVTTPS2UDQ +// Supported forms : (7 forms) +// +// * VCVTTPS2UDQ m512/m32bcst, zmm{k}{z} [AVX512F] +// * VCVTTPS2UDQ {sae}, zmm, zmm{k}{z} [AVX512F] +// * VCVTTPS2UDQ zmm, zmm{k}{z} [AVX512F] +// * VCVTTPS2UDQ m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTTPS2UDQ m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VCVTTPS2UDQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTTPS2UDQ ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTTPS2UDQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTTPS2UDQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTTPS2UDQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTTPS2UDQ takes 2 or 3 operands") + } + // VCVTTPS2UDQ m512/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTTPS2UDQ {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0x78) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTPS2UDQ zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPS2UDQ m128/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTTPS2UDQ m256/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTTPS2UDQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPS2UDQ ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) 
+ p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTTPS2UDQ") + } + return p +} + +// VCVTTPS2UQQ performs "Convert with Truncation Packed Single Precision Floating-Point Values to Packed Unsigned Quadword Integer Values". +// +// Mnemonic : VCVTTPS2UQQ +// Supported forms : (7 forms) +// +// * VCVTTPS2UQQ m256/m32bcst, zmm{k}{z} [AVX512DQ] +// * VCVTTPS2UQQ {sae}, ymm, zmm{k}{z} [AVX512DQ] +// * VCVTTPS2UQQ ymm, zmm{k}{z} [AVX512DQ] +// * VCVTTPS2UQQ m64/m32bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTTPS2UQQ m128/m32bcst, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTTPS2UQQ xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTTPS2UQQ xmm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VCVTTPS2UQQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTTPS2UQQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTTPS2UQQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTTPS2UQQ takes 2 or 3 operands") + } + // VCVTTPS2UQQ m256/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTTPS2UQQ {sae}, ymm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXYMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0x78) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTPS2UQQ ymm, zmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPS2UQQ m64/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM64M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTTPS2UQQ m128/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTTPS2UQQ xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTPS2UQQ xmm, ymm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTTPS2UQQ") + } + return p +} + +// VCVTTSD2SI performs "Convert with Truncation Scalar Double-Precision FP Value to Signed Integer". +// +// Mnemonic : VCVTTSD2SI +// Supported forms : (10 forms) +// +// * VCVTTSD2SI xmm, r32 [AVX] +// * VCVTTSD2SI m64, r32 [AVX] +// * VCVTTSD2SI xmm, r64 [AVX] +// * VCVTTSD2SI m64, r64 [AVX] +// * VCVTTSD2SI m64, r32 [AVX512F] +// * VCVTTSD2SI m64, r64 [AVX512F] +// * VCVTTSD2SI {sae}, xmm, r32 [AVX512F] +// * VCVTTSD2SI {sae}, xmm, r64 [AVX512F] +// * VCVTTSD2SI xmm, r32 [AVX512F] +// * VCVTTSD2SI xmm, r64 [AVX512F] +// +func (self *Program) VCVTTSD2SI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTTSD2SI", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTTSD2SI", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTTSD2SI takes 2 or 3 operands") + } + // VCVTTSD2SI xmm, r32 + if len(vv) == 0 && isXMM(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[1]), v[0], 0) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTSD2SI m64, r32 + if len(vv) == 0 && isM64(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTTSD2SI xmm, r64 + if len(vv) == 0 && isXMM(v0) && isReg64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfb) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTSD2SI m64, r64 + if len(vv) == 0 && isM64(v0) && isReg64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b1, 0x83, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTTSD2SI m64, r32 + if len(vv) == 0 && isM64(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTTSD2SI m64, r64 + if len(vv) == 0 && isM64(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTTSD2SI {sae}, xmm, r32 + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg32(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) 
| (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7f) + m.emit(0x18) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTSD2SI {sae}, xmm, r64 + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg64(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff) + m.emit(0x18) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTSD2SI xmm, r32 + if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7f) + m.emit(0x48) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTSD2SI xmm, r64 + if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit(0x48) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTTSD2SI") + } + return p +} + +// VCVTTSD2USI performs "Convert with Truncation Scalar Double-Precision Floating-Point Value to Unsigned Integer". +// +// Mnemonic : VCVTTSD2USI +// Supported forms : (6 forms) +// +// * VCVTTSD2USI m64, r32 [AVX512F] +// * VCVTTSD2USI m64, r64 [AVX512F] +// * VCVTTSD2USI {sae}, xmm, r32 [AVX512F] +// * VCVTTSD2USI {sae}, xmm, r64 [AVX512F] +// * VCVTTSD2USI xmm, r32 [AVX512F] +// * VCVTTSD2USI xmm, r64 [AVX512F] +// +func (self *Program) VCVTTSD2USI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTTSD2USI", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTTSD2USI", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTTSD2USI takes 2 or 3 operands") + } + // VCVTTSD2USI m64, r32 + if len(vv) == 0 && isM64(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTTSD2USI m64, r64 + if len(vv) == 0 && isM64(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTTSD2USI {sae}, xmm, r32 + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg32(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7f) + m.emit(0x18) + m.emit(0x78) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTSD2USI {sae}, xmm, r64 + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg64(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff) + m.emit(0x18) + m.emit(0x78) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // 
VCVTTSD2USI xmm, r32 + if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7f) + m.emit(0x48) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTSD2USI xmm, r64 + if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit(0x48) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTTSD2USI") + } + return p +} + +// VCVTTSS2SI performs "Convert with Truncation Scalar Single-Precision FP Value to Dword Integer". +// +// Mnemonic : VCVTTSS2SI +// Supported forms : (10 forms) +// +// * VCVTTSS2SI xmm, r32 [AVX] +// * VCVTTSS2SI m32, r32 [AVX] +// * VCVTTSS2SI xmm, r64 [AVX] +// * VCVTTSS2SI m32, r64 [AVX] +// * VCVTTSS2SI m32, r32 [AVX512F] +// * VCVTTSS2SI m32, r64 [AVX512F] +// * VCVTTSS2SI {sae}, xmm, r32 [AVX512F] +// * VCVTTSS2SI {sae}, xmm, r64 [AVX512F] +// * VCVTTSS2SI xmm, r32 [AVX512F] +// * VCVTTSS2SI xmm, r64 [AVX512F] +// +func (self *Program) VCVTTSS2SI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTTSS2SI", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTTSS2SI", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTTSS2SI takes 2 or 3 operands") + } + // VCVTTSS2SI xmm, r32 + if len(vv) == 0 && isXMM(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), v[0], 0) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTSS2SI m32, r32 + if len(vv) == 0 && isM32(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTTSS2SI xmm, r64 + if len(vv) == 0 && isXMM(v0) && isReg64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfa) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTSS2SI m32, r64 + if len(vv) == 0 && isM32(v0) && isReg64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b1, 0x82, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VCVTTSS2SI m32, r32 + if len(vv) == 0 && isM32(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VCVTTSS2SI m32, r64 + if len(vv) == 0 && isM32(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2c) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VCVTTSS2SI {sae}, xmm, r32 + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && 
isReg32(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e) + m.emit(0x18) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTSS2SI {sae}, xmm, r64 + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg64(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe) + m.emit(0x18) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTSS2SI xmm, r32 + if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x48) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTSS2SI xmm, r64 + if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x48) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTTSS2SI") + } + return p +} + +// VCVTTSS2USI performs "Convert with Truncation Scalar Single-Precision Floating-Point Value to Unsigned Integer". +// +// Mnemonic : VCVTTSS2USI +// Supported forms : (6 forms) +// +// * VCVTTSS2USI m32, r32 [AVX512F] +// * VCVTTSS2USI m32, r64 [AVX512F] +// * VCVTTSS2USI {sae}, xmm, r32 [AVX512F] +// * VCVTTSS2USI {sae}, xmm, r64 [AVX512F] +// * VCVTTSS2USI xmm, r32 [AVX512F] +// * VCVTTSS2USI xmm, r64 [AVX512F] +// +func (self *Program) VCVTTSS2USI(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTTSS2USI", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTTSS2USI", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTTSS2USI takes 2 or 3 operands") + } + // VCVTTSS2USI m32, r32 + if len(vv) == 0 && isM32(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VCVTTSS2USI m32, r64 + if len(vv) == 0 && isM32(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VCVTTSS2USI {sae}, xmm, r32 + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg32(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e) + m.emit(0x18) + m.emit(0x78) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTSS2USI {sae}, xmm, r64 + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isReg64(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ 
((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe) + m.emit(0x18) + m.emit(0x78) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTTSS2USI xmm, r32 + if len(vv) == 0 && isEVEXXMM(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x48) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTTSS2USI xmm, r64 + if len(vv) == 0 && isEVEXXMM(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x48) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTTSS2USI") + } + return p +} + +// VCVTUDQ2PD performs "Convert Packed Unsigned Doubleword Integers to Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VCVTUDQ2PD +// Supported forms : (6 forms) +// +// * VCVTUDQ2PD m256/m32bcst, zmm{k}{z} [AVX512F] +// * VCVTUDQ2PD ymm, zmm{k}{z} [AVX512F] +// * VCVTUDQ2PD m64/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTUDQ2PD m128/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VCVTUDQ2PD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTUDQ2PD xmm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTUDQ2PD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VCVTUDQ2PD", 2, Operands { v0, v1 }) + // VCVTUDQ2PD m256/m32bcst, zmm{k}{z} + if isM256M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTUDQ2PD ymm, zmm{k}{z} + if isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTUDQ2PD m64/m32bcst, xmm{k}{z} + if isM64M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VCVTUDQ2PD m128/m32bcst, ymm{k}{z} + if isM128M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTUDQ2PD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTUDQ2PD xmm, ymm{k}{z} + if isEVEXXMM(v0) && 
isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTUDQ2PD") + } + return p +} + +// VCVTUDQ2PS performs "Convert Packed Unsigned Doubleword Integers to Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VCVTUDQ2PS +// Supported forms : (7 forms) +// +// * VCVTUDQ2PS m512/m32bcst, zmm{k}{z} [AVX512F] +// * VCVTUDQ2PS {er}, zmm, zmm{k}{z} [AVX512F] +// * VCVTUDQ2PS zmm, zmm{k}{z} [AVX512F] +// * VCVTUDQ2PS m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTUDQ2PS m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VCVTUDQ2PS xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VCVTUDQ2PS ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VCVTUDQ2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTUDQ2PS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTUDQ2PS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTUDQ2PS takes 2 or 3 operands") + } + // VCVTUDQ2PS m512/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTUDQ2PS {er}, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTUDQ2PS zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTUDQ2PS m128/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTUDQ2PS m256/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTUDQ2PS xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | 
(ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTUDQ2PS ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTUDQ2PS") + } + return p +} + +// VCVTUQQ2PD performs "Convert Packed Unsigned Quadword Integers to Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VCVTUQQ2PD +// Supported forms : (7 forms) +// +// * VCVTUQQ2PD m512/m64bcst, zmm{k}{z} [AVX512DQ] +// * VCVTUQQ2PD {er}, zmm, zmm{k}{z} [AVX512DQ] +// * VCVTUQQ2PD zmm, zmm{k}{z} [AVX512DQ] +// * VCVTUQQ2PD m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTUQQ2PD m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTUQQ2PD xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTUQQ2PD ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VCVTUQQ2PD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTUQQ2PD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTUQQ2PD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTUQQ2PD takes 2 or 3 operands") + } + // VCVTUQQ2PD m512/m64bcst, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTUQQ2PD {er}, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTUQQ2PD zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTUQQ2PD m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTUQQ2PD m256/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } 
+ // VCVTUQQ2PD xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTUQQ2PD ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTUQQ2PD") + } + return p +} + +// VCVTUQQ2PS performs "Convert Packed Unsigned Quadword Integers to Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VCVTUQQ2PS +// Supported forms : (7 forms) +// +// * VCVTUQQ2PS m512/m64bcst, ymm{k}{z} [AVX512DQ] +// * VCVTUQQ2PS {er}, zmm, ymm{k}{z} [AVX512DQ] +// * VCVTUQQ2PS zmm, ymm{k}{z} [AVX512DQ] +// * VCVTUQQ2PS m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTUQQ2PS m256/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTUQQ2PS xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VCVTUQQ2PS ymm, xmm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VCVTUQQ2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTUQQ2PS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VCVTUQQ2PS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VCVTUQQ2PS takes 2 or 3 operands") + } + // VCVTUQQ2PS m512/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VCVTUQQ2PS {er}, zmm, ymm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isYMMkz(vv[0]) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VCVTUQQ2PS zmm, ymm{k}{z} + if len(vv) == 0 && isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTUQQ2PS m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VCVTUQQ2PS m256/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isXMMkz(v1) { + 
self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x7a) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VCVTUQQ2PS xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VCVTUQQ2PS ymm, xmm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTUQQ2PS") + } + return p +} + +// VCVTUSI2SD performs "Convert Unsigned Integer to Scalar Double-Precision Floating-Point Value". +// +// Mnemonic : VCVTUSI2SD +// Supported forms : (5 forms) +// +// * VCVTUSI2SD r32, xmm, xmm [AVX512F] +// * VCVTUSI2SD m32, xmm, xmm [AVX512F] +// * VCVTUSI2SD m64, xmm, xmm [AVX512F] +// * VCVTUSI2SD {er}, r64, xmm, xmm [AVX512F] +// * VCVTUSI2SD r64, xmm, xmm [AVX512F] +// +func (self *Program) VCVTUSI2SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTUSI2SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VCVTUSI2SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VCVTUSI2SD takes 3 or 4 operands") + } + // VCVTUSI2SD r32, xmm, xmm + if len(vv) == 0 && isReg32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7f ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x00) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VCVTUSI2SD m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0x7b) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VCVTUSI2SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0x7b) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VCVTUSI2SD {er}, r64, xmm, xmm + if len(vv) == 1 && isER(v0) && isReg64(v1) && isEVEXXMM(v2) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xff ^ (hlcode(v[2]) << 3)) + m.emit((vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | 0x10) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[3]) << 3 | 
lcode(v[1])) + }) + } + // VCVTUSI2SD r64, xmm, xmm + if len(vv) == 0 && isReg64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTUSI2SD") + } + return p +} + +// VCVTUSI2SS performs "Convert Unsigned Integer to Scalar Single-Precision Floating-Point Value". +// +// Mnemonic : VCVTUSI2SS +// Supported forms : (6 forms) +// +// * VCVTUSI2SS m32, xmm, xmm [AVX512F] +// * VCVTUSI2SS m64, xmm, xmm [AVX512F] +// * VCVTUSI2SS {er}, r32, xmm, xmm [AVX512F] +// * VCVTUSI2SS {er}, r64, xmm, xmm [AVX512F] +// * VCVTUSI2SS r32, xmm, xmm [AVX512F] +// * VCVTUSI2SS r64, xmm, xmm [AVX512F] +// +func (self *Program) VCVTUSI2SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VCVTUSI2SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VCVTUSI2SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VCVTUSI2SS takes 3 or 4 operands") + } + // VCVTUSI2SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0x7b) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VCVTUSI2SS m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0x7b) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VCVTUSI2SS {er}, r32, xmm, xmm + if len(vv) == 1 && isER(v0) && isReg32(v1) && isEVEXXMM(v2) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7e ^ (hlcode(v[2]) << 3)) + m.emit((vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | 0x10) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VCVTUSI2SS {er}, r64, xmm, xmm + if len(vv) == 1 && isER(v0) && isReg64(v1) && isEVEXXMM(v2) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfe ^ (hlcode(v[2]) << 3)) + m.emit((vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | 0x10) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VCVTUSI2SS r32, xmm, xmm + if len(vv) == 0 && isReg32(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VCVTUSI2SS r64, xmm, xmm + if len(vv) == 0 && isReg64(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + 
self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VCVTUSI2SS") + } + return p +} + +// VDBPSADBW performs "Double Block Packed Sum-Absolute-Differences on Unsigned Bytes". +// +// Mnemonic : VDBPSADBW +// Supported forms : (6 forms) +// +// * VDBPSADBW imm8, zmm, zmm, zmm{k}{z} [AVX512BW] +// * VDBPSADBW imm8, m512, zmm, zmm{k}{z} [AVX512BW] +// * VDBPSADBW imm8, xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VDBPSADBW imm8, m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VDBPSADBW imm8, ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VDBPSADBW imm8, m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VDBPSADBW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VDBPSADBW", 4, Operands { v0, v1, v2, v3 }) + // VDBPSADBW imm8, zmm, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x42) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VDBPSADBW imm8, m512, zmm, zmm{k}{z} + if isImm8(v0) && isM512(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x42) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VDBPSADBW imm8, xmm, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x42) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VDBPSADBW imm8, m128, xmm, xmm{k}{z} + if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x42) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VDBPSADBW imm8, ymm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x42) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VDBPSADBW imm8, m256, ymm, ymm{k}{z} + if isImm8(v0) && 
isM256(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x42) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VDBPSADBW") + } + return p +} + +// VDIVPD performs "Divide Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VDIVPD +// Supported forms : (11 forms) +// +// * VDIVPD xmm, xmm, xmm [AVX] +// * VDIVPD m128, xmm, xmm [AVX] +// * VDIVPD ymm, ymm, ymm [AVX] +// * VDIVPD m256, ymm, ymm [AVX] +// * VDIVPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VDIVPD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VDIVPD zmm, zmm, zmm{k}{z} [AVX512F] +// * VDIVPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VDIVPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VDIVPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VDIVPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VDIVPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VDIVPD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VDIVPD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VDIVPD takes 3 or 4 operands") + } + // VDIVPD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VDIVPD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VDIVPD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VDIVPD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VDIVPD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VDIVPD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VDIVPD zmm, zmm, zmm{k}{z} + if len(vv) == 0 
&& isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VDIVPD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VDIVPD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VDIVPD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VDIVPD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VDIVPD") + } + return p +} + +// VDIVPS performs "Divide Packed Single-Precision Floating-Point Values". 
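+// +// A minimal usage sketch (illustrative, not generated output; it assumes a +// previously constructed *Program p and this package's register constants): +// p.VDIVPS(YMM2, YMM1, YMM0) picks the plain AVX "VDIVPS ymm, ymm, ymm" form +// and, in the AT&T-style operand order used throughout this file, computes +// YMM0 = YMM1 / YMM2.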
+// +// Mnemonic : VDIVPS +// Supported forms : (11 forms) +// +// * VDIVPS xmm, xmm, xmm [AVX] +// * VDIVPS m128, xmm, xmm [AVX] +// * VDIVPS ymm, ymm, ymm [AVX] +// * VDIVPS m256, ymm, ymm [AVX] +// * VDIVPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VDIVPS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VDIVPS zmm, zmm, zmm{k}{z} [AVX512F] +// * VDIVPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VDIVPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VDIVPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VDIVPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VDIVPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VDIVPS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VDIVPS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VDIVPS takes 3 or 4 operands") + } + // VDIVPS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VDIVPS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VDIVPS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VDIVPS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VDIVPS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VDIVPS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VDIVPS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VDIVPS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && 
isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VDIVPS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VDIVPS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VDIVPS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VDIVPS") + } + return p +} + +// VDIVSD performs "Divide Scalar Double-Precision Floating-Point Values". 
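+// +// Sketch (illustrative; it assumes a *Program p, the package's XMM/RAX register +// constants, and its Ptr helper for building memory operands, if available): +// p.VDIVSD(Ptr(RAX, 0), XMM1, XMM0) would select the "VDIVSD m64, xmm, xmm" form +// below, dividing the scalar double in XMM1 by the one loaded from [RAX].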
+// +// Mnemonic : VDIVSD +// Supported forms : (5 forms) +// +// * VDIVSD xmm, xmm, xmm [AVX] +// * VDIVSD m64, xmm, xmm [AVX] +// * VDIVSD m64, xmm, xmm{k}{z} [AVX512F] +// * VDIVSD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VDIVSD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VDIVSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VDIVSD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VDIVSD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VDIVSD takes 3 or 4 operands") + } + // VDIVSD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VDIVSD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VDIVSD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VDIVSD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xff ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VDIVSD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VDIVSD") + } + return p +} + +// VDIVSS performs "Divide Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VDIVSS +// Supported forms : (5 forms) +// +// * VDIVSS xmm, xmm, xmm [AVX] +// * VDIVSS m32, xmm, xmm [AVX] +// * VDIVSS m32, xmm, xmm{k}{z} [AVX512F] +// * VDIVSS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VDIVSS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VDIVSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VDIVSS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VDIVSS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VDIVSS takes 3 or 4 operands") + } + // VDIVSS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VDIVSS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VDIVSS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x5e) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VDIVSS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7e ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VDIVSS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VDIVSS") + } + return p +} + +// VDPPD performs "Dot Product of Packed Double Precision Floating-Point Values". 
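+// +// Sketch (illustrative; assumes a *Program p and XMM register constants): the +// immediate always comes first in this API, so p.VDPPD(0x31, XMM2, XMM1, XMM0) +// encodes "VDPPD imm8, xmm, xmm, xmm", where 0x31 is a control byte selecting +// which products enter the sum and which result lanes are written.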
+// +// Mnemonic : VDPPD +// Supported forms : (2 forms) +// +// * VDPPD imm8, xmm, xmm, xmm [AVX] +// * VDPPD imm8, m128, xmm, xmm [AVX] +// +func (self *Program) VDPPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VDPPD", 4, Operands { v0, v1, v2, v3 }) + // VDPPD imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x41) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VDPPD imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x41) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VDPPD") + } + return p +} + +// VDPPS performs "Dot Product of Packed Single Precision Floating-Point Values". +// +// Mnemonic : VDPPS +// Supported forms : (4 forms) +// +// * VDPPS imm8, xmm, xmm, xmm [AVX] +// * VDPPS imm8, m128, xmm, xmm [AVX] +// * VDPPS imm8, ymm, ymm, ymm [AVX] +// * VDPPS imm8, m256, ymm, ymm [AVX] +// +func (self *Program) VDPPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VDPPS", 4, Operands { v0, v1, v2, v3 }) + // VDPPS imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x40) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VDPPS imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x40) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VDPPS imm8, ymm, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x40) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VDPPS imm8, m256, ymm, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x40) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VDPPS") + } + return p +} + +// VEXP2PD performs "Approximation to the Exponential 2^x of Packed Double-Precision Floating-Point Values with Less Than 2^-23 Relative Error". 
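+// +// Sketch (illustrative; assumes a *Program p and ZMM register constants): the +// plain form p.VEXP2PD(ZMM1, ZMM0) approximates 2^x for each double in ZMM1 +// into ZMM0; the optional {sae} operand, when used, is passed first, matching +// the 3-operand form listed below.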
+// +// Mnemonic : VEXP2PD +// Supported forms : (3 forms) +// +// * VEXP2PD m512/m64bcst, zmm{k}{z} [AVX512ER] +// * VEXP2PD {sae}, zmm, zmm{k}{z} [AVX512ER] +// * VEXP2PD zmm, zmm{k}{z} [AVX512ER] +// +func (self *Program) VEXP2PD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VEXP2PD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VEXP2PD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VEXP2PD takes 2 or 3 operands") + } + // VEXP2PD m512/m64bcst, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xc8) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VEXP2PD {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0xc8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VEXP2PD zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0xc8) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXP2PD") + } + return p +} + +// VEXP2PS performs "Approximation to the Exponential 2^x of Packed Single-Precision Floating-Point Values with Less Than 2^-23 Relative Error". 
+// +// Mnemonic : VEXP2PS +// Supported forms : (3 forms) +// +// * VEXP2PS m512/m32bcst, zmm{k}{z} [AVX512ER] +// * VEXP2PS {sae}, zmm, zmm{k}{z} [AVX512ER] +// * VEXP2PS zmm, zmm{k}{z} [AVX512ER] +// +func (self *Program) VEXP2PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VEXP2PS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VEXP2PS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VEXP2PS takes 2 or 3 operands") + } + // VEXP2PS m512/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xc8) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VEXP2PS {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0xc8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VEXP2PS zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0xc8) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXP2PS") + } + return p +} + +// VEXPANDPD performs "Load Sparse Packed Double-Precision Floating-Point Values from Dense Memory". 
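+//
+// Usage sketch (illustrative only; p is assumed to be a *Program built with
+// this package, and an unmasked destination is assumed to satisfy zmm{k}{z}):
+//
+//     p.VEXPANDPD(ZMM2, ZMM1)   // expand packed float64 elements of ZMM2 into ZMM1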
+//
+// Mnemonic : VEXPANDPD
+// Supported forms : (6 forms)
+//
+// * VEXPANDPD zmm, zmm{k}{z} [AVX512F]
+// * VEXPANDPD m512, zmm{k}{z} [AVX512F]
+// * VEXPANDPD xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VEXPANDPD ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VEXPANDPD m128, xmm{k}{z} [AVX512F,AVX512VL]
+// * VEXPANDPD m256, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VEXPANDPD(v0 interface{}, v1 interface{}) *Instruction {
+    p := self.alloc("VEXPANDPD", 2, Operands { v0, v1 })
+    // VEXPANDPD zmm, zmm{k}{z}
+    if isZMM(v0) && isZMMkz(v1) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+            m.emit(0xfd)
+            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+            m.emit(0x88)
+            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+        })
+    }
+    // VEXPANDPD m512, zmm{k}{z}
+    if isM512(v0) && isZMMkz(v1) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+            m.emit(0x88)
+            m.mrsd(lcode(v[1]), addr(v[0]), 8)
+        })
+    }
+    // VEXPANDPD xmm, xmm{k}{z}
+    if isEVEXXMM(v0) && isXMMkz(v1) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+            m.emit(0xfd)
+            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+            m.emit(0x88)
+            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+        })
+    }
+    // VEXPANDPD ymm, ymm{k}{z}
+    if isEVEXYMM(v0) && isYMMkz(v1) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+            m.emit(0xfd)
+            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+            m.emit(0x88)
+            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+        })
+    }
+    // VEXPANDPD m128, xmm{k}{z}
+    if isM128(v0) && isXMMkz(v1) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+            m.emit(0x88)
+            m.mrsd(lcode(v[1]), addr(v[0]), 8)
+        })
+    }
+    // VEXPANDPD m256, ymm{k}{z}
+    if isM256(v0) && isYMMkz(v1) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0)
+            m.emit(0x88)
+            m.mrsd(lcode(v[1]), addr(v[0]), 8)
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VEXPANDPD")
+    }
+    return p
+}
+
+// VEXPANDPS performs "Load Sparse Packed Single-Precision Floating-Point Values from Dense Memory".
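+//
+// A minimal sketch of the 128-bit register form, assuming a *Program value p
+// and this package's XMM register constants:
+//
+//     p.VEXPANDPS(XMM4, XMM5)   // expand packed float32 elements of XMM4 into XMM5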
+// +// Mnemonic : VEXPANDPS +// Supported forms : (6 forms) +// +// * VEXPANDPS zmm, zmm{k}{z} [AVX512F] +// * VEXPANDPS m512, zmm{k}{z} [AVX512F] +// * VEXPANDPS xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VEXPANDPS ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VEXPANDPS m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VEXPANDPS m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VEXPANDPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VEXPANDPS", 2, Operands { v0, v1 }) + // VEXPANDPS zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x88) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VEXPANDPS m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x88) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VEXPANDPS xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x88) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VEXPANDPS ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x88) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VEXPANDPS m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x88) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VEXPANDPS m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x88) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VEXPANDPS") + } + return p +} + +// VEXTRACTF128 performs "Extract Packed Floating-Point Values". 
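+//
+// Illustrative call (not from the generator; it assumes a *Program value p
+// and that plain Go integer literals are accepted as imm8):
+//
+//     p.VEXTRACTF128(1, YMM2, XMM0)   // copy the upper 128 bits of YMM2 into XMM0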
+// +// Mnemonic : VEXTRACTF128 +// Supported forms : (2 forms) +// +// * VEXTRACTF128 imm8, ymm, xmm [AVX] +// * VEXTRACTF128 imm8, ymm, m128 [AVX] +// +func (self *Program) VEXTRACTF128(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VEXTRACTF128", 3, Operands { v0, v1, v2 }) + // VEXTRACTF128 imm8, ymm, xmm + if isImm8(v0) && isYMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0x7d) + m.emit(0x19) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTF128 imm8, ymm, m128 + if isImm8(v0) && isYMM(v1) && isM128(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[1]), addr(v[2]), 0) + m.emit(0x19) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXTRACTF128") + } + return p +} + +// VEXTRACTF32X4 performs "Extract 128 Bits of Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VEXTRACTF32X4 +// Supported forms : (4 forms) +// +// * VEXTRACTF32X4 imm8, zmm, xmm{k}{z} [AVX512F] +// * VEXTRACTF32X4 imm8, zmm, m128{k}{z} [AVX512F] +// * VEXTRACTF32X4 imm8, ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VEXTRACTF32X4 imm8, ymm, m128{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VEXTRACTF32X4(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VEXTRACTF32X4", 3, Operands { v0, v1, v2 }) + // VEXTRACTF32X4 imm8, zmm, xmm{k}{z} + if isImm8(v0) && isZMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x19) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTF32X4 imm8, zmm, m128{k}{z} + if isImm8(v0) && isZMM(v1) && isM128kz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x19) + m.mrsd(lcode(v[1]), addr(v[2]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTF32X4 imm8, ymm, xmm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x19) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTF32X4 imm8, ymm, m128{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isM128kz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x19) + m.mrsd(lcode(v[1]), addr(v[2]), 16) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXTRACTF32X4") + } + return p +} + +// VEXTRACTF32X8 performs "Extract 256 Bits of Packed Single-Precision Floating-Point Values". 
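+//
+// Usage sketch under the same assumptions (a *Program value p, Go ints as imm8):
+//
+//     p.VEXTRACTF32X8(1, ZMM3, YMM1)   // upper 256 bits of ZMM3 -> YMM1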
+// +// Mnemonic : VEXTRACTF32X8 +// Supported forms : (2 forms) +// +// * VEXTRACTF32X8 imm8, zmm, ymm{k}{z} [AVX512DQ] +// * VEXTRACTF32X8 imm8, zmm, m256{k}{z} [AVX512DQ] +// +func (self *Program) VEXTRACTF32X8(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VEXTRACTF32X8", 3, Operands { v0, v1, v2 }) + // VEXTRACTF32X8 imm8, zmm, ymm{k}{z} + if isImm8(v0) && isZMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x1b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTF32X8 imm8, zmm, m256{k}{z} + if isImm8(v0) && isZMM(v1) && isM256kz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x1b) + m.mrsd(lcode(v[1]), addr(v[2]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXTRACTF32X8") + } + return p +} + +// VEXTRACTF64X2 performs "Extract 128 Bits of Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VEXTRACTF64X2 +// Supported forms : (4 forms) +// +// * VEXTRACTF64X2 imm8, zmm, xmm{k}{z} [AVX512DQ] +// * VEXTRACTF64X2 imm8, zmm, m128{k}{z} [AVX512DQ] +// * VEXTRACTF64X2 imm8, ymm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VEXTRACTF64X2 imm8, ymm, m128{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VEXTRACTF64X2(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VEXTRACTF64X2", 3, Operands { v0, v1, v2 }) + // VEXTRACTF64X2 imm8, zmm, xmm{k}{z} + if isImm8(v0) && isZMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x19) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTF64X2 imm8, zmm, m128{k}{z} + if isImm8(v0) && isZMM(v1) && isM128kz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x19) + m.mrsd(lcode(v[1]), addr(v[2]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTF64X2 imm8, ymm, xmm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x19) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTF64X2 imm8, ymm, m128{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isM128kz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x19) + m.mrsd(lcode(v[1]), addr(v[2]), 16) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXTRACTF64X2") + } + return p +} + +// VEXTRACTF64X4 performs 
"Extract 256 Bits of Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VEXTRACTF64X4 +// Supported forms : (2 forms) +// +// * VEXTRACTF64X4 imm8, zmm, ymm{k}{z} [AVX512F] +// * VEXTRACTF64X4 imm8, zmm, m256{k}{z} [AVX512F] +// +func (self *Program) VEXTRACTF64X4(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VEXTRACTF64X4", 3, Operands { v0, v1, v2 }) + // VEXTRACTF64X4 imm8, zmm, ymm{k}{z} + if isImm8(v0) && isZMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x1b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTF64X4 imm8, zmm, m256{k}{z} + if isImm8(v0) && isZMM(v1) && isM256kz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x1b) + m.mrsd(lcode(v[1]), addr(v[2]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXTRACTF64X4") + } + return p +} + +// VEXTRACTI128 performs "Extract Packed Integer Values". +// +// Mnemonic : VEXTRACTI128 +// Supported forms : (2 forms) +// +// * VEXTRACTI128 imm8, ymm, xmm [AVX2] +// * VEXTRACTI128 imm8, ymm, m128 [AVX2] +// +func (self *Program) VEXTRACTI128(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VEXTRACTI128", 3, Operands { v0, v1, v2 }) + // VEXTRACTI128 imm8, ymm, xmm + if isImm8(v0) && isYMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0x7d) + m.emit(0x39) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTI128 imm8, ymm, m128 + if isImm8(v0) && isYMM(v1) && isM128(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[1]), addr(v[2]), 0) + m.emit(0x39) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXTRACTI128") + } + return p +} + +// VEXTRACTI32X4 performs "Extract 128 Bits of Packed Doubleword Integer Values". 
+// +// Mnemonic : VEXTRACTI32X4 +// Supported forms : (4 forms) +// +// * VEXTRACTI32X4 imm8, zmm, xmm{k}{z} [AVX512F] +// * VEXTRACTI32X4 imm8, zmm, m128{k}{z} [AVX512F] +// * VEXTRACTI32X4 imm8, ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VEXTRACTI32X4 imm8, ymm, m128{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VEXTRACTI32X4(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VEXTRACTI32X4", 3, Operands { v0, v1, v2 }) + // VEXTRACTI32X4 imm8, zmm, xmm{k}{z} + if isImm8(v0) && isZMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x39) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTI32X4 imm8, zmm, m128{k}{z} + if isImm8(v0) && isZMM(v1) && isM128kz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x39) + m.mrsd(lcode(v[1]), addr(v[2]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTI32X4 imm8, ymm, xmm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x39) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTI32X4 imm8, ymm, m128{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isM128kz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x39) + m.mrsd(lcode(v[1]), addr(v[2]), 16) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXTRACTI32X4") + } + return p +} + +// VEXTRACTI32X8 performs "Extract 256 Bits of Packed Doubleword Integer Values". 
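+//
+// A hedged example, again assuming a *Program value p:
+//
+//     p.VEXTRACTI32X8(0, ZMM4, YMM6)   // lower 256 bits of ZMM4 -> YMM6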
+// +// Mnemonic : VEXTRACTI32X8 +// Supported forms : (2 forms) +// +// * VEXTRACTI32X8 imm8, zmm, ymm{k}{z} [AVX512DQ] +// * VEXTRACTI32X8 imm8, zmm, m256{k}{z} [AVX512DQ] +// +func (self *Program) VEXTRACTI32X8(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VEXTRACTI32X8", 3, Operands { v0, v1, v2 }) + // VEXTRACTI32X8 imm8, zmm, ymm{k}{z} + if isImm8(v0) && isZMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTI32X8 imm8, zmm, m256{k}{z} + if isImm8(v0) && isZMM(v1) && isM256kz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x3b) + m.mrsd(lcode(v[1]), addr(v[2]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXTRACTI32X8") + } + return p +} + +// VEXTRACTI64X2 performs "Extract 128 Bits of Packed Quadword Integer Values". +// +// Mnemonic : VEXTRACTI64X2 +// Supported forms : (4 forms) +// +// * VEXTRACTI64X2 imm8, zmm, xmm{k}{z} [AVX512DQ] +// * VEXTRACTI64X2 imm8, zmm, m128{k}{z} [AVX512DQ] +// * VEXTRACTI64X2 imm8, ymm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VEXTRACTI64X2 imm8, ymm, m128{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VEXTRACTI64X2(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VEXTRACTI64X2", 3, Operands { v0, v1, v2 }) + // VEXTRACTI64X2 imm8, zmm, xmm{k}{z} + if isImm8(v0) && isZMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x39) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTI64X2 imm8, zmm, m128{k}{z} + if isImm8(v0) && isZMM(v1) && isM128kz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x39) + m.mrsd(lcode(v[1]), addr(v[2]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTI64X2 imm8, ymm, xmm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x39) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTI64X2 imm8, ymm, m128{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isM128kz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x39) + m.mrsd(lcode(v[1]), addr(v[2]), 16) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXTRACTI64X2") + } + return p +} + +// VEXTRACTI64X4 performs "Extract 256 Bits 
of Packed Quadword Integer Values". +// +// Mnemonic : VEXTRACTI64X4 +// Supported forms : (2 forms) +// +// * VEXTRACTI64X4 imm8, zmm, ymm{k}{z} [AVX512F] +// * VEXTRACTI64X4 imm8, zmm, m256{k}{z} [AVX512F] +// +func (self *Program) VEXTRACTI64X4(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VEXTRACTI64X4", 3, Operands { v0, v1, v2 }) + // VEXTRACTI64X4 imm8, zmm, ymm{k}{z} + if isImm8(v0) && isZMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTI64X4 imm8, zmm, m256{k}{z} + if isImm8(v0) && isZMM(v1) && isM256kz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[1]), addr(v[2]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x3b) + m.mrsd(lcode(v[1]), addr(v[2]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXTRACTI64X4") + } + return p +} + +// VEXTRACTPS performs "Extract Packed Single Precision Floating-Point Value". +// +// Mnemonic : VEXTRACTPS +// Supported forms : (4 forms) +// +// * VEXTRACTPS imm8, xmm, r32 [AVX] +// * VEXTRACTPS imm8, xmm, m32 [AVX] +// * VEXTRACTPS imm8, xmm, r32 [AVX512F] +// * VEXTRACTPS imm8, xmm, m32 [AVX512F] +// +func (self *Program) VEXTRACTPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VEXTRACTPS", 3, Operands { v0, v1, v2 }) + // VEXTRACTPS imm8, xmm, r32 + if isImm8(v0) && isXMM(v1) && isReg32(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0x79) + m.emit(0x17) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTPS imm8, xmm, m32 + if isImm8(v0) && isXMM(v1) && isM32(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[1]), addr(v[2]), 0) + m.emit(0x17) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTPS imm8, xmm, r32 + if isImm8(v0) && isEVEXXMM(v1) && isReg32(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit(0x08) + m.emit(0x17) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VEXTRACTPS imm8, xmm, m32 + if isImm8(v0) && isEVEXXMM(v1) && isM32(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[1]), addr(v[2]), 0, 0, 0, 0) + m.emit(0x17) + m.mrsd(lcode(v[1]), addr(v[2]), 4) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VEXTRACTPS") + } + return p +} + +// VFIXUPIMMPD performs "Fix Up Special Packed Double-Precision Floating-Point Values". 
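+//
+// Sketch only (p is an assumed *Program value; the imm8 indexes Intel's
+// fix-up token table, and the first register operand is assumed to supply
+// that table per the destination-last operand order used throughout):
+//
+//     p.VFIXUPIMMPD(0x22, ZMM2, ZMM1, ZMM0)   // fix up ZMM0 from source ZMM1 using table ZMM2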
+// +// Mnemonic : VFIXUPIMMPD +// Supported forms : (7 forms) +// +// * VFIXUPIMMPD imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFIXUPIMMPD imm8, {sae}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFIXUPIMMPD imm8, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFIXUPIMMPD imm8, m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFIXUPIMMPD imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFIXUPIMMPD imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFIXUPIMMPD imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFIXUPIMMPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFIXUPIMMPD", 4, Operands { v0, v1, v2, v3 }) + case 1 : p = self.alloc("VFIXUPIMMPD", 5, Operands { v0, v1, v2, v3, vv[0] }) + default : panic("instruction VFIXUPIMMPD takes 4 or 5 operands") + } + // VFIXUPIMMPD imm8, m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x54) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VFIXUPIMMPD imm8, {sae}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMM(v3) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4))) + m.emit(0xfd ^ (hlcode(v[3]) << 3)) + m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10) + m.emit(0x54) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VFIXUPIMMPD imm8, zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x54) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VFIXUPIMMPD imm8, m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x54) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VFIXUPIMMPD imm8, xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x54) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VFIXUPIMMPD imm8, m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) 
{
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+            m.emit(0x54)
+            m.mrsd(lcode(v[3]), addr(v[1]), 32)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VFIXUPIMMPD imm8, ymm, ymm, ymm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+            m.emit(0xfd ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+            m.emit(0x54)
+            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VFIXUPIMMPD")
+    }
+    return p
+}
+
+// VFIXUPIMMPS performs "Fix Up Special Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VFIXUPIMMPS
+// Supported forms : (7 forms)
+//
+// * VFIXUPIMMPS imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512F]
+// * VFIXUPIMMPS imm8, {sae}, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFIXUPIMMPS imm8, zmm, zmm, zmm{k}{z} [AVX512F]
+// * VFIXUPIMMPS imm8, m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFIXUPIMMPS imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VFIXUPIMMPS imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VFIXUPIMMPS imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VFIXUPIMMPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
+    var p *Instruction
+    switch len(vv) {
+        case 0 : p = self.alloc("VFIXUPIMMPS", 4, Operands { v0, v1, v2, v3 })
+        case 1 : p = self.alloc("VFIXUPIMMPS", 5, Operands { v0, v1, v2, v3, vv[0] })
+        default : panic("instruction VFIXUPIMMPS takes 4 or 5 operands")
+    }
+    // VFIXUPIMMPS imm8, m512/m32bcst, zmm, zmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+            m.emit(0x54)
+            m.mrsd(lcode(v[3]), addr(v[1]), 64)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VFIXUPIMMPS imm8, {sae}, zmm, zmm, zmm{k}{z}
+    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMM(v3) && isZMMkz(vv[0]) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
+            m.emit(0x7d ^ (hlcode(v[3]) << 3))
+            m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
+            m.emit(0x54)
+            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VFIXUPIMMPS imm8, zmm, zmm, zmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+            m.emit(0x7d ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+            m.emit(0x54)
+            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VFIXUPIMMPS imm8, m128/m32bcst, xmm, xmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+            m.emit(0x54)
+            m.mrsd(lcode(v[3]), addr(v[1]), 16)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VFIXUPIMMPS imm8, xmm, xmm, xmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+            m.emit(0x7d ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+            m.emit(0x54)
+            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VFIXUPIMMPS imm8, m256/m32bcst, ymm, ymm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+            m.emit(0x54)
+            m.mrsd(lcode(v[3]), addr(v[1]), 32)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VFIXUPIMMPS imm8, ymm, ymm, ymm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+            m.emit(0x7d ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+            m.emit(0x54)
+            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VFIXUPIMMPS")
+    }
+    return p
+}
+
+// VFIXUPIMMSD performs "Fix Up Special Scalar Double-Precision Floating-Point Value".
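+//
+// Scalar sketch, assuming a *Program value p; only the low float64 lane is
+// fixed up:
+//
+//     p.VFIXUPIMMSD(0, XMM2, XMM1, XMM0)   // fix up the low lane of XMM0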
+// +// Mnemonic : VFIXUPIMMSD +// Supported forms : (3 forms) +// +// * VFIXUPIMMSD imm8, m64, xmm, xmm{k}{z} [AVX512F] +// * VFIXUPIMMSD imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFIXUPIMMSD imm8, xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFIXUPIMMSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFIXUPIMMSD", 4, Operands { v0, v1, v2, v3 }) + case 1 : p = self.alloc("VFIXUPIMMSD", 5, Operands { v0, v1, v2, v3, vv[0] }) + default : panic("instruction VFIXUPIMMSD takes 4 or 5 operands") + } + // VFIXUPIMMSD imm8, m64, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x55) + m.mrsd(lcode(v[3]), addr(v[1]), 8) + m.imm1(toImmAny(v[0])) + }) + } + // VFIXUPIMMSD imm8, {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4))) + m.emit(0xfd ^ (hlcode(v[3]) << 3)) + m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10) + m.emit(0x55) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VFIXUPIMMSD imm8, xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x55) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFIXUPIMMSD") + } + return p +} + +// VFIXUPIMMSS performs "Fix Up Special Scalar Single-Precision Floating-Point Value". 
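+//
+// Single-precision analogue; a sketch under the same assumed *Program value p:
+//
+//     p.VFIXUPIMMSS(0, XMM5, XMM4, XMM3)   // fix up the low float32 of XMM3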
+// +// Mnemonic : VFIXUPIMMSS +// Supported forms : (3 forms) +// +// * VFIXUPIMMSS imm8, m32, xmm, xmm{k}{z} [AVX512F] +// * VFIXUPIMMSS imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFIXUPIMMSS imm8, xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFIXUPIMMSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFIXUPIMMSS", 4, Operands { v0, v1, v2, v3 }) + case 1 : p = self.alloc("VFIXUPIMMSS", 5, Operands { v0, v1, v2, v3, vv[0] }) + default : panic("instruction VFIXUPIMMSS takes 4 or 5 operands") + } + // VFIXUPIMMSS imm8, m32, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x55) + m.mrsd(lcode(v[3]), addr(v[1]), 4) + m.imm1(toImmAny(v[0])) + }) + } + // VFIXUPIMMSS imm8, {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4))) + m.emit(0x7d ^ (hlcode(v[3]) << 3)) + m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10) + m.emit(0x55) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VFIXUPIMMSS imm8, xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x55) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFIXUPIMMSS") + } + return p +} + +// VFMADD132PD performs "Fused Multiply-Add of Packed Double-Precision Floating-Point Values". 
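+//
+// Operands are written destination-last throughout this package, so with the
+// FMA 132 ordering the result is dst = dst*src1 + src2. A hedged sketch,
+// assuming a *Program value p:
+//
+//     p.VFMADD132PD(XMM2, XMM1, XMM0)   // XMM0 = XMM0*XMM2 + XMM1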
+// +// Mnemonic : VFMADD132PD +// Supported forms : (11 forms) +// +// * VFMADD132PD xmm, xmm, xmm [FMA3] +// * VFMADD132PD m128, xmm, xmm [FMA3] +// * VFMADD132PD ymm, ymm, ymm [FMA3] +// * VFMADD132PD m256, ymm, ymm [FMA3] +// * VFMADD132PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMADD132PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADD132PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADD132PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADD132PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADD132PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMADD132PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMADD132PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADD132PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADD132PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADD132PD takes 3 or 4 operands") + } + // VFMADD132PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0x98) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD132PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x98) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD132PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x98) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD132PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x98) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD132PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x98) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMADD132PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x98) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADD132PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x98) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD132PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x98) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMADD132PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x98) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD132PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x98) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMADD132PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x98) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADD132PD") + } + return p +} + +// VFMADD132PS performs "Fused Multiply-Add of Packed Single-Precision Floating-Point Values". 
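+//
+// Same shape for single precision; a sketch assuming a *Program value p:
+//
+//     p.VFMADD132PS(YMM2, YMM1, YMM0)   // YMM0 = YMM0*YMM2 + YMM1, lane-wise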
+// +// Mnemonic : VFMADD132PS +// Supported forms : (11 forms) +// +// * VFMADD132PS xmm, xmm, xmm [FMA3] +// * VFMADD132PS m128, xmm, xmm [FMA3] +// * VFMADD132PS ymm, ymm, ymm [FMA3] +// * VFMADD132PS m256, ymm, ymm [FMA3] +// * VFMADD132PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMADD132PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADD132PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADD132PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADD132PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADD132PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMADD132PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMADD132PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADD132PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADD132PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADD132PS takes 3 or 4 operands") + } + // VFMADD132PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x98) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD132PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x98) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD132PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x98) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD132PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x98) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD132PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x98) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMADD132PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x98) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADD132PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x98) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD132PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x98) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMADD132PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x98) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD132PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x98) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMADD132PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x98) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADD132PS") + } + return p +} + +// VFMADD132SD performs "Fused Multiply-Add of Scalar Double-Precision Floating-Point Values". 
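+//
+// Scalar sketch (only the low float64 lane participates), assuming a
+// *Program value p:
+//
+//     p.VFMADD132SD(XMM2, XMM1, XMM0)   // low lane: XMM0 = XMM0*XMM2 + XMM1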
+// +// Mnemonic : VFMADD132SD +// Supported forms : (5 forms) +// +// * VFMADD132SD xmm, xmm, xmm [FMA3] +// * VFMADD132SD m64, xmm, xmm [FMA3] +// * VFMADD132SD m64, xmm, xmm{k}{z} [AVX512F] +// * VFMADD132SD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFMADD132SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFMADD132SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADD132SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADD132SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADD132SD takes 3 or 4 operands") + } + // VFMADD132SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0x99) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD132SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x99) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD132SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x99) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VFMADD132SD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x99) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADD132SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x99) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADD132SD") + } + return p +} + +// VFMADD132SS performs "Fused Multiply-Add of Scalar Single-Precision Floating-Point Values". 
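+//
+// Single-precision scalar form, sketched under the same assumptions:
+//
+//     p.VFMADD132SS(XMM2, XMM1, XMM0)   // low lane: XMM0 = XMM0*XMM2 + XMM1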
+// +// Mnemonic : VFMADD132SS +// Supported forms : (5 forms) +// +// * VFMADD132SS xmm, xmm, xmm [FMA3] +// * VFMADD132SS m32, xmm, xmm [FMA3] +// * VFMADD132SS m32, xmm, xmm{k}{z} [AVX512F] +// * VFMADD132SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFMADD132SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFMADD132SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADD132SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADD132SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADD132SS takes 3 or 4 operands") + } + // VFMADD132SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x99) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD132SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x99) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD132SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x99) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VFMADD132SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x99) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADD132SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x99) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADD132SS") + } + return p +} + +// VFMADD213PD performs "Fused Multiply-Add of Packed Double-Precision Floating-Point Values". 
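+//
+// The 213 variant multiplies the middle source by the destination first,
+// i.e. dst = src2*dst + src1 in this package's destination-last order; a
+// sketch assuming a *Program value p:
+//
+//     p.VFMADD213PD(XMM2, XMM1, XMM0)   // XMM0 = XMM1*XMM0 + XMM2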
+// +// Mnemonic : VFMADD213PD +// Supported forms : (11 forms) +// +// * VFMADD213PD xmm, xmm, xmm [FMA3] +// * VFMADD213PD m128, xmm, xmm [FMA3] +// * VFMADD213PD ymm, ymm, ymm [FMA3] +// * VFMADD213PD m256, ymm, ymm [FMA3] +// * VFMADD213PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMADD213PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADD213PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADD213PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADD213PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADD213PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMADD213PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMADD213PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADD213PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADD213PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADD213PD takes 3 or 4 operands") + } + // VFMADD213PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xa8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD213PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD213PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0xa8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD213PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD213PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa8) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMADD213PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xa8) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADD213PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xa8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD213PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa8) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMADD213PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xa8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD213PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa8) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMADD213PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xa8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADD213PD") + } + return p +} + +// VFMADD213PS performs "Fused Multiply-Add of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMADD213PS +// Supported forms : (11 forms) +// +// * VFMADD213PS xmm, xmm, xmm [FMA3] +// * VFMADD213PS m128, xmm, xmm [FMA3] +// * VFMADD213PS ymm, ymm, ymm [FMA3] +// * VFMADD213PS m256, ymm, ymm [FMA3] +// * VFMADD213PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMADD213PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADD213PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADD213PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADD213PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADD213PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMADD213PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMADD213PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADD213PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADD213PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADD213PS takes 3 or 4 operands") + } + // VFMADD213PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xa8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD213PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD213PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0xa8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD213PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD213PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa8) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMADD213PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xa8) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADD213PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xa8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD213PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa8) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMADD213PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xa8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD213PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa8) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMADD213PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xa8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADD213PS") + } + return p +} + +// VFMADD213SD performs "Fused Multiply-Add of Scalar Double-Precision Floating-Point Values". 
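+//
+// The m64 form takes a memory source; a hedged sketch assuming this
+// package exposes a Ptr(base, disp) memory-operand helper and 64-bit
+// register constants (both assumptions, not verified here):
+//
+//     p.VFMADD213SD(Ptr(RAX, 0), XMM1, XMM0)   // xmm0 = xmm1*xmm0 + [rax]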
+// +// Mnemonic : VFMADD213SD +// Supported forms : (5 forms) +// +// * VFMADD213SD xmm, xmm, xmm [FMA3] +// * VFMADD213SD m64, xmm, xmm [FMA3] +// * VFMADD213SD m64, xmm, xmm{k}{z} [AVX512F] +// * VFMADD213SD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFMADD213SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFMADD213SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADD213SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADD213SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADD213SD takes 3 or 4 operands") + } + // VFMADD213SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xa9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD213SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa9) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD213SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xa9) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VFMADD213SD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xa9) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADD213SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xa9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADD213SD") + } + return p +} + +// VFMADD213SS performs "Fused Multiply-Add of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMADD213SS +// Supported forms : (5 forms) +// +// * VFMADD213SS xmm, xmm, xmm [FMA3] +// * VFMADD213SS m32, xmm, xmm [FMA3] +// * VFMADD213SS m32, xmm, xmm{k}{z} [AVX512F] +// * VFMADD213SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFMADD213SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFMADD213SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADD213SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADD213SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADD213SS takes 3 or 4 operands") + } + // VFMADD213SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xa9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD213SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa9) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD213SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xa9) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VFMADD213SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xa9) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADD213SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xa9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADD213SS") + } + return p +} + +// VFMADD231PD performs "Fused Multiply-Add of Packed Double-Precision Floating-Point Values". 
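+//
+// The 231 forms are the natural accumulate shape (dst += a*b), which is
+// why they tend to appear in dot-product style loops. Hedged sketch,
+// register constants assumed:
+//
+//     p.VFMADD231PD(XMM2, XMM1, XMM0)   // per lane: xmm0 += xmm1*xmm2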
+// +// Mnemonic : VFMADD231PD +// Supported forms : (11 forms) +// +// * VFMADD231PD xmm, xmm, xmm [FMA3] +// * VFMADD231PD m128, xmm, xmm [FMA3] +// * VFMADD231PD ymm, ymm, ymm [FMA3] +// * VFMADD231PD m256, ymm, ymm [FMA3] +// * VFMADD231PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMADD231PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADD231PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADD231PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADD231PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADD231PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMADD231PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMADD231PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADD231PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADD231PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADD231PD takes 3 or 4 operands") + } + // VFMADD231PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD231PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD231PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD231PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD231PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb8) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMADD231PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADD231PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD231PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb8) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMADD231PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD231PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb8) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMADD231PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADD231PD") + } + return p +} + +// VFMADD231PS performs "Fused Multiply-Add of Packed Single-Precision Floating-Point Values". 
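+//
+// A note on the EVEX encodings below (inferred from the generated code,
+// not documented in this file): the final argument to m.mrsd is the
+// disp8*N compression factor mandated by EVEX (16/32/64 for the
+// xmm/ymm/zmm memory forms), and bcode(v[0]) sets the EVEX.b bit that
+// selects the m32bcst broadcast variant.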
+// +// Mnemonic : VFMADD231PS +// Supported forms : (11 forms) +// +// * VFMADD231PS xmm, xmm, xmm [FMA3] +// * VFMADD231PS m128, xmm, xmm [FMA3] +// * VFMADD231PS ymm, ymm, ymm [FMA3] +// * VFMADD231PS m256, ymm, ymm [FMA3] +// * VFMADD231PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMADD231PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADD231PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADD231PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADD231PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADD231PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMADD231PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMADD231PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADD231PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADD231PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADD231PS takes 3 or 4 operands") + } + // VFMADD231PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD231PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD231PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD231PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD231PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb8) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMADD231PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADD231PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD231PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb8) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMADD231PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD231PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb8) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMADD231PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xb8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADD231PS") + } + return p +} + +// VFMADD231SD performs "Fused Multiply-Add of Scalar Double-Precision Floating-Point Values". 
+// +// Mnemonic : VFMADD231SD +// Supported forms : (5 forms) +// +// * VFMADD231SD xmm, xmm, xmm [FMA3] +// * VFMADD231SD m64, xmm, xmm [FMA3] +// * VFMADD231SD m64, xmm, xmm{k}{z} [AVX512F] +// * VFMADD231SD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFMADD231SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFMADD231SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADD231SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADD231SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADD231SD takes 3 or 4 operands") + } + // VFMADD231SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xb9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD231SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb9) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD231SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xb9) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VFMADD231SD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xb9) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADD231SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xb9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADD231SD") + } + return p +} + +// VFMADD231SS performs "Fused Multiply-Add of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMADD231SS +// Supported forms : (5 forms) +// +// * VFMADD231SS xmm, xmm, xmm [FMA3] +// * VFMADD231SS m32, xmm, xmm [FMA3] +// * VFMADD231SS m32, xmm, xmm{k}{z} [AVX512F] +// * VFMADD231SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFMADD231SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFMADD231SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADD231SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADD231SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADD231SS takes 3 or 4 operands") + } + // VFMADD231SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xb9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADD231SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb9) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADD231SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xb9) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VFMADD231SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xb9) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADD231SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xb9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADD231SS") + } + return p +} + +// VFMADDPD performs "Fused Multiply-Add of Packed Double-Precision Floating-Point Values". 
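+//
+// VFMADDPD below is the four-operand AMD FMA4 variant, so the destination
+// no longer has to double as a source. The register-register forms
+// register two equivalent encodings (the VEX.W bit swaps which source
+// lands in ModRM versus the trailing /is4 byte); presumably the encoder
+// picks one when assembling. Hedged sketch, register constants assumed:
+//
+//     p.VFMADDPD(XMM3, XMM2, XMM1, XMM0)   // per lane: xmm0 = xmm1*xmm2 + xmm3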
+// +// Mnemonic : VFMADDPD +// Supported forms : (6 forms) +// +// * VFMADDPD xmm, xmm, xmm, xmm [FMA4] +// * VFMADDPD m128, xmm, xmm, xmm [FMA4] +// * VFMADDPD xmm, m128, xmm, xmm [FMA4] +// * VFMADDPD ymm, ymm, ymm, ymm [FMA4] +// * VFMADDPD m256, ymm, ymm, ymm [FMA4] +// * VFMADDPD ymm, m256, ymm, ymm [FMA4] +// +func (self *Program) VFMADDPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFMADDPD", 4, Operands { v0, v1, v2, v3 }) + // VFMADDPD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x69) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x69) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDPD m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x69) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMADDPD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x69) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDPD ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit(0x69) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x69) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDPD m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x69) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMADDPD ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x69) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADDPD") + } + return p +} + +// VFMADDPS performs "Fused Multiply-Add of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMADDPS +// Supported forms : (6 forms) +// +// * VFMADDPS xmm, xmm, xmm, xmm [FMA4] +// * VFMADDPS m128, xmm, xmm, xmm [FMA4] +// * VFMADDPS xmm, m128, xmm, xmm [FMA4] +// * VFMADDPS ymm, ymm, ymm, ymm [FMA4] +// * VFMADDPS m256, ymm, ymm, ymm [FMA4] +// * VFMADDPS ymm, m256, ymm, ymm [FMA4] +// +func (self *Program) VFMADDPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFMADDPS", 4, Operands { v0, v1, v2, v3 }) + // VFMADDPS xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x68) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x68) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDPS m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x68) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMADDPS xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x68) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDPS ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit(0x68) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x68) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDPS m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x68) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMADDPS ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x68) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADDPS") + } + return p +} + +// VFMADDSD performs "Fused Multiply-Add of Scalar Double-Precision Floating-Point Values". 
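+//
+// Like the other FMA4 forms, VFMADDSD comes only in VEX encodings: FMA4
+// is an AMD-only extension with no EVEX/AVX-512 counterpart, hence the
+// short list of supported forms.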
+// +// Mnemonic : VFMADDSD +// Supported forms : (3 forms) +// +// * VFMADDSD xmm, xmm, xmm, xmm [FMA4] +// * VFMADDSD m64, xmm, xmm, xmm [FMA4] +// * VFMADDSD xmm, m64, xmm, xmm [FMA4] +// +func (self *Program) VFMADDSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFMADDSD", 4, Operands { v0, v1, v2, v3 }) + // VFMADDSD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x6b) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x6b) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDSD m64, xmm, xmm, xmm + if isM64(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x6b) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMADDSD xmm, m64, xmm, xmm + if isXMM(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x6b) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADDSD") + } + return p +} + +// VFMADDSS performs "Fused Multiply-Add of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMADDSS +// Supported forms : (3 forms) +// +// * VFMADDSS xmm, xmm, xmm, xmm [FMA4] +// * VFMADDSS m32, xmm, xmm, xmm [FMA4] +// * VFMADDSS xmm, m32, xmm, xmm [FMA4] +// +func (self *Program) VFMADDSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFMADDSS", 4, Operands { v0, v1, v2, v3 }) + // VFMADDSS xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x6a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x6a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDSS m32, xmm, xmm, xmm + if isM32(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x6a) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMADDSS xmm, m32, xmm, xmm + if isXMM(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x6a) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADDSS") + } + return p +} + +// VFMADDSUB132PD performs "Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values". 
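+//
+// "Multiply-Alternating Add/Subtract" alternates the final operation per
+// lane: even-indexed elements compute a*b - c while odd-indexed elements
+// compute a*b + c (per the Intel SDM description of the VFMADDSUB family).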
+// +// Mnemonic : VFMADDSUB132PD +// Supported forms : (11 forms) +// +// * VFMADDSUB132PD xmm, xmm, xmm [FMA3] +// * VFMADDSUB132PD m128, xmm, xmm [FMA3] +// * VFMADDSUB132PD ymm, ymm, ymm [FMA3] +// * VFMADDSUB132PD m256, ymm, ymm [FMA3] +// * VFMADDSUB132PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB132PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB132PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB132PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB132PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB132PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB132PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMADDSUB132PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADDSUB132PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADDSUB132PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADDSUB132PD takes 3 or 4 operands") + } + // VFMADDSUB132PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0x96) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB132PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x96) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADDSUB132PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x96) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB132PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x96) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADDSUB132PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x96) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMADDSUB132PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x96) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADDSUB132PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x96) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB132PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x96) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMADDSUB132PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x96) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB132PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x96) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMADDSUB132PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x96) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADDSUB132PD") + } + return p +} + +// VFMADDSUB132PS performs "Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMADDSUB132PS +// Supported forms : (11 forms) +// +// * VFMADDSUB132PS xmm, xmm, xmm [FMA3] +// * VFMADDSUB132PS m128, xmm, xmm [FMA3] +// * VFMADDSUB132PS ymm, ymm, ymm [FMA3] +// * VFMADDSUB132PS m256, ymm, ymm [FMA3] +// * VFMADDSUB132PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB132PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB132PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB132PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB132PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB132PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB132PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMADDSUB132PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADDSUB132PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADDSUB132PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADDSUB132PS takes 3 or 4 operands") + } + // VFMADDSUB132PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x96) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB132PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x96) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADDSUB132PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x96) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB132PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x96) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADDSUB132PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x96) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMADDSUB132PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x96) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADDSUB132PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x96) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB132PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x96) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMADDSUB132PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x96) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB132PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x96) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMADDSUB132PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x96) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADDSUB132PS") + } + return p +} + +// VFMADDSUB213PD performs "Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values". 
+// +// Mnemonic : VFMADDSUB213PD +// Supported forms : (11 forms) +// +// * VFMADDSUB213PD xmm, xmm, xmm [FMA3] +// * VFMADDSUB213PD m128, xmm, xmm [FMA3] +// * VFMADDSUB213PD ymm, ymm, ymm [FMA3] +// * VFMADDSUB213PD m256, ymm, ymm [FMA3] +// * VFMADDSUB213PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB213PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB213PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB213PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB213PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB213PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB213PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMADDSUB213PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADDSUB213PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADDSUB213PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADDSUB213PD takes 3 or 4 operands") + } + // VFMADDSUB213PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB213PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa6) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADDSUB213PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB213PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa6) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADDSUB213PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa6) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMADDSUB213PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADDSUB213PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB213PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa6) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMADDSUB213PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB213PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa6) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMADDSUB213PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADDSUB213PD") + } + return p +} + +// VFMADDSUB213PS performs "Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values". 
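The register-register FMA3 forms below build the three-byte VEX prefix by hand, and the XOR trick is worth spelling out once. This helper re-derives the bytes emitted for the `VFMADDSUB213PS xmm, xmm, xmm` form from 4-bit register numbers; it mirrors the generated arithmetic as a reading aid and is not part of the package API.

// vex3ps recomputes the prefix bytes of the xmm register form that follows.
// hcode is bit 3 of a register number; VEX stores R/X/B and vvvv inverted,
// so XOR-ing against an all-ones base flips exactly the right bits for
// registers 8-15.
func vex3ps(dst, src2, rm byte) [3]byte {
	h := func(r byte) byte { return r >> 3 & 1 } // hcode: bit 3 of the register number
	return [3]byte{
		0xc4,                                // three-byte VEX escape
		0xe2 ^ (h(dst) << 7) ^ (h(rm) << 5), // ~R/~X/~B = 0b111, opcode map 0b00010 (0F 38)
		0x79 ^ ((src2 & 0xf) << 3),          // W=0, ~vvvv = second source, L=0 (128-bit), pp=01 (66)
	}
}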
+// +// Mnemonic : VFMADDSUB213PS +// Supported forms : (11 forms) +// +// * VFMADDSUB213PS xmm, xmm, xmm [FMA3] +// * VFMADDSUB213PS m128, xmm, xmm [FMA3] +// * VFMADDSUB213PS ymm, ymm, ymm [FMA3] +// * VFMADDSUB213PS m256, ymm, ymm [FMA3] +// * VFMADDSUB213PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB213PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB213PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB213PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB213PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB213PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB213PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMADDSUB213PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADDSUB213PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADDSUB213PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADDSUB213PS takes 3 or 4 operands") + } + // VFMADDSUB213PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB213PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa6) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADDSUB213PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB213PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa6) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADDSUB213PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa6) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMADDSUB213PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADDSUB213PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB213PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa6) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMADDSUB213PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB213PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa6) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMADDSUB213PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADDSUB213PS") + } + return p +} + +// VFMADDSUB231PD performs "Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values". 
+// +// Mnemonic : VFMADDSUB231PD +// Supported forms : (11 forms) +// +// * VFMADDSUB231PD xmm, xmm, xmm [FMA3] +// * VFMADDSUB231PD m128, xmm, xmm [FMA3] +// * VFMADDSUB231PD ymm, ymm, ymm [FMA3] +// * VFMADDSUB231PD m256, ymm, ymm [FMA3] +// * VFMADDSUB231PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB231PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB231PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB231PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB231PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB231PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB231PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMADDSUB231PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADDSUB231PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADDSUB231PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADDSUB231PD takes 3 or 4 operands") + } + // VFMADDSUB231PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB231PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb6) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADDSUB231PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB231PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb6) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADDSUB231PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb6) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMADDSUB231PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADDSUB231PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB231PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb6) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMADDSUB231PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB231PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb6) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMADDSUB231PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADDSUB231PD") + } + return p +} + +// VFMADDSUB231PS performs "Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values". 
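The EVEX register forms in this file end their four-byte prefix with one hand-packed byte, and the bare OR constants in those expressions are the vector-length and rounding selectors. A short legend, with bit names per the EVEX layout (high to low: z, L'L, b, V', aaa) and constants taken straight from the surrounding code:

// Final EVEX prefix byte as packed by the emit expressions in this file:
// zcode<<7 is the zeroing bit, 0x08^(ecode<<3) is the inverted V' bit for
// the second source, kcode is the opmask number, and the length selector is
// OR-ed in as one of these constants.
const (
	evexLen128 = 0x00 // xmm forms (L'L = 00)
	evexLen256 = 0x20 // ymm forms (L'L = 01)
	evexLen512 = 0x40 // zmm forms (L'L = 10)
	evexRC     = 0x10 // {er} forms: b=1, rounding mode packed into L'L via vcode(...)<<5
)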
+// +// Mnemonic : VFMADDSUB231PS +// Supported forms : (11 forms) +// +// * VFMADDSUB231PS xmm, xmm, xmm [FMA3] +// * VFMADDSUB231PS m128, xmm, xmm [FMA3] +// * VFMADDSUB231PS ymm, ymm, ymm [FMA3] +// * VFMADDSUB231PS m256, ymm, ymm [FMA3] +// * VFMADDSUB231PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB231PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB231PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMADDSUB231PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB231PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB231PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMADDSUB231PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMADDSUB231PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMADDSUB231PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMADDSUB231PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMADDSUB231PS takes 3 or 4 operands") + } + // VFMADDSUB231PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB231PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb6) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADDSUB231PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB231PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb6) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMADDSUB231PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb6) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMADDSUB231PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMADDSUB231PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB231PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb6) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMADDSUB231PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMADDSUB231PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb6) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMADDSUB231PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADDSUB231PS") + } + return p +} + +// VFMADDSUBPD performs "Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values". 
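Unlike the FMA3 forms above, the FMA4 encodings that follow carry a fourth register in a trailing immediate byte (the `hlcode(...) << 4` emit), which is why the all-register form registers two interchangeable candidates with `p.add`: either of the first two sources can ride in ModRM while the other goes in the immediate. Calling it is still a plain four-operand, destination-last call; the register names below are assumed to be exported by this package.

// emitFMA4 is a sketch of the four-operand FMA4 call; the assembler is free
// to pick either of the two registered encodings.
func emitFMA4(p *x86_64.Program) {
	p.VFMADDSUBPD(x86_64.XMM3, x86_64.XMM2, x86_64.XMM1, x86_64.XMM0)
}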
+// +// Mnemonic : VFMADDSUBPD +// Supported forms : (6 forms) +// +// * VFMADDSUBPD xmm, xmm, xmm, xmm [FMA4] +// * VFMADDSUBPD m128, xmm, xmm, xmm [FMA4] +// * VFMADDSUBPD xmm, m128, xmm, xmm [FMA4] +// * VFMADDSUBPD ymm, ymm, ymm, ymm [FMA4] +// * VFMADDSUBPD m256, ymm, ymm, ymm [FMA4] +// * VFMADDSUBPD ymm, m256, ymm, ymm [FMA4] +// +func (self *Program) VFMADDSUBPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFMADDSUBPD", 4, Operands { v0, v1, v2, v3 }) + // VFMADDSUBPD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDSUBPD m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x5d) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMADDSUBPD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x5d) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDSUBPD ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDSUBPD m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x5d) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMADDSUBPD ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x5d) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADDSUBPD") + } + return p +} + +// VFMADDSUBPS performs "Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMADDSUBPS +// Supported forms : (6 forms) +// +// * VFMADDSUBPS xmm, xmm, xmm, xmm [FMA4] +// * VFMADDSUBPS m128, xmm, xmm, xmm [FMA4] +// * VFMADDSUBPS xmm, m128, xmm, xmm [FMA4] +// * VFMADDSUBPS ymm, ymm, ymm, ymm [FMA4] +// * VFMADDSUBPS m256, ymm, ymm, ymm [FMA4] +// * VFMADDSUBPS ymm, m256, ymm, ymm [FMA4] +// +func (self *Program) VFMADDSUBPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFMADDSUBPS", 4, Operands { v0, v1, v2, v3 }) + // VFMADDSUBPS xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDSUBPS m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x5c) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMADDSUBPS xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x5c) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDSUBPS ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMADDSUBPS m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x5c) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMADDSUBPS ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x5c) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFMADDSUBPS") + } + return p +} + +// VFMSUB132PD performs "Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUB132PD +// Supported forms : (11 forms) +// +// * VFMSUB132PD xmm, xmm, xmm [FMA3] +// * VFMSUB132PD m128, xmm, xmm [FMA3] +// * VFMSUB132PD ymm, ymm, ymm [FMA3] +// * VFMSUB132PD m256, ymm, ymm [FMA3] +// * VFMSUB132PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB132PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB132PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB132PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB132PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB132PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB132PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMSUB132PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUB132PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUB132PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUB132PD takes 3 or 4 operands") + } + // VFMSUB132PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB132PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB132PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB132PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB132PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9a) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMSUB132PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUB132PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB132PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9a) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMSUB132PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB132PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9a) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMSUB132PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUB132PD") + } + return p +} + +// VFMSUB132PS performs "Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUB132PS +// Supported forms : (11 forms) +// +// * VFMSUB132PS xmm, xmm, xmm [FMA3] +// * VFMSUB132PS m128, xmm, xmm [FMA3] +// * VFMSUB132PS ymm, ymm, ymm [FMA3] +// * VFMSUB132PS m256, ymm, ymm [FMA3] +// * VFMSUB132PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB132PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB132PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB132PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB132PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB132PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB132PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMSUB132PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUB132PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUB132PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUB132PS takes 3 or 4 operands") + } + // VFMSUB132PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB132PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB132PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB132PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB132PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9a) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMSUB132PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUB132PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB132PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9a) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMSUB132PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB132PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9a) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMSUB132PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUB132PS") + } + return p +} + +// VFMSUB132SD performs "Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUB132SD +// Supported forms : (5 forms) +// +// * VFMSUB132SD xmm, xmm, xmm [FMA3] +// * VFMSUB132SD m64, xmm, xmm [FMA3] +// * VFMSUB132SD m64, xmm, xmm{k}{z} [AVX512F] +// * VFMSUB132SD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFMSUB132SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFMSUB132SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUB132SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUB132SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUB132SD takes 3 or 4 operands") + } + // VFMSUB132SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0x9b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB132SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9b) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB132SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x9b) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VFMSUB132SD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x9b) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUB132SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x9b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUB132SD") + } + return p +} + +// VFMSUB132SS performs "Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values". 
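The scalar forms below pass 4 (m32) and 8 (m64) as the trailing argument to `m.mrsd`, where the packed EVEX forms pass 16/32/64; that argument is the EVEX disp8*N compression factor. A sketch of the rule, under the standard EVEX definition:

// compressDisp returns the scaled one-byte displacement when the EVEX
// disp8*N rule applies: the displacement must be a multiple of n and the
// quotient must fit in a signed byte; otherwise the encoder falls back to a
// full 32-bit displacement.
func compressDisp(disp, n int32) (int8, bool) {
	if q := disp / n; disp%n == 0 && q >= -128 && q <= 127 {
		return int8(q), true // emitted as a single scaled byte
	}
	return 0, false // needs disp32
}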
+// +// Mnemonic : VFMSUB132SS +// Supported forms : (5 forms) +// +// * VFMSUB132SS xmm, xmm, xmm [FMA3] +// * VFMSUB132SS m32, xmm, xmm [FMA3] +// * VFMSUB132SS m32, xmm, xmm{k}{z} [AVX512F] +// * VFMSUB132SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFMSUB132SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFMSUB132SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUB132SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUB132SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUB132SS takes 3 or 4 operands") + } + // VFMSUB132SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x9b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB132SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9b) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB132SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x9b) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VFMSUB132SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x9b) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUB132SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x9b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUB132SS") + } + return p +} + +// VFMSUB213PD performs "Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUB213PD +// Supported forms : (11 forms) +// +// * VFMSUB213PD xmm, xmm, xmm [FMA3] +// * VFMSUB213PD m128, xmm, xmm [FMA3] +// * VFMSUB213PD ymm, ymm, ymm [FMA3] +// * VFMSUB213PD m256, ymm, ymm [FMA3] +// * VFMSUB213PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB213PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB213PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB213PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB213PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB213PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB213PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMSUB213PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUB213PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUB213PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUB213PD takes 3 or 4 operands") + } + // VFMSUB213PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xaa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB213PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xaa) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB213PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0xaa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB213PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xaa) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB213PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xaa) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMSUB213PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xaa) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUB213PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xaa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB213PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xaa) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMSUB213PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xaa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB213PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xaa) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMSUB213PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xaa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUB213PD") + } + return p +} + +// VFMSUB213PS performs "Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values". 
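Every form in these methods is gated by operand-type predicates and appended as an encoder candidate; when nothing matches, `p.len` stays zero and the method panics with "invalid operands" rather than emitting a bad encoding. A caller that prefers an error can wrap the call; this guard is hypothetical, not package API.

package main

import (
	"fmt"

	"github.com/cloudwego/iasm/x86_64"
)

// tryVFMSUB213PS converts the generated method's "invalid operands" panic
// into an ordinary error.
func tryVFMSUB213PS(p *x86_64.Program, a, b, c interface{}) (ins *x86_64.Instruction, err error) {
	defer func() {
		if r := recover(); r != nil {
			err = fmt.Errorf("VFMSUB213PS: %v", r)
		}
	}()
	return p.VFMSUB213PS(a, b, c), nil
}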
+// +// Mnemonic : VFMSUB213PS +// Supported forms : (11 forms) +// +// * VFMSUB213PS xmm, xmm, xmm [FMA3] +// * VFMSUB213PS m128, xmm, xmm [FMA3] +// * VFMSUB213PS ymm, ymm, ymm [FMA3] +// * VFMSUB213PS m256, ymm, ymm [FMA3] +// * VFMSUB213PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB213PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB213PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB213PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB213PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB213PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB213PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMSUB213PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUB213PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUB213PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUB213PS takes 3 or 4 operands") + } + // VFMSUB213PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xaa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB213PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xaa) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB213PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0xaa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB213PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xaa) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB213PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xaa) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMSUB213PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xaa) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUB213PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xaa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB213PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xaa) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMSUB213PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xaa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB213PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xaa) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMSUB213PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xaa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUB213PS") + } + return p +} + +// VFMSUB213SD performs "Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values". 
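In every EVEX form here, `kcode(...)` supplies the low three bits of the final prefix byte (the opmask register field, `aaa`) and `zcode(...) << 7` sets the zeroing bit; at the byte level that is all the `{k}{z}` suffix in the form lists amounts to. A bit-packing sketch:

// maskBits packs an opmask register number (k0-k7; 0 means unmasked) and the
// zeroing flag exactly the way the generated emit expressions do.
func maskBits(k uint8, zeroing bool) uint8 {
	b := k & 0x07 // aaa: opmask register number
	if zeroing {
		b |= 0x80 // z: zero the masked lanes instead of merge-masking
	}
	return b
}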
+// +// Mnemonic : VFMSUB213SD +// Supported forms : (5 forms) +// +// * VFMSUB213SD xmm, xmm, xmm [FMA3] +// * VFMSUB213SD m64, xmm, xmm [FMA3] +// * VFMSUB213SD m64, xmm, xmm{k}{z} [AVX512F] +// * VFMSUB213SD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFMSUB213SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFMSUB213SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUB213SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUB213SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUB213SD takes 3 or 4 operands") + } + // VFMSUB213SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xab) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB213SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xab) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB213SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xab) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VFMSUB213SD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xab) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUB213SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xab) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUB213SD") + } + return p +} + +// VFMSUB213SS performs "Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUB213SS +// Supported forms : (5 forms) +// +// * VFMSUB213SS xmm, xmm, xmm [FMA3] +// * VFMSUB213SS m32, xmm, xmm [FMA3] +// * VFMSUB213SS m32, xmm, xmm{k}{z} [AVX512F] +// * VFMSUB213SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFMSUB213SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFMSUB213SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUB213SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUB213SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUB213SS takes 3 or 4 operands") + } + // VFMSUB213SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xab) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB213SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xab) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB213SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xab) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VFMSUB213SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xab) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUB213SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xab) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUB213SS") + } + return p +} + +// VFMSUB231PD performs "Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUB231PD +// Supported forms : (11 forms) +// +// * VFMSUB231PD xmm, xmm, xmm [FMA3] +// * VFMSUB231PD m128, xmm, xmm [FMA3] +// * VFMSUB231PD ymm, ymm, ymm [FMA3] +// * VFMSUB231PD m256, ymm, ymm [FMA3] +// * VFMSUB231PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB231PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB231PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB231PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB231PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB231PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB231PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMSUB231PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUB231PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUB231PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUB231PD takes 3 or 4 operands") + } + // VFMSUB231PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xba) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB231PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xba) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB231PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0xba) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB231PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xba) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB231PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xba) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMSUB231PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xba) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUB231PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xba) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB231PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xba) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMSUB231PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xba) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB231PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xba) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMSUB231PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xba) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUB231PD") + } + return p +} + +// VFMSUB231PS performs "Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUB231PS +// Supported forms : (11 forms) +// +// * VFMSUB231PS xmm, xmm, xmm [FMA3] +// * VFMSUB231PS m128, xmm, xmm [FMA3] +// * VFMSUB231PS ymm, ymm, ymm [FMA3] +// * VFMSUB231PS m256, ymm, ymm [FMA3] +// * VFMSUB231PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB231PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB231PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUB231PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB231PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB231PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMSUB231PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMSUB231PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUB231PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUB231PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUB231PS takes 3 or 4 operands") + } + // VFMSUB231PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xba) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB231PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xba) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB231PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0xba) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB231PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xba) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB231PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xba) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMSUB231PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xba) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUB231PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xba) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB231PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xba) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMSUB231PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xba) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB231PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xba) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMSUB231PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xba) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUB231PS") + } + return p +} + +// VFMSUB231SD performs "Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values". 
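+// Illustrative usage, given a *Program p and assuming the XMM register constants
+// from this package; in the 231 form the destination enters only as the final
+// subtrahend, which suits accumulation loops:
+//
+//     p.VFMSUB231SD(XMM2, XMM1, XMM0)    // vfmsub231sd xmm0, xmm1, xmm2
+//                                        // xmm0 = (xmm1 * xmm2) - xmm0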
+// +// Mnemonic : VFMSUB231SD +// Supported forms : (5 forms) +// +// * VFMSUB231SD xmm, xmm, xmm [FMA3] +// * VFMSUB231SD m64, xmm, xmm [FMA3] +// * VFMSUB231SD m64, xmm, xmm{k}{z} [AVX512F] +// * VFMSUB231SD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFMSUB231SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFMSUB231SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUB231SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUB231SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUB231SD takes 3 or 4 operands") + } + // VFMSUB231SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xbb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB231SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbb) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB231SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xbb) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VFMSUB231SD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xbb) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUB231SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xbb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUB231SD") + } + return p +} + +// VFMSUB231SS performs "Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUB231SS +// Supported forms : (5 forms) +// +// * VFMSUB231SS xmm, xmm, xmm [FMA3] +// * VFMSUB231SS m32, xmm, xmm [FMA3] +// * VFMSUB231SS m32, xmm, xmm{k}{z} [AVX512F] +// * VFMSUB231SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFMSUB231SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFMSUB231SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUB231SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUB231SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUB231SS takes 3 or 4 operands") + } + // VFMSUB231SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xbb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUB231SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbb) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUB231SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xbb) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VFMSUB231SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xbb) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUB231SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xbb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUB231SS") + } + return p +} + +// VFMSUBADD132PD performs "Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values". 
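+// Illustrative semantics sketch, given a *Program p and assuming the XMM register
+// constants from this package: even-indexed elements are multiply-added and
+// odd-indexed elements are multiply-subtracted, with the 132 form multiplying the
+// destination by the first operand:
+//
+//     p.VFMSUBADD132PD(XMM2, XMM1, XMM0)    // vfmsubadd132pd xmm0, xmm1, xmm2
+//                                           // xmm0[0] = (xmm0[0] * xmm2[0]) + xmm1[0]
+//                                           // xmm0[1] = (xmm0[1] * xmm2[1]) - xmm1[1]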
+// +// Mnemonic : VFMSUBADD132PD +// Supported forms : (11 forms) +// +// * VFMSUBADD132PD xmm, xmm, xmm [FMA3] +// * VFMSUBADD132PD m128, xmm, xmm [FMA3] +// * VFMSUBADD132PD ymm, ymm, ymm [FMA3] +// * VFMSUBADD132PD m256, ymm, ymm [FMA3] +// * VFMSUBADD132PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD132PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD132PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD132PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD132PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD132PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD132PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMSUBADD132PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUBADD132PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUBADD132PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUBADD132PD takes 3 or 4 operands") + } + // VFMSUBADD132PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0x97) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD132PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x97) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUBADD132PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x97) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD132PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x97) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUBADD132PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x97) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMSUBADD132PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x97) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUBADD132PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x97) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD132PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x97) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMSUBADD132PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x97) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD132PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x97) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMSUBADD132PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x97) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUBADD132PD") + } + return p +} + +// VFMSUBADD132PS performs "Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUBADD132PS +// Supported forms : (11 forms) +// +// * VFMSUBADD132PS xmm, xmm, xmm [FMA3] +// * VFMSUBADD132PS m128, xmm, xmm [FMA3] +// * VFMSUBADD132PS ymm, ymm, ymm [FMA3] +// * VFMSUBADD132PS m256, ymm, ymm [FMA3] +// * VFMSUBADD132PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD132PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD132PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD132PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD132PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD132PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD132PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMSUBADD132PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUBADD132PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUBADD132PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUBADD132PS takes 3 or 4 operands") + } + // VFMSUBADD132PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x97) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD132PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x97) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUBADD132PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x97) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD132PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x97) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUBADD132PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x97) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMSUBADD132PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x97) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUBADD132PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x97) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD132PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x97) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMSUBADD132PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x97) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD132PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x97) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMSUBADD132PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x97) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUBADD132PS") + } + return p +} + +// VFMSUBADD213PD performs "Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUBADD213PD +// Supported forms : (11 forms) +// +// * VFMSUBADD213PD xmm, xmm, xmm [FMA3] +// * VFMSUBADD213PD m128, xmm, xmm [FMA3] +// * VFMSUBADD213PD ymm, ymm, ymm [FMA3] +// * VFMSUBADD213PD m256, ymm, ymm [FMA3] +// * VFMSUBADD213PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD213PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD213PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD213PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD213PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD213PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD213PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMSUBADD213PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUBADD213PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUBADD213PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUBADD213PD takes 3 or 4 operands") + } + // VFMSUBADD213PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xa7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD213PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa7) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUBADD213PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0xa7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD213PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa7) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUBADD213PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa7) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMSUBADD213PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xa7) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUBADD213PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xa7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD213PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa7) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMSUBADD213PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xa7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD213PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa7) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMSUBADD213PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xa7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUBADD213PD") + } + return p +} + +// VFMSUBADD213PS performs "Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUBADD213PS +// Supported forms : (11 forms) +// +// * VFMSUBADD213PS xmm, xmm, xmm [FMA3] +// * VFMSUBADD213PS m128, xmm, xmm [FMA3] +// * VFMSUBADD213PS ymm, ymm, ymm [FMA3] +// * VFMSUBADD213PS m256, ymm, ymm [FMA3] +// * VFMSUBADD213PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD213PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD213PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD213PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD213PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD213PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD213PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMSUBADD213PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUBADD213PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUBADD213PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUBADD213PS takes 3 or 4 operands") + } + // VFMSUBADD213PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xa7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD213PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa7) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUBADD213PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0xa7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD213PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xa7) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUBADD213PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa7) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMSUBADD213PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xa7) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUBADD213PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xa7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD213PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa7) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMSUBADD213PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xa7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD213PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xa7) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMSUBADD213PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xa7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUBADD213PS") + } + return p +} + +// VFMSUBADD231PD performs "Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUBADD231PD +// Supported forms : (11 forms) +// +// * VFMSUBADD231PD xmm, xmm, xmm [FMA3] +// * VFMSUBADD231PD m128, xmm, xmm [FMA3] +// * VFMSUBADD231PD ymm, ymm, ymm [FMA3] +// * VFMSUBADD231PD m256, ymm, ymm [FMA3] +// * VFMSUBADD231PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD231PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD231PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD231PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD231PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD231PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD231PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMSUBADD231PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUBADD231PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUBADD231PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUBADD231PD takes 3 or 4 operands") + } + // VFMSUBADD231PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD231PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb7) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUBADD231PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD231PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb7) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUBADD231PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb7) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMSUBADD231PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUBADD231PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD231PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb7) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMSUBADD231PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD231PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb7) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMSUBADD231PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUBADD231PD") + } + return p +} + +// VFMSUBADD231PS performs "Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUBADD231PS +// Supported forms : (11 forms) +// +// * VFMSUBADD231PS xmm, xmm, xmm [FMA3] +// * VFMSUBADD231PS m128, xmm, xmm [FMA3] +// * VFMSUBADD231PS ymm, ymm, ymm [FMA3] +// * VFMSUBADD231PS m256, ymm, ymm [FMA3] +// * VFMSUBADD231PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD231PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD231PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFMSUBADD231PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD231PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD231PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFMSUBADD231PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFMSUBADD231PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFMSUBADD231PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFMSUBADD231PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFMSUBADD231PS takes 3 or 4 operands") + } + // VFMSUBADD231PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD231PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb7) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUBADD231PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD231PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xb7) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFMSUBADD231PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb7) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFMSUBADD231PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFMSUBADD231PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD231PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb7) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFMSUBADD231PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFMSUBADD231PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb7) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFMSUBADD231PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xb7) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUBADD231PS") + } + return p +} + +// VFMSUBADDPD performs "Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values". 
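+// Illustrative usage of the four-operand FMA4 encoding, given a *Program p and
+// assuming the XMM register constants from this package; unlike the FMA3 forms
+// above, the destination is a separate final operand rather than an implicit
+// source:
+//
+//     p.VFMSUBADDPD(XMM3, XMM2, XMM1, XMM0)    // vfmsubaddpd xmm0, xmm1, xmm2, xmm3
+//                                              // xmm0 = (xmm1 * xmm2) -/+ xmm3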
+// +// Mnemonic : VFMSUBADDPD +// Supported forms : (6 forms) +// +// * VFMSUBADDPD xmm, xmm, xmm, xmm [FMA4] +// * VFMSUBADDPD m128, xmm, xmm, xmm [FMA4] +// * VFMSUBADDPD xmm, m128, xmm, xmm [FMA4] +// * VFMSUBADDPD ymm, ymm, ymm, ymm [FMA4] +// * VFMSUBADDPD m256, ymm, ymm, ymm [FMA4] +// * VFMSUBADDPD ymm, m256, ymm, ymm [FMA4] +// +func (self *Program) VFMSUBADDPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFMSUBADDPD", 4, Operands { v0, v1, v2, v3 }) + // VFMSUBADDPD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBADDPD m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x5f) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMSUBADDPD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x5f) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBADDPD ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBADDPD m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x5f) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMSUBADDPD ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x5f) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUBADDPD") + } + return p +} + +// VFMSUBADDPS performs "Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUBADDPS +// Supported forms : (6 forms) +// +// * VFMSUBADDPS xmm, xmm, xmm, xmm [FMA4] +// * VFMSUBADDPS m128, xmm, xmm, xmm [FMA4] +// * VFMSUBADDPS xmm, m128, xmm, xmm [FMA4] +// * VFMSUBADDPS ymm, ymm, ymm, ymm [FMA4] +// * VFMSUBADDPS m256, ymm, ymm, ymm [FMA4] +// * VFMSUBADDPS ymm, m256, ymm, ymm [FMA4] +// +func (self *Program) VFMSUBADDPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFMSUBADDPS", 4, Operands { v0, v1, v2, v3 }) + // VFMSUBADDPS xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBADDPS m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x5e) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMSUBADDPS xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x5e) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBADDPS ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x5e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBADDPS m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x5e) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMSUBADDPS ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x5e) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUBADDPS") + } + return p +} + +// VFMSUBPD performs "Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUBPD +// Supported forms : (6 forms) +// +// * VFMSUBPD xmm, xmm, xmm, xmm [FMA4] +// * VFMSUBPD m128, xmm, xmm, xmm [FMA4] +// * VFMSUBPD xmm, m128, xmm, xmm [FMA4] +// * VFMSUBPD ymm, ymm, ymm, ymm [FMA4] +// * VFMSUBPD m256, ymm, ymm, ymm [FMA4] +// * VFMSUBPD ymm, m256, ymm, ymm [FMA4] +// +func (self *Program) VFMSUBPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFMSUBPD", 4, Operands { v0, v1, v2, v3 }) + // VFMSUBPD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x6d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x6d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBPD m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x6d) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMSUBPD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x6d) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBPD ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit(0x6d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x6d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBPD m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x6d) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMSUBPD ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x6d) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUBPD") + } + return p +} + +// VFMSUBPS performs "Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUBPS +// Supported forms : (6 forms) +// +// * VFMSUBPS xmm, xmm, xmm, xmm [FMA4] +// * VFMSUBPS m128, xmm, xmm, xmm [FMA4] +// * VFMSUBPS xmm, m128, xmm, xmm [FMA4] +// * VFMSUBPS ymm, ymm, ymm, ymm [FMA4] +// * VFMSUBPS m256, ymm, ymm, ymm [FMA4] +// * VFMSUBPS ymm, m256, ymm, ymm [FMA4] +// +func (self *Program) VFMSUBPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFMSUBPS", 4, Operands { v0, v1, v2, v3 }) + // VFMSUBPS xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x6c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x6c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBPS m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x6c) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMSUBPS xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x6c) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBPS ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit(0x6c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x6c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBPS m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x6c) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMSUBPS ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x6c) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUBPS") + } + return p +} + +// VFMSUBSD performs "Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values". 
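+//
+// The scalar FMA4 forms accept an m64 source in either multiplicand slot. A
+// sketch, assuming prog is a *Program and mem64 is a 64-bit memory operand
+// built with this package's addressing helpers (the name is illustrative):
+//
+//	p := prog.VFMSUBSD(mem64, XMM1, XMM2, XMM3)
+//
+// Memory operands are encoded through vex3/mrsd rather than the /is4 byte,
+// which is why only a register can occupy that final immediate.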
+// +// Mnemonic : VFMSUBSD +// Supported forms : (3 forms) +// +// * VFMSUBSD xmm, xmm, xmm, xmm [FMA4] +// * VFMSUBSD m64, xmm, xmm, xmm [FMA4] +// * VFMSUBSD xmm, m64, xmm, xmm [FMA4] +// +func (self *Program) VFMSUBSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFMSUBSD", 4, Operands { v0, v1, v2, v3 }) + // VFMSUBSD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBSD m64, xmm, xmm, xmm + if isM64(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x6f) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMSUBSD xmm, m64, xmm, xmm + if isXMM(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x6f) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUBSD") + } + return p +} + +// VFMSUBSS performs "Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFMSUBSS +// Supported forms : (3 forms) +// +// * VFMSUBSS xmm, xmm, xmm, xmm [FMA4] +// * VFMSUBSS m32, xmm, xmm, xmm [FMA4] +// * VFMSUBSS xmm, m32, xmm, xmm [FMA4] +// +func (self *Program) VFMSUBSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFMSUBSS", 4, Operands { v0, v1, v2, v3 }) + // VFMSUBSS xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x6e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x6e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFMSUBSS m32, xmm, xmm, xmm + if isM32(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x6e) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFMSUBSS xmm, m32, xmm, xmm + if isXMM(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x6e) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFMSUBSS") + } + return p +} + +// VFNMADD132PD performs "Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values". 
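+//
+// This method is variadic: three operands select the forms listed below,
+// while a fourth, leading rounding-control operand (one accepted by isER)
+// selects the EVEX {er} encoding. A sketch, with rn standing in for such a
+// rounding operand (its concrete constant is defined elsewhere, not here):
+//
+//	p3 := prog.VFNMADD132PD(ZMM0, ZMM1, ZMM2)     // plain AVX-512 form
+//	p4 := prog.VFNMADD132PD(rn, ZMM0, ZMM1, ZMM2) // {er} form with rounding override
+//
+// Any other operand count panics in the switch at the top of the method.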
+// +// Mnemonic : VFNMADD132PD +// Supported forms : (11 forms) +// +// * VFNMADD132PD xmm, xmm, xmm [FMA3] +// * VFNMADD132PD m128, xmm, xmm [FMA3] +// * VFNMADD132PD ymm, ymm, ymm [FMA3] +// * VFNMADD132PD m256, ymm, ymm [FMA3] +// * VFNMADD132PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD132PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD132PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD132PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD132PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD132PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD132PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFNMADD132PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMADD132PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMADD132PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMADD132PD takes 3 or 4 operands") + } + // VFNMADD132PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD132PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD132PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD132PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD132PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9c) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFNMADD132PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMADD132PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) 
| (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD132PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9c) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFNMADD132PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD132PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9c) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFNMADD132PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADD132PD") + } + return p +} + +// VFNMADD132PS performs "Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMADD132PS +// Supported forms : (11 forms) +// +// * VFNMADD132PS xmm, xmm, xmm [FMA3] +// * VFNMADD132PS m128, xmm, xmm [FMA3] +// * VFNMADD132PS ymm, ymm, ymm [FMA3] +// * VFNMADD132PS m256, ymm, ymm [FMA3] +// * VFNMADD132PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD132PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD132PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD132PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD132PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD132PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD132PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFNMADD132PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMADD132PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMADD132PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMADD132PS takes 3 or 4 operands") + } + // VFNMADD132PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD132PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD132PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD132PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD132PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9c) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFNMADD132PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMADD132PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) 
| (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD132PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9c) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFNMADD132PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD132PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9c) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFNMADD132PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x9c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADD132PS") + } + return p +} + +// VFNMADD132SD performs "Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values". 
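+//
+// Note the last argument to m.mrsd in the EVEX forms below: it is the EVEX
+// disp8*N compression factor, equal to the memory operand's natural size
+// (8 for the m64 scalar here; 16, 32 and 64 for the vector widths), whereas
+// the VEX-encoded form keeps the factor at 1.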
+// +// Mnemonic : VFNMADD132SD +// Supported forms : (5 forms) +// +// * VFNMADD132SD xmm, xmm, xmm [FMA3] +// * VFNMADD132SD m64, xmm, xmm [FMA3] +// * VFNMADD132SD m64, xmm, xmm{k}{z} [AVX512F] +// * VFNMADD132SD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFNMADD132SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFNMADD132SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMADD132SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMADD132SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMADD132SD takes 3 or 4 operands") + } + // VFNMADD132SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0x9d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD132SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD132SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x9d) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VFNMADD132SD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x9d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMADD132SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x9d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADD132SD") + } + return p +} + +// VFNMADD132SS performs "Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMADD132SS +// Supported forms : (5 forms) +// +// * VFNMADD132SS xmm, xmm, xmm [FMA3] +// * VFNMADD132SS m32, xmm, xmm [FMA3] +// * VFNMADD132SS m32, xmm, xmm{k}{z} [AVX512F] +// * VFNMADD132SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFNMADD132SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFNMADD132SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMADD132SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMADD132SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMADD132SS takes 3 or 4 operands") + } + // VFNMADD132SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x9d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD132SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD132SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x9d) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VFNMADD132SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x9d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMADD132SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x9d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADD132SS") + } + return p +} + +// VFNMADD213PD performs "Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values". 
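+//
+// A reading aid, not generated text: the 132/213/231 digits name the Intel
+// operand positions used as multiplier, multiplicand and addend, so 213
+// computes dst = -(src2*dst) + src3 in Intel order. This package lists
+// operands source-first, and the last operand lands in the ModRM reg field
+// below, i.e. it is the Intel destination.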
+// +// Mnemonic : VFNMADD213PD +// Supported forms : (11 forms) +// +// * VFNMADD213PD xmm, xmm, xmm [FMA3] +// * VFNMADD213PD m128, xmm, xmm [FMA3] +// * VFNMADD213PD ymm, ymm, ymm [FMA3] +// * VFNMADD213PD m256, ymm, ymm [FMA3] +// * VFNMADD213PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD213PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD213PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD213PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD213PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD213PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD213PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFNMADD213PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMADD213PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMADD213PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMADD213PD takes 3 or 4 operands") + } + // VFNMADD213PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xac) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD213PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xac) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD213PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0xac) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD213PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xac) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD213PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xac) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFNMADD213PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xac) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMADD213PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) 
| (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xac) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD213PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xac) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFNMADD213PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xac) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD213PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xac) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFNMADD213PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xac) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADD213PD") + } + return p +} + +// VFNMADD213PS performs "Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values". 
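+//
+// The 128- and 256-bit EVEX forms below require both ISA_AVX512VL and
+// ISA_AVX512F: AVX512VL is what licenses EVEX encodings at sub-512-bit
+// vector lengths, so self.require is passed the OR of the two feature bits.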
+// +// Mnemonic : VFNMADD213PS +// Supported forms : (11 forms) +// +// * VFNMADD213PS xmm, xmm, xmm [FMA3] +// * VFNMADD213PS m128, xmm, xmm [FMA3] +// * VFNMADD213PS ymm, ymm, ymm [FMA3] +// * VFNMADD213PS m256, ymm, ymm [FMA3] +// * VFNMADD213PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD213PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD213PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD213PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD213PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD213PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD213PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFNMADD213PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMADD213PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMADD213PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMADD213PS takes 3 or 4 operands") + } + // VFNMADD213PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xac) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD213PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xac) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD213PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0xac) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD213PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xac) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD213PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xac) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFNMADD213PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xac) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMADD213PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) 
| (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xac) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD213PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xac) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFNMADD213PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xac) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD213PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xac) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFNMADD213PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xac) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADD213PS") + } + return p +} + +// VFNMADD213SD performs "Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values". 
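+//
+// In the EVEX bytes assembled below, kcode(v[2]) contributes the opmask
+// register selector (the {k} suffix) and zcode(v[2])<<7 the zeroing bit (the
+// {z} suffix); a destination written without either suffix encodes k0 with
+// merging semantics, so a plain register is expected to satisfy isXMMkz too.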
+// +// Mnemonic : VFNMADD213SD +// Supported forms : (5 forms) +// +// * VFNMADD213SD xmm, xmm, xmm [FMA3] +// * VFNMADD213SD m64, xmm, xmm [FMA3] +// * VFNMADD213SD m64, xmm, xmm{k}{z} [AVX512F] +// * VFNMADD213SD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFNMADD213SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFNMADD213SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMADD213SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMADD213SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMADD213SD takes 3 or 4 operands") + } + // VFNMADD213SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xad) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD213SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xad) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD213SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xad) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VFNMADD213SD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xad) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMADD213SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xad) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADD213SD") + } + return p +} + +// VFNMADD213SS performs "Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMADD213SS +// Supported forms : (5 forms) +// +// * VFNMADD213SS xmm, xmm, xmm [FMA3] +// * VFNMADD213SS m32, xmm, xmm [FMA3] +// * VFNMADD213SS m32, xmm, xmm{k}{z} [AVX512F] +// * VFNMADD213SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFNMADD213SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFNMADD213SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMADD213SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMADD213SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMADD213SS takes 3 or 4 operands") + } + // VFNMADD213SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xad) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD213SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xad) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD213SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xad) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VFNMADD213SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xad) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMADD213SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xad) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADD213SS") + } + return p +} + +// VFNMADD231PD performs "Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values". 
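+//
+// The m512/m64bcst forms take either a full 512-bit memory source or a
+// single 64-bit element to broadcast; bcode(v[0]) sets the EVEX.b bit to
+// match. A sketch, with bcst standing for a broadcast memory operand built
+// by this package's broadcast helper (the name is illustrative):
+//
+//	p := prog.VFNMADD231PD(bcst, ZMM1, ZMM2)
+//
+// The same call with a plain 64-byte memory operand selects the
+// non-broadcast encoding through the same isM512M64bcst check.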
+// +// Mnemonic : VFNMADD231PD +// Supported forms : (11 forms) +// +// * VFNMADD231PD xmm, xmm, xmm [FMA3] +// * VFNMADD231PD m128, xmm, xmm [FMA3] +// * VFNMADD231PD ymm, ymm, ymm [FMA3] +// * VFNMADD231PD m256, ymm, ymm [FMA3] +// * VFNMADD231PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD231PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD231PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD231PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD231PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD231PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD231PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFNMADD231PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMADD231PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMADD231PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMADD231PD takes 3 or 4 operands") + } + // VFNMADD231PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD231PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbc) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD231PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD231PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbc) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD231PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xbc) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFNMADD231PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMADD231PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) 
| (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD231PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xbc) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFNMADD231PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD231PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xbc) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFNMADD231PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADD231PD") + } + return p +} + +// VFNMADD231PS performs "Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMADD231PS +// Supported forms : (11 forms) +// +// * VFNMADD231PS xmm, xmm, xmm [FMA3] +// * VFNMADD231PS m128, xmm, xmm [FMA3] +// * VFNMADD231PS ymm, ymm, ymm [FMA3] +// * VFNMADD231PS m256, ymm, ymm [FMA3] +// * VFNMADD231PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD231PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD231PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMADD231PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD231PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD231PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFNMADD231PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFNMADD231PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMADD231PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMADD231PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMADD231PS takes 3 or 4 operands") + } + // VFNMADD231PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD231PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbc) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD231PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD231PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbc) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD231PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xbc) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFNMADD231PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMADD231PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD231PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xbc) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFNMADD231PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD231PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xbc) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFNMADD231PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xbc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADD231PS") + } + return p +} + +// VFNMADD231SD performs "Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values".
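+// +// A minimal usage sketch (an illustration, assuming this package's XMM register constants and an already-constructed *Program p; as throughout this file, operands are written source-first with the destination register last): +// +//     p.VFNMADD231SD(XMM2, XMM1, XMM0)    // XMM0 = -(XMM1 * XMM2) + XMM0 +//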
+// +// Mnemonic : VFNMADD231SD +// Supported forms : (5 forms) +// +// * VFNMADD231SD xmm, xmm, xmm [FMA3] +// * VFNMADD231SD m64, xmm, xmm [FMA3] +// * VFNMADD231SD m64, xmm, xmm{k}{z} [AVX512F] +// * VFNMADD231SD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFNMADD231SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFNMADD231SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMADD231SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMADD231SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMADD231SD takes 3 or 4 operands") + } + // VFNMADD231SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xbd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD231SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbd) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD231SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xbd) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VFNMADD231SD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xbd) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMADD231SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xbd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADD231SD") + } + return p +} + +// VFNMADD231SS performs "Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMADD231SS +// Supported forms : (5 forms) +// +// * VFNMADD231SS xmm, xmm, xmm [FMA3] +// * VFNMADD231SS m32, xmm, xmm [FMA3] +// * VFNMADD231SS m32, xmm, xmm{k}{z} [AVX512F] +// * VFNMADD231SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFNMADD231SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFNMADD231SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMADD231SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMADD231SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMADD231SS takes 3 or 4 operands") + } + // VFNMADD231SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xbd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMADD231SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbd) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMADD231SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xbd) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VFNMADD231SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xbd) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMADD231SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xbd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADD231SS") + } + return p +} + +// VFNMADDPD performs "Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values". 
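+// +// A minimal usage sketch (an illustration, assuming this package's XMM register constants and an already-constructed *Program p); the FMA4 form takes four operands, destination last, and the register-register form registers both of its legal VEX encodings with the assembler: +// +//     p.VFNMADDPD(XMM3, XMM2, XMM1, XMM0)    // XMM0 = -(XMM1 * XMM2) + XMM3 +//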
+// +// Mnemonic : VFNMADDPD +// Supported forms : (6 forms) +// +// * VFNMADDPD xmm, xmm, xmm, xmm [FMA4] +// * VFNMADDPD m128, xmm, xmm, xmm [FMA4] +// * VFNMADDPD xmm, m128, xmm, xmm [FMA4] +// * VFNMADDPD ymm, ymm, ymm, ymm [FMA4] +// * VFNMADDPD m256, ymm, ymm, ymm [FMA4] +// * VFNMADDPD ymm, m256, ymm, ymm [FMA4] +// +func (self *Program) VFNMADDPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFNMADDPD", 4, Operands { v0, v1, v2, v3 }) + // VFNMADDPD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x79) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x79) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMADDPD m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x79) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFNMADDPD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x79) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMADDPD ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit(0x79) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x79) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMADDPD m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x79) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFNMADDPD ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x79) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADDPD") + } + return p +} + +// VFNMADDPS performs "Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMADDPS +// Supported forms : (6 forms) +// +// * VFNMADDPS xmm, xmm, xmm, xmm [FMA4] +// * VFNMADDPS m128, xmm, xmm, xmm [FMA4] +// * VFNMADDPS xmm, m128, xmm, xmm [FMA4] +// * VFNMADDPS ymm, ymm, ymm, ymm [FMA4] +// * VFNMADDPS m256, ymm, ymm, ymm [FMA4] +// * VFNMADDPS ymm, m256, ymm, ymm [FMA4] +// +func (self *Program) VFNMADDPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFNMADDPS", 4, Operands { v0, v1, v2, v3 }) + // VFNMADDPS xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x78) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x78) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMADDPS m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x78) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFNMADDPS xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x78) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMADDPS ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit(0x78) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x78) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMADDPS m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x78) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFNMADDPS ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x78) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADDPS") + } + return p +} + +// VFNMADDSD performs "Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMADDSD +// Supported forms : (3 forms) +// +// * VFNMADDSD xmm, xmm, xmm, xmm [FMA4] +// * VFNMADDSD m64, xmm, xmm, xmm [FMA4] +// * VFNMADDSD xmm, m64, xmm, xmm [FMA4] +// +func (self *Program) VFNMADDSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFNMADDSD", 4, Operands { v0, v1, v2, v3 }) + // VFNMADDSD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMADDSD m64, xmm, xmm, xmm + if isM64(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x7b) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFNMADDSD xmm, m64, xmm, xmm + if isXMM(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x7b) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADDSD") + } + return p +} + +// VFNMADDSS performs "Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMADDSS +// Supported forms : (3 forms) +// +// * VFNMADDSS xmm, xmm, xmm, xmm [FMA4] +// * VFNMADDSS m32, xmm, xmm, xmm [FMA4] +// * VFNMADDSS xmm, m32, xmm, xmm [FMA4] +// +func (self *Program) VFNMADDSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFNMADDSS", 4, Operands { v0, v1, v2, v3 }) + // VFNMADDSS xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMADDSS m32, xmm, xmm, xmm + if isM32(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x7a) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFNMADDSS xmm, m32, xmm, xmm + if isXMM(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x7a) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMADDSS") + } + return p +} + +// VFNMSUB132PD performs "Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values". 
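+// +// A minimal usage sketch (an illustration, assuming this package's YMM register constants and an already-constructed *Program p); in the 132 variant the destination supplies the first factor of the product: +// +//     p.VFNMSUB132PD(YMM2, YMM1, YMM0)    // YMM0 = -(YMM0 * YMM2) - YMM1 +//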
+// +// Mnemonic : VFNMSUB132PD +// Supported forms : (11 forms) +// +// * VFNMSUB132PD xmm, xmm, xmm [FMA3] +// * VFNMSUB132PD m128, xmm, xmm [FMA3] +// * VFNMSUB132PD ymm, ymm, ymm [FMA3] +// * VFNMSUB132PD m256, ymm, ymm [FMA3] +// * VFNMSUB132PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB132PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB132PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB132PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB132PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB132PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB132PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFNMSUB132PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMSUB132PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMSUB132PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMSUB132PD takes 3 or 4 operands") + } + // VFNMSUB132PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB132PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9e) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB132PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB132PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9e) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB132PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9e) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFNMSUB132PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMSUB132PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB132PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9e) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFNMSUB132PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB132PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9e) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFNMSUB132PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUB132PD") + } + return p +} + +// VFNMSUB132PS performs "Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values".
+// +// Mnemonic : VFNMSUB132PS +// Supported forms : (11 forms) +// +// * VFNMSUB132PS xmm, xmm, xmm [FMA3] +// * VFNMSUB132PS m128, xmm, xmm [FMA3] +// * VFNMSUB132PS ymm, ymm, ymm [FMA3] +// * VFNMSUB132PS m256, ymm, ymm [FMA3] +// * VFNMSUB132PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB132PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB132PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB132PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB132PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB132PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB132PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFNMSUB132PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMSUB132PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMSUB132PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMSUB132PS takes 3 or 4 operands") + } + // VFNMSUB132PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB132PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9e) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB132PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB132PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9e) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB132PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9e) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFNMSUB132PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMSUB132PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB132PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9e) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFNMSUB132PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB132PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x9e) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFNMSUB132PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUB132PS") + } + return p +} + +// VFNMSUB132SD performs "Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values".
+// +// Mnemonic : VFNMSUB132SD +// Supported forms : (5 forms) +// +// * VFNMSUB132SD xmm, xmm, xmm [FMA3] +// * VFNMSUB132SD m64, xmm, xmm [FMA3] +// * VFNMSUB132SD m64, xmm, xmm{k}{z} [AVX512F] +// * VFNMSUB132SD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFNMSUB132SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFNMSUB132SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMSUB132SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMSUB132SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMSUB132SD takes 3 or 4 operands") + } + // VFNMSUB132SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0x9f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB132SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9f) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB132SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x9f) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VFNMSUB132SD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x9f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMSUB132SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x9f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUB132SD") + } + return p +} + +// VFNMSUB132SS performs "Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMSUB132SS +// Supported forms : (5 forms) +// +// * VFNMSUB132SS xmm, xmm, xmm [FMA3] +// * VFNMSUB132SS m32, xmm, xmm [FMA3] +// * VFNMSUB132SS m32, xmm, xmm{k}{z} [AVX512F] +// * VFNMSUB132SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFNMSUB132SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFNMSUB132SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMSUB132SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMSUB132SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMSUB132SS takes 3 or 4 operands") + } + // VFNMSUB132SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x9f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB132SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9f) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB132SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x9f) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VFNMSUB132SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x9f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMSUB132SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x9f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUB132SS") + } + return p +} + +// VFNMSUB213PD performs "Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values". 
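+// +// A minimal usage sketch (an illustration, assuming this package's XMM register constants and an already-constructed *Program p); in the 213 variant the destination supplies the middle factor of the product: +// +//     p.VFNMSUB213PD(XMM2, XMM1, XMM0)    // XMM0 = -(XMM1 * XMM0) - XMM2 +//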
+// +// Mnemonic : VFNMSUB213PD +// Supported forms : (11 forms) +// +// * VFNMSUB213PD xmm, xmm, xmm [FMA3] +// * VFNMSUB213PD m128, xmm, xmm [FMA3] +// * VFNMSUB213PD ymm, ymm, ymm [FMA3] +// * VFNMSUB213PD m256, ymm, ymm [FMA3] +// * VFNMSUB213PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB213PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB213PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB213PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB213PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB213PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB213PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFNMSUB213PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMSUB213PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMSUB213PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMSUB213PD takes 3 or 4 operands") + } + // VFNMSUB213PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xae) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB213PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xae) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB213PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0xae) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB213PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xae) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB213PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xae) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFNMSUB213PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xae) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMSUB213PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xae) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB213PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xae) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFNMSUB213PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xae) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB213PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xae) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFNMSUB213PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xae) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUB213PD") + } + return p +} + +// VFNMSUB213PS performs "Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values".
+// +// Mnemonic : VFNMSUB213PS +// Supported forms : (11 forms) +// +// * VFNMSUB213PS xmm, xmm, xmm [FMA3] +// * VFNMSUB213PS m128, xmm, xmm [FMA3] +// * VFNMSUB213PS ymm, ymm, ymm [FMA3] +// * VFNMSUB213PS m256, ymm, ymm [FMA3] +// * VFNMSUB213PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB213PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB213PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB213PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB213PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB213PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB213PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFNMSUB213PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMSUB213PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMSUB213PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMSUB213PS takes 3 or 4 operands") + } + // VFNMSUB213PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xae) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB213PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xae) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB213PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0xae) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB213PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xae) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB213PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xae) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFNMSUB213PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xae) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMSUB213PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xae) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB213PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xae) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFNMSUB213PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xae) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB213PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xae) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFNMSUB213PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xae) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUB213PS") + } + return p +} + +// VFNMSUB213SD performs "Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values".
+// +// Mnemonic : VFNMSUB213SD +// Supported forms : (5 forms) +// +// * VFNMSUB213SD xmm, xmm, xmm [FMA3] +// * VFNMSUB213SD m64, xmm, xmm [FMA3] +// * VFNMSUB213SD m64, xmm, xmm{k}{z} [AVX512F] +// * VFNMSUB213SD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFNMSUB213SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFNMSUB213SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMSUB213SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMSUB213SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMSUB213SD takes 3 or 4 operands") + } + // VFNMSUB213SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xaf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB213SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xaf) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB213SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xaf) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VFNMSUB213SD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xaf) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMSUB213SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xaf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUB213SD") + } + return p +} + +// VFNMSUB213SS performs "Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMSUB213SS +// Supported forms : (5 forms) +// +// * VFNMSUB213SS xmm, xmm, xmm [FMA3] +// * VFNMSUB213SS m32, xmm, xmm [FMA3] +// * VFNMSUB213SS m32, xmm, xmm{k}{z} [AVX512F] +// * VFNMSUB213SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFNMSUB213SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFNMSUB213SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMSUB213SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMSUB213SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMSUB213SS takes 3 or 4 operands") + } + // VFNMSUB213SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xaf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB213SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xaf) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB213SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xaf) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VFNMSUB213SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xaf) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMSUB213SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xaf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUB213SS") + } + return p +} + +// VFNMSUB231PD performs "Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values". 
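+// +// A minimal usage sketch (an illustration, assuming this package's ZMM register constants and an already-constructed *Program p); plain ZMM operands select the EVEX-encoded AVX512F form, while the masked ({k}{z}), broadcast and rounding ({er}) variants require the package's mask and rounding operand helpers, which are not shown here: +// +//     p.VFNMSUB231PD(ZMM2, ZMM1, ZMM0)    // ZMM0 = -(ZMM1 * ZMM2) - ZMM0 +//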
+// +// Mnemonic : VFNMSUB231PD +// Supported forms : (11 forms) +// +// * VFNMSUB231PD xmm, xmm, xmm [FMA3] +// * VFNMSUB231PD m128, xmm, xmm [FMA3] +// * VFNMSUB231PD ymm, ymm, ymm [FMA3] +// * VFNMSUB231PD m256, ymm, ymm [FMA3] +// * VFNMSUB231PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB231PD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB231PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB231PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB231PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB231PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB231PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFNMSUB231PD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMSUB231PD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMSUB231PD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMSUB231PD takes 3 or 4 operands") + } + // VFNMSUB231PD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB231PD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbe) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB231PD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB231PD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbe) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB231PD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xbe) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFNMSUB231PD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMSUB231PD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) 
| (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB231PD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xbe) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFNMSUB231PD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB231PD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xbe) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFNMSUB231PD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUB231PD") + } + return p +} + +// VFNMSUB231PS performs "Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values". 
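+//
+// A hand-written usage sketch, not produced by mkasm_amd64.py; it assumes "p"
+// is a *Program built by this package and that XMM0..XMM2 name its XMM
+// register constants. Operands use the reversed, AT&T-style order seen
+// throughout this file, so the destination comes last, and the 231 variant
+// accumulates into it:
+//
+//     p.VFNMSUB231PS(XMM0, XMM1, XMM2) // xmm2 = -(xmm1 * xmm0) - xmm2 (FMA3 form)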
+// +// Mnemonic : VFNMSUB231PS +// Supported forms : (11 forms) +// +// * VFNMSUB231PS xmm, xmm, xmm [FMA3] +// * VFNMSUB231PS m128, xmm, xmm [FMA3] +// * VFNMSUB231PS ymm, ymm, ymm [FMA3] +// * VFNMSUB231PS m256, ymm, ymm [FMA3] +// * VFNMSUB231PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB231PS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB231PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VFNMSUB231PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB231PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB231PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VFNMSUB231PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VFNMSUB231PS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMSUB231PS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMSUB231PS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMSUB231PS takes 3 or 4 operands") + } + // VFNMSUB231PS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB231PS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbe) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB231PS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB231PS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbe) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB231PS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xbe) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VFNMSUB231PS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMSUB231PS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) 
| (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB231PS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xbe) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VFNMSUB231PS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB231PS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xbe) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VFNMSUB231PS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xbe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUB231PS") + } + return p +} + +// VFNMSUB231SD performs "Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values". 
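+//
+// A hand-written sketch under the same assumptions as above ("p" is a
+// *Program, XMM0..XMM2 are this package's XMM register constants). Only the
+// low 64-bit lane is computed; the destination's upper lane is left
+// unchanged. The {er} form listed below additionally takes a leading
+// rounding-control operand (see isER) and a fourth, destination operand:
+//
+//     p.VFNMSUB231SD(XMM0, XMM1, XMM2) // xmm2[63:0] = -(xmm1[63:0] * xmm0[63:0]) - xmm2[63:0]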
+// +// Mnemonic : VFNMSUB231SD +// Supported forms : (5 forms) +// +// * VFNMSUB231SD xmm, xmm, xmm [FMA3] +// * VFNMSUB231SD m64, xmm, xmm [FMA3] +// * VFNMSUB231SD m64, xmm, xmm{k}{z} [AVX512F] +// * VFNMSUB231SD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFNMSUB231SD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFNMSUB231SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMSUB231SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMSUB231SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMSUB231SD takes 3 or 4 operands") + } + // VFNMSUB231SD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0xbf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB231SD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbf) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB231SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xbf) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VFNMSUB231SD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xbf) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMSUB231SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xbf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUB231SD") + } + return p +} + +// VFNMSUB231SS performs "Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMSUB231SS +// Supported forms : (5 forms) +// +// * VFNMSUB231SS xmm, xmm, xmm [FMA3] +// * VFNMSUB231SS m32, xmm, xmm [FMA3] +// * VFNMSUB231SS m32, xmm, xmm{k}{z} [AVX512F] +// * VFNMSUB231SS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VFNMSUB231SS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VFNMSUB231SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VFNMSUB231SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VFNMSUB231SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VFNMSUB231SS takes 3 or 4 operands") + } + // VFNMSUB231SS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0xbf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VFNMSUB231SS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_FMA3) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xbf) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VFNMSUB231SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xbf) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VFNMSUB231SS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xbf) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VFNMSUB231SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xbf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUB231SS") + } + return p +} + +// VFNMSUBPD performs "Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values". 
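+//
+// A hand-written sketch under the same assumptions as above. Unlike the FMA3
+// 231 forms, AMD's FMA4 encoding is non-destructive: the two multiplicands,
+// the subtrahend, and a separate destination are all distinct operands, with
+// the destination again listed last:
+//
+//     p.VFNMSUBPD(XMM0, XMM1, XMM2, XMM3) // xmm3 = -(xmm2 * xmm1) - xmm0 (FMA4)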
+// +// Mnemonic : VFNMSUBPD +// Supported forms : (6 forms) +// +// * VFNMSUBPD xmm, xmm, xmm, xmm [FMA4] +// * VFNMSUBPD m128, xmm, xmm, xmm [FMA4] +// * VFNMSUBPD xmm, m128, xmm, xmm [FMA4] +// * VFNMSUBPD ymm, ymm, ymm, ymm [FMA4] +// * VFNMSUBPD m256, ymm, ymm, ymm [FMA4] +// * VFNMSUBPD ymm, m256, ymm, ymm [FMA4] +// +func (self *Program) VFNMSUBPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFNMSUBPD", 4, Operands { v0, v1, v2, v3 }) + // VFNMSUBPD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMSUBPD m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x7d) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFNMSUBPD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x7d) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMSUBPD ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMSUBPD m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x7d) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFNMSUBPD ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x7d) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUBPD") + } + return p +} + +// VFNMSUBPS performs "Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMSUBPS +// Supported forms : (6 forms) +// +// * VFNMSUBPS xmm, xmm, xmm, xmm [FMA4] +// * VFNMSUBPS m128, xmm, xmm, xmm [FMA4] +// * VFNMSUBPS xmm, m128, xmm, xmm [FMA4] +// * VFNMSUBPS ymm, ymm, ymm, ymm [FMA4] +// * VFNMSUBPS m256, ymm, ymm, ymm [FMA4] +// * VFNMSUBPS ymm, m256, ymm, ymm [FMA4] +// +func (self *Program) VFNMSUBPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFNMSUBPS", 4, Operands { v0, v1, v2, v3 }) + // VFNMSUBPS xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMSUBPS m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x7c) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFNMSUBPS xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x7c) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMSUBPS ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMSUBPS m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x7c) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFNMSUBPS ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x7c) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUBPS") + } + return p +} + +// VFNMSUBSD performs "Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values". 
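+//
+// A hand-written sketch under the same assumptions as above. Note that the
+// all-register FMA4 forms in this file register two p.add callbacks: the
+// VEX.W1 and VEX.W0 encodings place the second multiplicand in the trailing
+// /is4 byte or in ModRM.rm respectively, so both are offered to the encoder
+// as interchangeable alternatives:
+//
+//     p.VFNMSUBSD(XMM0, XMM1, XMM2, XMM3) // xmm3[63:0] = -(xmm2[63:0] * xmm1[63:0]) - xmm0[63:0]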
+// +// Mnemonic : VFNMSUBSD +// Supported forms : (3 forms) +// +// * VFNMSUBSD xmm, xmm, xmm, xmm [FMA4] +// * VFNMSUBSD m64, xmm, xmm, xmm [FMA4] +// * VFNMSUBSD xmm, m64, xmm, xmm [FMA4] +// +func (self *Program) VFNMSUBSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFNMSUBSD", 4, Operands { v0, v1, v2, v3 }) + // VFNMSUBSD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMSUBSD m64, xmm, xmm, xmm + if isM64(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x7f) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFNMSUBSD xmm, m64, xmm, xmm + if isXMM(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x7f) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUBSD") + } + return p +} + +// VFNMSUBSS performs "Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFNMSUBSS +// Supported forms : (3 forms) +// +// * VFNMSUBSS xmm, xmm, xmm, xmm [FMA4] +// * VFNMSUBSS m32, xmm, xmm, xmm [FMA4] +// * VFNMSUBSS xmm, m32, xmm, xmm [FMA4] +// +func (self *Program) VFNMSUBSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VFNMSUBSS", 4, Operands { v0, v1, v2, v3 }) + // VFNMSUBSS xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VFNMSUBSS m32, xmm, xmm, xmm + if isM32(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0x7e) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VFNMSUBSS xmm, m32, xmm, xmm + if isXMM(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_FMA4) + p.domain = DomainFMA + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x7e) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VFNMSUBSS") + } + return p +} + +// VFPCLASSPD performs "Test Class of Packed Double-Precision Floating-Point Values". 
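+//
+// A hand-written sketch (assumed names: "p" a *Program; ZMM1 a ZMM register
+// constant; K2 an opmask register constant of this package). The imm8 picks
+// the classes to test, one bit per class from bit 0 to bit 7: QNaN, +0, -0,
+// +Inf, -Inf, denormal, finite negative, SNaN; each lane matching any
+// selected class sets its bit in the destination mask:
+//
+//     p.VFPCLASSPD(0x81, ZMM1, K2) // k2 bit i = 1 where zmm1 lane i is QNaN or SNaN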
+// +// Mnemonic : VFPCLASSPD +// Supported forms : (6 forms) +// +// * VFPCLASSPD imm8, m512/m64bcst, k{k} [AVX512DQ] +// * VFPCLASSPD imm8, zmm, k{k} [AVX512DQ] +// * VFPCLASSPD imm8, m128/m64bcst, k{k} [AVX512DQ,AVX512VL] +// * VFPCLASSPD imm8, m256/m64bcst, k{k} [AVX512DQ,AVX512VL] +// * VFPCLASSPD imm8, xmm, k{k} [AVX512DQ,AVX512VL] +// * VFPCLASSPD imm8, ymm, k{k} [AVX512DQ,AVX512VL] +// +func (self *Program) VFPCLASSPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VFPCLASSPD", 3, Operands { v0, v1, v2 }) + // VFPCLASSPD imm8, m512/m64bcst, k{k} + if isImm8(v0) && isM512M64bcst(v1) && isKk(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, bcode(v[1])) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VFPCLASSPD imm8, zmm, k{k} + if isImm8(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit(kcode(v[2]) | 0x48) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VFPCLASSPD imm8, m128/m64bcst, k{k} + if isImm8(v0) && isM128M64bcst(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, bcode(v[1])) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VFPCLASSPD imm8, m256/m64bcst, k{k} + if isImm8(v0) && isM256M64bcst(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, bcode(v[1])) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VFPCLASSPD imm8, xmm, k{k} + if isImm8(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit(kcode(v[2]) | 0x08) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VFPCLASSPD imm8, ymm, k{k} + if isImm8(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit(kcode(v[2]) | 0x28) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFPCLASSPD") + } + return p +} + +// VFPCLASSPS performs "Test Class of Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VFPCLASSPS +// Supported forms : (6 forms) +// +// * VFPCLASSPS imm8, m512/m32bcst, k{k} [AVX512DQ] +// * VFPCLASSPS imm8, zmm, k{k} [AVX512DQ] +// * VFPCLASSPS imm8, m128/m32bcst, k{k} [AVX512DQ,AVX512VL] +// * VFPCLASSPS imm8, m256/m32bcst, k{k} [AVX512DQ,AVX512VL] +// * VFPCLASSPS imm8, xmm, k{k} [AVX512DQ,AVX512VL] +// * VFPCLASSPS imm8, ymm, k{k} [AVX512DQ,AVX512VL] +// +func (self *Program) VFPCLASSPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VFPCLASSPS", 3, Operands { v0, v1, v2 }) + // VFPCLASSPS imm8, m512/m32bcst, k{k} + if isImm8(v0) && isM512M32bcst(v1) && isKk(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, bcode(v[1])) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VFPCLASSPS imm8, zmm, k{k} + if isImm8(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit(kcode(v[2]) | 0x48) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VFPCLASSPS imm8, m128/m32bcst, k{k} + if isImm8(v0) && isM128M32bcst(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, bcode(v[1])) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VFPCLASSPS imm8, m256/m32bcst, k{k} + if isImm8(v0) && isM256M32bcst(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, bcode(v[1])) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VFPCLASSPS imm8, xmm, k{k} + if isImm8(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit(kcode(v[2]) | 0x08) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VFPCLASSPS imm8, ymm, k{k} + if isImm8(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit(kcode(v[2]) | 0x28) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFPCLASSPS") + } + return p +} + +// VFPCLASSSD performs "Test Class of Scalar Double-Precision Floating-Point Value". 
+// +// Mnemonic : VFPCLASSSD +// Supported forms : (2 forms) +// +// * VFPCLASSSD imm8, xmm, k{k} [AVX512DQ] +// * VFPCLASSSD imm8, m64, k{k} [AVX512DQ] +// +func (self *Program) VFPCLASSSD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VFPCLASSSD", 3, Operands { v0, v1, v2 }) + // VFPCLASSSD imm8, xmm, k{k} + if isImm8(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit(kcode(v[2]) | 0x08) + m.emit(0x67) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VFPCLASSSD imm8, m64, k{k} + if isImm8(v0) && isM64(v1) && isKk(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, 0) + m.emit(0x67) + m.mrsd(lcode(v[2]), addr(v[1]), 8) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFPCLASSSD") + } + return p +} + +// VFPCLASSSS performs "Test Class of Scalar Single-Precision Floating-Point Value". +// +// Mnemonic : VFPCLASSSS +// Supported forms : (2 forms) +// +// * VFPCLASSSS imm8, xmm, k{k} [AVX512DQ] +// * VFPCLASSSS imm8, m32, k{k} [AVX512DQ] +// +func (self *Program) VFPCLASSSS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VFPCLASSSS", 3, Operands { v0, v1, v2 }) + // VFPCLASSSS imm8, xmm, k{k} + if isImm8(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit(kcode(v[2]) | 0x08) + m.emit(0x67) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VFPCLASSSS imm8, m32, k{k} + if isImm8(v0) && isM32(v1) && isKk(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), 0, 0) + m.emit(0x67) + m.mrsd(lcode(v[2]), addr(v[1]), 4) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VFPCLASSSS") + } + return p +} + +// VFRCZPD performs "Extract Fraction Packed Double-Precision Floating-Point". 
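+//
+// A hand-written sketch under the same assumptions as above. The VFRCZ*
+// group is AMD XOP only (note DomainAMDSpecific and the ISA_XOP requirement
+// below); each result lane holds the fractional part of the corresponding
+// source lane:
+//
+//     p.VFRCZPD(XMM1, XMM0) // xmm0 = per-lane fractional parts of xmm1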
+// +// Mnemonic : VFRCZPD +// Supported forms : (4 forms) +// +// * VFRCZPD xmm, xmm [XOP] +// * VFRCZPD m128, xmm [XOP] +// * VFRCZPD ymm, ymm [XOP] +// * VFRCZPD m256, ymm [XOP] +// +func (self *Program) VFRCZPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VFRCZPD", 2, Operands { v0, v1 }) + // VFRCZPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0x81) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VFRCZPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0x81) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VFRCZPD ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7c) + m.emit(0x81) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VFRCZPD m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x04, hcode(v[1]), addr(v[0]), 0) + m.emit(0x81) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VFRCZPD") + } + return p +} + +// VFRCZPS performs "Extract Fraction Packed Single-Precision Floating-Point". +// +// Mnemonic : VFRCZPS +// Supported forms : (4 forms) +// +// * VFRCZPS xmm, xmm [XOP] +// * VFRCZPS m128, xmm [XOP] +// * VFRCZPS ymm, ymm [XOP] +// * VFRCZPS m256, ymm [XOP] +// +func (self *Program) VFRCZPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VFRCZPS", 2, Operands { v0, v1 }) + // VFRCZPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0x80) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VFRCZPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0x80) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VFRCZPS ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7c) + m.emit(0x80) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VFRCZPS m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x04, hcode(v[1]), addr(v[0]), 0) + m.emit(0x80) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VFRCZPS") + } + return p +} + +// VFRCZSD performs "Extract Fraction Scalar Double-Precision Floating-Point". 
+// +// Mnemonic : VFRCZSD +// Supported forms : (2 forms) +// +// * VFRCZSD xmm, xmm [XOP] +// * VFRCZSD m64, xmm [XOP] +// +func (self *Program) VFRCZSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VFRCZSD", 2, Operands { v0, v1 }) + // VFRCZSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0x83) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VFRCZSD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0x83) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VFRCZSD") + } + return p +} + +// VFRCZSS performs "Extract Fraction Scalar Single-Precision Floating Point". +// +// Mnemonic : VFRCZSS +// Supported forms : (2 forms) +// +// * VFRCZSS xmm, xmm [XOP] +// * VFRCZSS m32, xmm [XOP] +// +func (self *Program) VFRCZSS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VFRCZSS", 2, Operands { v0, v1 }) + // VFRCZSS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0x82) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VFRCZSS m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0x82) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VFRCZSS") + } + return p +} + +// VGATHERDPD performs "Gather Packed Double-Precision Floating-Point Values Using Signed Doubleword Indices". 
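+//
+// A hand-written sketch of the AVX2 three-operand form (assumed names as
+// above; the vm32x vector-memory operand is left abstract because its
+// constructor lives outside this file). In the reversed operand order the
+// first register is the gather mask: lanes whose mask sign bit is set are
+// loaded, and the mask is cleared lane by lane as the gather completes:
+//
+//     // p.VGATHERDPD(XMM0 /* mask, updated */, vm32x, XMM1 /* destination */)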
+// +// Mnemonic : VGATHERDPD +// Supported forms : (5 forms) +// +// * VGATHERDPD xmm, vm32x, xmm [AVX2] +// * VGATHERDPD ymm, vm32x, ymm [AVX2] +// * VGATHERDPD vm32y, zmm{k} [AVX512F] +// * VGATHERDPD vm32x, xmm{k} [AVX512F,AVX512VL] +// * VGATHERDPD vm32x, ymm{k} [AVX512F,AVX512VL] +// +func (self *Program) VGATHERDPD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VGATHERDPD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VGATHERDPD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VGATHERDPD takes 2 or 3 operands") + } + // VGATHERDPD xmm, vm32x, xmm + if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x92) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VGATHERDPD ymm, vm32x, ymm + if len(vv) == 1 && isYMM(v0) && isVMX(v1) && isYMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x92) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VGATHERDPD vm32y, zmm{k} + if len(vv) == 0 && isEVEXVMY(v0) && isZMMk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x92) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VGATHERDPD vm32x, xmm{k} + if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x92) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VGATHERDPD vm32x, ymm{k} + if len(vv) == 0 && isEVEXVMX(v0) && isYMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x92) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VGATHERDPD") + } + return p +} + +// VGATHERDPS performs "Gather Packed Single-Precision Floating-Point Values Using Signed Doubleword Indices". 
+// +// Mnemonic : VGATHERDPS +// Supported forms : (5 forms) +// +// * VGATHERDPS xmm, vm32x, xmm [AVX2] +// * VGATHERDPS ymm, vm32y, ymm [AVX2] +// * VGATHERDPS vm32z, zmm{k} [AVX512F] +// * VGATHERDPS vm32x, xmm{k} [AVX512F,AVX512VL] +// * VGATHERDPS vm32y, ymm{k} [AVX512F,AVX512VL] +// +func (self *Program) VGATHERDPS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VGATHERDPS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VGATHERDPS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VGATHERDPS takes 2 or 3 operands") + } + // VGATHERDPS xmm, vm32x, xmm + if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x92) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VGATHERDPS ymm, vm32y, ymm + if len(vv) == 1 && isYMM(v0) && isVMY(v1) && isYMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x92) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VGATHERDPS vm32z, zmm{k} + if len(vv) == 0 && isVMZ(v0) && isZMMk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x92) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VGATHERDPS vm32x, xmm{k} + if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x92) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VGATHERDPS vm32y, ymm{k} + if len(vv) == 0 && isEVEXVMY(v0) && isYMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x92) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VGATHERDPS") + } + return p +} + +// VGATHERPF0DPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Doubleword Indices Using T0 Hint". +// +// Mnemonic : VGATHERPF0DPD +// Supported forms : (1 form) +// +// * VGATHERPF0DPD vm32y{k} [AVX512PF] +// +func (self *Program) VGATHERPF0DPD(v0 interface{}) *Instruction { + p := self.alloc("VGATHERPF0DPD", 1, Operands { v0 }) + // VGATHERPF0DPD vm32y{k} + if isVMYk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc6) + m.mrsd(1, addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VGATHERPF0DPD") + } + return p +} + +// VGATHERPF0DPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Doubleword Indices Using T0 Hint". 
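+//
+// Hand-written note on the VGATHERPF* encoders that follow: they take a
+// single opmask-qualified vector-memory operand and no destination, and the
+// first argument of mrsd is a fixed ModRM.reg opcode extension (/1 for the
+// T0-hint forms, /2 for the T1-hint ones) rather than a register code; only
+// lanes with their opmask bit set are prefetched.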
+// +// Mnemonic : VGATHERPF0DPS +// Supported forms : (1 form) +// +// * VGATHERPF0DPS vm32z{k} [AVX512PF] +// +func (self *Program) VGATHERPF0DPS(v0 interface{}) *Instruction { + p := self.alloc("VGATHERPF0DPS", 1, Operands { v0 }) + // VGATHERPF0DPS vm32z{k} + if isVMZk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc6) + m.mrsd(1, addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VGATHERPF0DPS") + } + return p +} + +// VGATHERPF0QPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Quadword Indices Using T0 Hint". +// +// Mnemonic : VGATHERPF0QPD +// Supported forms : (1 form) +// +// * VGATHERPF0QPD vm64z{k} [AVX512PF] +// +func (self *Program) VGATHERPF0QPD(v0 interface{}) *Instruction { + p := self.alloc("VGATHERPF0QPD", 1, Operands { v0 }) + // VGATHERPF0QPD vm64z{k} + if isVMZk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc7) + m.mrsd(1, addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VGATHERPF0QPD") + } + return p +} + +// VGATHERPF0QPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Quadword Indices Using T0 Hint". +// +// Mnemonic : VGATHERPF0QPS +// Supported forms : (1 form) +// +// * VGATHERPF0QPS vm64z{k} [AVX512PF] +// +func (self *Program) VGATHERPF0QPS(v0 interface{}) *Instruction { + p := self.alloc("VGATHERPF0QPS", 1, Operands { v0 }) + // VGATHERPF0QPS vm64z{k} + if isVMZk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc7) + m.mrsd(1, addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VGATHERPF0QPS") + } + return p +} + +// VGATHERPF1DPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Doubleword Indices Using T1 Hint". +// +// Mnemonic : VGATHERPF1DPD +// Supported forms : (1 form) +// +// * VGATHERPF1DPD vm32y{k} [AVX512PF] +// +func (self *Program) VGATHERPF1DPD(v0 interface{}) *Instruction { + p := self.alloc("VGATHERPF1DPD", 1, Operands { v0 }) + // VGATHERPF1DPD vm32y{k} + if isVMYk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc6) + m.mrsd(2, addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VGATHERPF1DPD") + } + return p +} + +// VGATHERPF1DPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Doubleword Indices Using T1 Hint". 
+// +// Mnemonic : VGATHERPF1DPS +// Supported forms : (1 form) +// +// * VGATHERPF1DPS vm32z{k} [AVX512PF] +// +func (self *Program) VGATHERPF1DPS(v0 interface{}) *Instruction { + p := self.alloc("VGATHERPF1DPS", 1, Operands { v0 }) + // VGATHERPF1DPS vm32z{k} + if isVMZk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc6) + m.mrsd(2, addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VGATHERPF1DPS") + } + return p +} + +// VGATHERPF1QPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Quadword Indices Using T1 Hint". +// +// Mnemonic : VGATHERPF1QPD +// Supported forms : (1 form) +// +// * VGATHERPF1QPD vm64z{k} [AVX512PF] +// +func (self *Program) VGATHERPF1QPD(v0 interface{}) *Instruction { + p := self.alloc("VGATHERPF1QPD", 1, Operands { v0 }) + // VGATHERPF1QPD vm64z{k} + if isVMZk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc7) + m.mrsd(2, addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VGATHERPF1QPD") + } + return p +} + +// VGATHERPF1QPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Quadword Indices Using T1 Hint". +// +// Mnemonic : VGATHERPF1QPS +// Supported forms : (1 form) +// +// * VGATHERPF1QPS vm64z{k} [AVX512PF] +// +func (self *Program) VGATHERPF1QPS(v0 interface{}) *Instruction { + p := self.alloc("VGATHERPF1QPS", 1, Operands { v0 }) + // VGATHERPF1QPS vm64z{k} + if isVMZk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc7) + m.mrsd(2, addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VGATHERPF1QPS") + } + return p +} + +// VGATHERQPD performs "Gather Packed Double-Precision Floating-Point Values Using Signed Quadword Indices". 
+// +// Mnemonic : VGATHERQPD +// Supported forms : (5 forms) +// +// * VGATHERQPD xmm, vm64x, xmm [AVX2] +// * VGATHERQPD ymm, vm64y, ymm [AVX2] +// * VGATHERQPD vm64z, zmm{k} [AVX512F] +// * VGATHERQPD vm64x, xmm{k} [AVX512F,AVX512VL] +// * VGATHERQPD vm64y, ymm{k} [AVX512F,AVX512VL] +// +func (self *Program) VGATHERQPD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VGATHERQPD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VGATHERQPD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VGATHERQPD takes 2 or 3 operands") + } + // VGATHERQPD xmm, vm64x, xmm + if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x93) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VGATHERQPD ymm, vm64y, ymm + if len(vv) == 1 && isYMM(v0) && isVMY(v1) && isYMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x93) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VGATHERQPD vm64z, zmm{k} + if len(vv) == 0 && isVMZ(v0) && isZMMk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x93) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VGATHERQPD vm64x, xmm{k} + if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x93) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VGATHERQPD vm64y, ymm{k} + if len(vv) == 0 && isEVEXVMY(v0) && isYMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x93) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VGATHERQPD") + } + return p +} + +// VGATHERQPS performs "Gather Packed Single-Precision Floating-Point Values Using Signed Quadword Indices". 
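+//
+// Hand-written note with the same assumptions as above: VGATHERQPS narrows,
+// fetching 32-bit elements through 64-bit indices, so the destination stays
+// an XMM register even for the vm64y form (and a YMM register for vm64z),
+// with the unused upper destination lanes zeroed:
+//
+//     // p.VGATHERQPS(XMM0 /* mask, updated */, vm64y, XMM1 /* destination */)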
+// +// Mnemonic : VGATHERQPS +// Supported forms : (5 forms) +// +// * VGATHERQPS xmm, vm64x, xmm [AVX2] +// * VGATHERQPS xmm, vm64y, xmm [AVX2] +// * VGATHERQPS vm64z, ymm{k} [AVX512F] +// * VGATHERQPS vm64x, xmm{k} [AVX512F,AVX512VL] +// * VGATHERQPS vm64y, xmm{k} [AVX512F,AVX512VL] +// +func (self *Program) VGATHERQPS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VGATHERQPS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VGATHERQPS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VGATHERQPS takes 2 or 3 operands") + } + // VGATHERQPS xmm, vm64x, xmm + if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x93) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VGATHERQPS xmm, vm64y, xmm + if len(vv) == 1 && isXMM(v0) && isVMY(v1) && isXMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x93) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VGATHERQPS vm64z, ymm{k} + if len(vv) == 0 && isVMZ(v0) && isYMMk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x93) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VGATHERQPS vm64x, xmm{k} + if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x93) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VGATHERQPS vm64y, xmm{k} + if len(vv) == 0 && isEVEXVMY(v0) && isXMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x93) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VGATHERQPS") + } + return p +} + +// VGETEXPPD performs "Extract Exponents of Packed Double-Precision Floating-Point Values as Double-Precision Floating-Point Values". 
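+//
+// A hand-written sketch under the same assumptions as above (ZMM0 and ZMM1
+// being this package's ZMM register constants). Each destination lane
+// receives the source lane's unbiased exponent as a double, effectively
+// floor(log2(|x|)); an input lane of 8.5 yields 3.0:
+//
+//     p.VGETEXPPD(ZMM1, ZMM0) // zmm0 = per-lane exponents of zmm1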
+// +// Mnemonic : VGETEXPPD +// Supported forms : (7 forms) +// +// * VGETEXPPD m512/m64bcst, zmm{k}{z} [AVX512F] +// * VGETEXPPD {sae}, zmm, zmm{k}{z} [AVX512F] +// * VGETEXPPD zmm, zmm{k}{z} [AVX512F] +// * VGETEXPPD m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VGETEXPPD m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VGETEXPPD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VGETEXPPD ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VGETEXPPD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VGETEXPPD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VGETEXPPD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VGETEXPPD takes 2 or 3 operands") + } + // VGETEXPPD m512/m64bcst, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VGETEXPPD {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0x42) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VGETEXPPD zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VGETEXPPD m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VGETEXPPD m256/m64bcst, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VGETEXPPD xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VGETEXPPD ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + 
} + if p.len == 0 { + panic("invalid operands for VGETEXPPD") + } + return p +} + +// VGETEXPPS performs "Extract Exponents of Packed Single-Precision Floating-Point Values as Single-Precision Floating-Point Values". +// +// Mnemonic : VGETEXPPS +// Supported forms : (7 forms) +// +// * VGETEXPPS m512/m32bcst, zmm{k}{z} [AVX512F] +// * VGETEXPPS {sae}, zmm, zmm{k}{z} [AVX512F] +// * VGETEXPPS zmm, zmm{k}{z} [AVX512F] +// * VGETEXPPS m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VGETEXPPS m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VGETEXPPS xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VGETEXPPS ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VGETEXPPS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VGETEXPPS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VGETEXPPS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VGETEXPPS takes 2 or 3 operands") + } + // VGETEXPPS m512/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VGETEXPPS {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0x42) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VGETEXPPS zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VGETEXPPS m128/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VGETEXPPS m256/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x42) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VGETEXPPS xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VGETEXPPS ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x42) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VGETEXPPS") + } + return p +} + +// VGETEXPSD performs "Extract Exponent of Scalar Double-Precision Floating-Point Value as Double-Precision Floating-Point Value". +// +// Mnemonic : VGETEXPSD +// Supported forms : (3 forms) +// +// * VGETEXPSD m64, xmm, xmm{k}{z} [AVX512F] +// * VGETEXPSD {sae}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VGETEXPSD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VGETEXPSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VGETEXPSD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VGETEXPSD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VGETEXPSD takes 3 or 4 operands") + } + // VGETEXPSD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x43) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VGETEXPSD {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x43) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VGETEXPSD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x43) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VGETEXPSD") + } + return p +} + +// VGETEXPSS performs "Extract Exponent of Scalar Single-Precision Floating-Point Value as Single-Precision Floating-Point Value". 
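+//
+// A minimal usage sketch, not part of the generated documentation: it assumes
+// an existing *Program p and this package's XMM register constants, with the
+// destination as the last operand (matching the forms listed below).
+//
+//     p.VGETEXPSS(XMM0, XMM1, XMM2)   // exponent of xmm0's low float -> xmm2, upper bits copied from xmm1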
+// +// Mnemonic : VGETEXPSS +// Supported forms : (3 forms) +// +// * VGETEXPSS m32, xmm, xmm{k}{z} [AVX512F] +// * VGETEXPSS {sae}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VGETEXPSS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VGETEXPSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VGETEXPSS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VGETEXPSS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VGETEXPSS takes 3 or 4 operands") + } + // VGETEXPSS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x43) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VGETEXPSS {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x43) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VGETEXPSS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x43) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VGETEXPSS") + } + return p +} + +// VGETMANTPD performs "Extract Normalized Mantissas from Packed Double-Precision Floating-Point Values". 
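+//
+// A hedged sketch of the plain register form, assuming Go integer literals
+// are accepted as imm8 (as elsewhere in this package) and that ZMM1/ZMM2 are
+// the package's ZMM register constants:
+//
+//     p.VGETMANTPD(0, ZMM1, ZMM2)   // imm8 = 0 selects the [1, 2) normalization interval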
+// +// Mnemonic : VGETMANTPD +// Supported forms : (7 forms) +// +// * VGETMANTPD imm8, m512/m64bcst, zmm{k}{z} [AVX512F] +// * VGETMANTPD imm8, {sae}, zmm, zmm{k}{z} [AVX512F] +// * VGETMANTPD imm8, zmm, zmm{k}{z} [AVX512F] +// * VGETMANTPD imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VGETMANTPD imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VGETMANTPD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VGETMANTPD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VGETMANTPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VGETMANTPD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VGETMANTPD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VGETMANTPD takes 3 or 4 operands") + } + // VGETMANTPD imm8, m512/m64bcst, zmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTPD imm8, {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[3]) << 7) | kcode(v[3]) | 0x18) + m.emit(0x26) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTPD imm8, zmm, zmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTPD imm8, m128/m64bcst, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTPD imm8, m256/m64bcst, ymm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTPD imm8, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTPD imm8, ymm, ymm{k}{z} + if len(vv) == 0 && isImm8(v0) && 
isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VGETMANTPD") + } + return p +} + +// VGETMANTPS performs "Extract Normalized Mantissas from Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VGETMANTPS +// Supported forms : (7 forms) +// +// * VGETMANTPS imm8, m512/m32bcst, zmm{k}{z} [AVX512F] +// * VGETMANTPS imm8, {sae}, zmm, zmm{k}{z} [AVX512F] +// * VGETMANTPS imm8, zmm, zmm{k}{z} [AVX512F] +// * VGETMANTPS imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VGETMANTPS imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VGETMANTPS imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VGETMANTPS imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VGETMANTPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VGETMANTPS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VGETMANTPS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VGETMANTPS takes 3 or 4 operands") + } + // VGETMANTPS imm8, m512/m32bcst, zmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTPS imm8, {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[3]) << 7) | kcode(v[3]) | 0x18) + m.emit(0x26) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTPS imm8, zmm, zmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTPS imm8, m128/m32bcst, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTPS imm8, m256/m32bcst, ymm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x26) + m.mrsd(lcode(v[2]), 
addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTPS imm8, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTPS imm8, ymm, ymm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VGETMANTPS") + } + return p +} + +// VGETMANTSD performs "Extract Normalized Mantissa from Scalar Double-Precision Floating-Point Value". +// +// Mnemonic : VGETMANTSD +// Supported forms : (3 forms) +// +// * VGETMANTSD imm8, m64, xmm, xmm{k}{z} [AVX512F] +// * VGETMANTSD imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VGETMANTSD imm8, xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VGETMANTSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VGETMANTSD", 4, Operands { v0, v1, v2, v3 }) + case 1 : p = self.alloc("VGETMANTSD", 5, Operands { v0, v1, v2, v3, vv[0] }) + default : panic("instruction VGETMANTSD takes 4 or 5 operands") + } + // VGETMANTSD imm8, m64, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x27) + m.mrsd(lcode(v[3]), addr(v[1]), 8) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTSD imm8, {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4))) + m.emit(0xfd ^ (hlcode(v[3]) << 3)) + m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10) + m.emit(0x27) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTSD imm8, xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x27) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VGETMANTSD") + } + return p +} + +// VGETMANTSS performs "Extract Normalized Mantissa from Scalar Single-Precision Floating-Point Value". 
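+//
+// Illustrative only, under the same assumptions as the packed variants above
+// (existing *Program p, package register constants, Go ints as imm8):
+//
+//     p.VGETMANTSS(0, XMM0, XMM1, XMM2)   // mantissa of xmm0's low float -> xmm2, upper bits from xmm1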
+// +// Mnemonic : VGETMANTSS +// Supported forms : (3 forms) +// +// * VGETMANTSS imm8, m32, xmm, xmm{k}{z} [AVX512F] +// * VGETMANTSS imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VGETMANTSS imm8, xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VGETMANTSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VGETMANTSS", 4, Operands { v0, v1, v2, v3 }) + case 1 : p = self.alloc("VGETMANTSS", 5, Operands { v0, v1, v2, v3, vv[0] }) + default : panic("instruction VGETMANTSS takes 4 or 5 operands") + } + // VGETMANTSS imm8, m32, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x27) + m.mrsd(lcode(v[3]), addr(v[1]), 4) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTSS imm8, {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4))) + m.emit(0x7d ^ (hlcode(v[3]) << 3)) + m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10) + m.emit(0x27) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VGETMANTSS imm8, xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x27) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VGETMANTSS") + } + return p +} + +// VHADDPD performs "Packed Double-FP Horizontal Add". 
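+//
+// For orientation, a minimal sketch; it assumes an existing *Program p, and
+// the destination is the last operand per this package's operand order:
+//
+//     p.VHADDPD(XMM0, XMM1, XMM2)   // xmm2 = pairwise sums of the doubles in xmm1 and xmm0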
+// +// Mnemonic : VHADDPD +// Supported forms : (4 forms) +// +// * VHADDPD xmm, xmm, xmm [AVX] +// * VHADDPD m128, xmm, xmm [AVX] +// * VHADDPD ymm, ymm, ymm [AVX] +// * VHADDPD m256, ymm, ymm [AVX] +// +func (self *Program) VHADDPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VHADDPD", 3, Operands { v0, v1, v2 }) + // VHADDPD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VHADDPD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x7c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VHADDPD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VHADDPD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x7c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VHADDPD") + } + return p +} + +// VHADDPS performs "Packed Single-FP Horizontal Add". +// +// Mnemonic : VHADDPS +// Supported forms : (4 forms) +// +// * VHADDPS xmm, xmm, xmm [AVX] +// * VHADDPS m128, xmm, xmm [AVX] +// * VHADDPS ymm, ymm, ymm [AVX] +// * VHADDPS m256, ymm, ymm [AVX] +// +func (self *Program) VHADDPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VHADDPS", 3, Operands { v0, v1, v2 }) + // VHADDPS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VHADDPS m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x7c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VHADDPS ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VHADDPS m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x7c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VHADDPS") + } + return p +} + +// VHSUBPD performs "Packed Double-FP Horizontal Subtract". 
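+//
+// A small illustrative call, under the same conventions and assumptions as
+// VHADDPD above:
+//
+//     p.VHSUBPD(YMM0, YMM1, YMM2)   // ymm2 = pairwise differences of the doubles in ymm1 and ymm0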
+// +// Mnemonic : VHSUBPD +// Supported forms : (4 forms) +// +// * VHSUBPD xmm, xmm, xmm [AVX] +// * VHSUBPD m128, xmm, xmm [AVX] +// * VHSUBPD ymm, ymm, ymm [AVX] +// * VHSUBPD m256, ymm, ymm [AVX] +// +func (self *Program) VHSUBPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VHSUBPD", 3, Operands { v0, v1, v2 }) + // VHSUBPD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VHSUBPD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x7d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VHSUBPD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VHSUBPD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x7d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VHSUBPD") + } + return p +} + +// VHSUBPS performs "Packed Single-FP Horizontal Subtract". +// +// Mnemonic : VHSUBPS +// Supported forms : (4 forms) +// +// * VHSUBPS xmm, xmm, xmm [AVX] +// * VHSUBPS m128, xmm, xmm [AVX] +// * VHSUBPS ymm, ymm, ymm [AVX] +// * VHSUBPS m256, ymm, ymm [AVX] +// +func (self *Program) VHSUBPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VHSUBPS", 3, Operands { v0, v1, v2 }) + // VHSUBPS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VHSUBPS m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x7d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VHSUBPS ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VHSUBPS m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x7d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VHSUBPS") + } + return p +} + +// VINSERTF128 performs "Insert Packed Floating-Point Values". 
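+//
+// Sketch (assuming Go ints are accepted as imm8): the immediate selects which
+// 128-bit lane of the destination receives the source register.
+//
+//     p.VINSERTF128(1, XMM0, YMM1, YMM2)   // ymm2 = ymm1 with its upper lane replaced by xmm0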
+// +// Mnemonic : VINSERTF128 +// Supported forms : (2 forms) +// +// * VINSERTF128 imm8, xmm, ymm, ymm [AVX] +// * VINSERTF128 imm8, m128, ymm, ymm [AVX] +// +func (self *Program) VINSERTF128(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VINSERTF128", 4, Operands { v0, v1, v2, v3 }) + // VINSERTF128 imm8, xmm, ymm, ymm + if isImm8(v0) && isXMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x18) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTF128 imm8, m128, ymm, ymm + if isImm8(v0) && isM128(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x18) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VINSERTF128") + } + return p +} + +// VINSERTF32X4 performs "Insert 128 Bits of Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VINSERTF32X4 +// Supported forms : (4 forms) +// +// * VINSERTF32X4 imm8, xmm, zmm, zmm{k}{z} [AVX512F] +// * VINSERTF32X4 imm8, m128, zmm, zmm{k}{z} [AVX512F] +// * VINSERTF32X4 imm8, xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VINSERTF32X4 imm8, m128, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VINSERTF32X4(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VINSERTF32X4", 4, Operands { v0, v1, v2, v3 }) + // VINSERTF32X4 imm8, xmm, zmm, zmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x18) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTF32X4 imm8, m128, zmm, zmm{k}{z} + if isImm8(v0) && isM128(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x18) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTF32X4 imm8, xmm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x18) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTF32X4 imm8, m128, ymm, ymm{k}{z} + if isImm8(v0) && isM128(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x18) + m.mrsd(lcode(v[3]), addr(v[1]), 
16) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VINSERTF32X4") + } + return p +} + +// VINSERTF32X8 performs "Insert 256 Bits of Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VINSERTF32X8 +// Supported forms : (2 forms) +// +// * VINSERTF32X8 imm8, ymm, zmm, zmm{k}{z} [AVX512DQ] +// * VINSERTF32X8 imm8, m256, zmm, zmm{k}{z} [AVX512DQ] +// +func (self *Program) VINSERTF32X8(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VINSERTF32X8", 4, Operands { v0, v1, v2, v3 }) + // VINSERTF32X8 imm8, ymm, zmm, zmm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x1a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTF32X8 imm8, m256, zmm, zmm{k}{z} + if isImm8(v0) && isM256(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x1a) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VINSERTF32X8") + } + return p +} + +// VINSERTF64X2 performs "Insert 128 Bits of Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VINSERTF64X2 +// Supported forms : (4 forms) +// +// * VINSERTF64X2 imm8, xmm, zmm, zmm{k}{z} [AVX512DQ] +// * VINSERTF64X2 imm8, m128, zmm, zmm{k}{z} [AVX512DQ] +// * VINSERTF64X2 imm8, xmm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VINSERTF64X2 imm8, m128, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VINSERTF64X2(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VINSERTF64X2", 4, Operands { v0, v1, v2, v3 }) + // VINSERTF64X2 imm8, xmm, zmm, zmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x18) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTF64X2 imm8, m128, zmm, zmm{k}{z} + if isImm8(v0) && isM128(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x18) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTF64X2 imm8, xmm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x18) + m.emit(0xc0 | lcode(v[3]) << 3 | 
lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTF64X2 imm8, m128, ymm, ymm{k}{z} + if isImm8(v0) && isM128(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x18) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VINSERTF64X2") + } + return p +} + +// VINSERTF64X4 performs "Insert 256 Bits of Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VINSERTF64X4 +// Supported forms : (2 forms) +// +// * VINSERTF64X4 imm8, ymm, zmm, zmm{k}{z} [AVX512F] +// * VINSERTF64X4 imm8, m256, zmm, zmm{k}{z} [AVX512F] +// +func (self *Program) VINSERTF64X4(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VINSERTF64X4", 4, Operands { v0, v1, v2, v3 }) + // VINSERTF64X4 imm8, ymm, zmm, zmm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x1a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTF64X4 imm8, m256, zmm, zmm{k}{z} + if isImm8(v0) && isM256(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x1a) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VINSERTF64X4") + } + return p +} + +// VINSERTI128 performs "Insert Packed Integer Values". +// +// Mnemonic : VINSERTI128 +// Supported forms : (2 forms) +// +// * VINSERTI128 imm8, xmm, ymm, ymm [AVX2] +// * VINSERTI128 imm8, m128, ymm, ymm [AVX2] +// +func (self *Program) VINSERTI128(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VINSERTI128", 4, Operands { v0, v1, v2, v3 }) + // VINSERTI128 imm8, xmm, ymm, ymm + if isImm8(v0) && isXMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x38) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTI128 imm8, m128, ymm, ymm + if isImm8(v0) && isM128(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x38) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VINSERTI128") + } + return p +} + +// VINSERTI32X4 performs "Insert 128 Bits of Packed Doubleword Integer Values". 
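+//
+// An illustrative unmasked call; a zmm{k}{z} operand also accepts a plain ZMM
+// register, and the imm8 picks the target 128-bit lane:
+//
+//     p.VINSERTI32X4(3, XMM0, ZMM1, ZMM2)   // zmm2 = zmm1 with lane 3 replaced by xmm0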
+// +// Mnemonic : VINSERTI32X4 +// Supported forms : (4 forms) +// +// * VINSERTI32X4 imm8, xmm, zmm, zmm{k}{z} [AVX512F] +// * VINSERTI32X4 imm8, m128, zmm, zmm{k}{z} [AVX512F] +// * VINSERTI32X4 imm8, xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VINSERTI32X4 imm8, m128, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VINSERTI32X4(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VINSERTI32X4", 4, Operands { v0, v1, v2, v3 }) + // VINSERTI32X4 imm8, xmm, zmm, zmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x38) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTI32X4 imm8, m128, zmm, zmm{k}{z} + if isImm8(v0) && isM128(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x38) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTI32X4 imm8, xmm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x38) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTI32X4 imm8, m128, ymm, ymm{k}{z} + if isImm8(v0) && isM128(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x38) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VINSERTI32X4") + } + return p +} + +// VINSERTI32X8 performs "Insert 256 Bits of Packed Doubleword Integer Values". 
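+//
+// Sketch under the same assumptions as VINSERTI32X4 above; here the imm8
+// selects one of the two 256-bit halves:
+//
+//     p.VINSERTI32X8(1, YMM0, ZMM1, ZMM2)   // zmm2 = zmm1 with its upper 256 bits replaced by ymm0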
+// +// Mnemonic : VINSERTI32X8 +// Supported forms : (2 forms) +// +// * VINSERTI32X8 imm8, ymm, zmm, zmm{k}{z} [AVX512DQ] +// * VINSERTI32X8 imm8, m256, zmm, zmm{k}{z} [AVX512DQ] +// +func (self *Program) VINSERTI32X8(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VINSERTI32X8", 4, Operands { v0, v1, v2, v3 }) + // VINSERTI32X8 imm8, ymm, zmm, zmm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x3a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTI32X8 imm8, m256, zmm, zmm{k}{z} + if isImm8(v0) && isM256(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x3a) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VINSERTI32X8") + } + return p +} + +// VINSERTI64X2 performs "Insert 128 Bits of Packed Quadword Integer Values". +// +// Mnemonic : VINSERTI64X2 +// Supported forms : (4 forms) +// +// * VINSERTI64X2 imm8, xmm, zmm, zmm{k}{z} [AVX512DQ] +// * VINSERTI64X2 imm8, m128, zmm, zmm{k}{z} [AVX512DQ] +// * VINSERTI64X2 imm8, xmm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VINSERTI64X2 imm8, m128, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VINSERTI64X2(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VINSERTI64X2", 4, Operands { v0, v1, v2, v3 }) + // VINSERTI64X2 imm8, xmm, zmm, zmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x38) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTI64X2 imm8, m128, zmm, zmm{k}{z} + if isImm8(v0) && isM128(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x38) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTI64X2 imm8, xmm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x38) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTI64X2 imm8, m128, ymm, ymm{k}{z} + if isImm8(v0) && isM128(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + 
p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x38) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VINSERTI64X2") + } + return p +} + +// VINSERTI64X4 performs "Insert 256 Bits of Packed Quadword Integer Values". +// +// Mnemonic : VINSERTI64X4 +// Supported forms : (2 forms) +// +// * VINSERTI64X4 imm8, ymm, zmm, zmm{k}{z} [AVX512F] +// * VINSERTI64X4 imm8, m256, zmm, zmm{k}{z} [AVX512F] +// +func (self *Program) VINSERTI64X4(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VINSERTI64X4", 4, Operands { v0, v1, v2, v3 }) + // VINSERTI64X4 imm8, ymm, zmm, zmm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x3a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTI64X4 imm8, m256, zmm, zmm{k}{z} + if isImm8(v0) && isM256(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x3a) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VINSERTI64X4") + } + return p +} + +// VINSERTPS performs "Insert Packed Single Precision Floating-Point Value". 
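+//
+// Illustrative call; the imm8 packs the source/destination element selectors
+// and the zero mask as defined by the hardware (0x10 below is an arbitrary
+// choice meaning "insert element 0 of xmm0 into element 1"):
+//
+//     p.VINSERTPS(0x10, XMM0, XMM1, XMM2)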
+// +// Mnemonic : VINSERTPS +// Supported forms : (4 forms) +// +// * VINSERTPS imm8, xmm, xmm, xmm [AVX] +// * VINSERTPS imm8, m32, xmm, xmm [AVX] +// * VINSERTPS imm8, xmm, xmm, xmm [AVX512F] +// * VINSERTPS imm8, m32, xmm, xmm [AVX512F] +// +func (self *Program) VINSERTPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VINSERTPS", 4, Operands { v0, v1, v2, v3 }) + // VINSERTPS imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x21) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTPS imm8, m32, xmm, xmm + if isImm8(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x21) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTPS imm8, xmm, xmm, xmm + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00) + m.emit(0x21) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VINSERTPS imm8, m32, xmm, xmm + if isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), 0, 0, 0) + m.emit(0x21) + m.mrsd(lcode(v[3]), addr(v[1]), 4) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VINSERTPS") + } + return p +} + +// VLDDQU performs "Load Unaligned Integer 128 Bits". +// +// Mnemonic : VLDDQU +// Supported forms : (2 forms) +// +// * VLDDQU m128, xmm [AVX] +// * VLDDQU m256, ymm [AVX] +// +func (self *Program) VLDDQU(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VLDDQU", 2, Operands { v0, v1 }) + // VLDDQU m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[1]), addr(v[0]), 0) + m.emit(0xf0) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VLDDQU m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[1]), addr(v[0]), 0) + m.emit(0xf0) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VLDDQU") + } + return p +} + +// VLDMXCSR performs "Load MXCSR Register". 
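+//
+// Only a memory form exists. The sketch below leaves the operand abstract:
+// mem32 stands for an m32 operand built with this package's memory-operand
+// helpers (construction not shown here).
+//
+//     p.VLDMXCSR(mem32)   // loads MXCSR from the 32-bit location mem32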
+// +// Mnemonic : VLDMXCSR +// Supported forms : (1 form) +// +// * VLDMXCSR m32 [AVX] +// +func (self *Program) VLDMXCSR(v0 interface{}) *Instruction { + p := self.alloc("VLDMXCSR", 1, Operands { v0 }) + // VLDMXCSR m32 + if isM32(v0) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, 0, addr(v[0]), 0) + m.emit(0xae) + m.mrsd(2, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VLDMXCSR") + } + return p +} + +// VMASKMOVDQU performs "Store Selected Bytes of Double Quadword". +// +// Mnemonic : VMASKMOVDQU +// Supported forms : (1 form) +// +// * VMASKMOVDQU xmm, xmm [AVX] +// +func (self *Program) VMASKMOVDQU(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMASKMOVDQU", 2, Operands { v0, v1 }) + // VMASKMOVDQU xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), v[0], 0) + m.emit(0xf7) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMASKMOVDQU") + } + return p +} + +// VMASKMOVPD performs "Conditional Move Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VMASKMOVPD +// Supported forms : (4 forms) +// +// * VMASKMOVPD m128, xmm, xmm [AVX] +// * VMASKMOVPD m256, ymm, ymm [AVX] +// * VMASKMOVPD xmm, xmm, m128 [AVX] +// * VMASKMOVPD ymm, ymm, m256 [AVX] +// +func (self *Program) VMASKMOVPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VMASKMOVPD", 3, Operands { v0, v1, v2 }) + // VMASKMOVPD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x2d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMASKMOVPD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x2d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMASKMOVPD xmm, xmm, m128 + if isXMM(v0) && isXMM(v1) && isM128(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[0]), addr(v[2]), hlcode(v[1])) + m.emit(0x2f) + m.mrsd(lcode(v[0]), addr(v[2]), 1) + }) + } + // VMASKMOVPD ymm, ymm, m256 + if isYMM(v0) && isYMM(v1) && isM256(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[0]), addr(v[2]), hlcode(v[1])) + m.emit(0x2f) + m.mrsd(lcode(v[0]), addr(v[2]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VMASKMOVPD") + } + return p +} + +// VMASKMOVPS performs "Conditional Move Packed Single-Precision Floating-Point Values". 
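+//
+// Every form involves memory; as with VLDMXCSR above, m128 stands for a
+// memory operand built with this package's helpers (construction elided):
+//
+//     p.VMASKMOVPS(m128, XMM1, XMM2)   // masked load: elements whose sign bit is set in xmm1 are read from m128 into xmm2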
+// +// Mnemonic : VMASKMOVPS +// Supported forms : (4 forms) +// +// * VMASKMOVPS m128, xmm, xmm [AVX] +// * VMASKMOVPS m256, ymm, ymm [AVX] +// * VMASKMOVPS xmm, xmm, m128 [AVX] +// * VMASKMOVPS ymm, ymm, m256 [AVX] +// +func (self *Program) VMASKMOVPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VMASKMOVPS", 3, Operands { v0, v1, v2 }) + // VMASKMOVPS m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x2c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMASKMOVPS m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x2c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMASKMOVPS xmm, xmm, m128 + if isXMM(v0) && isXMM(v1) && isM128(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[0]), addr(v[2]), hlcode(v[1])) + m.emit(0x2e) + m.mrsd(lcode(v[0]), addr(v[2]), 1) + }) + } + // VMASKMOVPS ymm, ymm, m256 + if isYMM(v0) && isYMM(v1) && isM256(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[0]), addr(v[2]), hlcode(v[1])) + m.emit(0x2e) + m.mrsd(lcode(v[0]), addr(v[2]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VMASKMOVPS") + } + return p +} + +// VMAXPD performs "Return Maximum Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VMAXPD +// Supported forms : (11 forms) +// +// * VMAXPD xmm, xmm, xmm [AVX] +// * VMAXPD m128, xmm, xmm [AVX] +// * VMAXPD ymm, ymm, ymm [AVX] +// * VMAXPD m256, ymm, ymm [AVX] +// * VMAXPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VMAXPD {sae}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VMAXPD zmm, zmm, zmm{k}{z} [AVX512F] +// * VMAXPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMAXPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMAXPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMAXPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMAXPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMAXPD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VMAXPD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VMAXPD takes 3 or 4 operands") + } + // VMAXPD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMAXPD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMAXPD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // 
VMAXPD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMAXPD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VMAXPD {sae}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VMAXPD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMAXPD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VMAXPD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMAXPD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VMAXPD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMAXPD") + } + return p +} + +// VMAXPS performs "Return Maximum 
Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VMAXPS +// Supported forms : (11 forms) +// +// * VMAXPS xmm, xmm, xmm [AVX] +// * VMAXPS m128, xmm, xmm [AVX] +// * VMAXPS ymm, ymm, ymm [AVX] +// * VMAXPS m256, ymm, ymm [AVX] +// * VMAXPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VMAXPS {sae}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VMAXPS zmm, zmm, zmm{k}{z} [AVX512F] +// * VMAXPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMAXPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMAXPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMAXPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMAXPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMAXPS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VMAXPS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VMAXPS takes 3 or 4 operands") + } + // VMAXPS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMAXPS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMAXPS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMAXPS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMAXPS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VMAXPS {sae}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VMAXPS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMAXPS m128/m32bcst, xmm, xmm{k}{z} 
+ if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VMAXPS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMAXPS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VMAXPS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMAXPS") + } + return p +} + +// VMAXSD performs "Return Maximum Scalar Double-Precision Floating-Point Value". 
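+//
+// A usage sketch (editor's addition, not generator output): operands read
+// source-first with the destination last, mirroring the form listings. It
+// assumes a *Program p, this package's XMM register constants, and its Ptr
+// memory-operand helper:
+//
+//	p.VMAXSD(XMM2, XMM1, XMM0)        // XMM0 = max(low doubles of XMM1, XMM2); upper bits from XMM1
+//	p.VMAXSD(Ptr(RAX, 0), XMM1, XMM0) // m64 source form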
+// +// Mnemonic : VMAXSD +// Supported forms : (5 forms) +// +// * VMAXSD xmm, xmm, xmm [AVX] +// * VMAXSD m64, xmm, xmm [AVX] +// * VMAXSD m64, xmm, xmm{k}{z} [AVX512F] +// * VMAXSD {sae}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VMAXSD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VMAXSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMAXSD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VMAXSD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VMAXSD takes 3 or 4 operands") + } + // VMAXSD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMAXSD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMAXSD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VMAXSD {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xff ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VMAXSD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMAXSD") + } + return p +} + +// VMAXSS performs "Return Maximum Scalar Single-Precision Floating-Point Value". 
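+//
+// Editor's sketch: same shape as VMAXSD at 32-bit width, so the memory form
+// reads an m32 (Ptr helper and register constants assumed as above):
+//
+//	p.VMAXSS(Ptr(RAX, 0), XMM1, XMM0) // low float of XMM0 = max(XMM1, [RAX])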
+// +// Mnemonic : VMAXSS +// Supported forms : (5 forms) +// +// * VMAXSS xmm, xmm, xmm [AVX] +// * VMAXSS m32, xmm, xmm [AVX] +// * VMAXSS m32, xmm, xmm{k}{z} [AVX512F] +// * VMAXSS {sae}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VMAXSS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VMAXSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMAXSS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VMAXSS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VMAXSS takes 3 or 4 operands") + } + // VMAXSS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMAXSS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMAXSS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x5f) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VMAXSS {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7e ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VMAXSS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMAXSS") + } + return p +} + +// VMINPD performs "Return Minimum Packed Double-Precision Floating-Point Values". 
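+//
+// Editor's sketch: the packed forms apply the minimum lane-wise across the
+// whole vector (register constants assumed):
+//
+//	p.VMINPD(XMM2, XMM1, XMM0) // 2 doubles, VEX.128
+//	p.VMINPD(YMM2, YMM1, YMM0) // 4 doubles, VEX.256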
+// +// Mnemonic : VMINPD +// Supported forms : (11 forms) +// +// * VMINPD xmm, xmm, xmm [AVX] +// * VMINPD m128, xmm, xmm [AVX] +// * VMINPD ymm, ymm, ymm [AVX] +// * VMINPD m256, ymm, ymm [AVX] +// * VMINPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VMINPD {sae}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VMINPD zmm, zmm, zmm{k}{z} [AVX512F] +// * VMINPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMINPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMINPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMINPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMINPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMINPD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VMINPD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VMINPD takes 3 or 4 operands") + } + // VMINPD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMINPD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMINPD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMINPD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMINPD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VMINPD {sae}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VMINPD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMINPD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && 
isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VMINPD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMINPD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VMINPD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMINPD") + } + return p +} + +// VMINPS performs "Return Minimum Packed Single-Precision Floating-Point Values". 
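+//
+// Editor's sketch: the ZMM forms are EVEX-encoded; a bare ZMM register is
+// assumed to be accepted where zmm{k}{z} is listed, i.e. masking is optional:
+//
+//	p.VMINPS(ZMM2, ZMM1, ZMM0) // 16 floats, requires AVX-512F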
+// +// Mnemonic : VMINPS +// Supported forms : (11 forms) +// +// * VMINPS xmm, xmm, xmm [AVX] +// * VMINPS m128, xmm, xmm [AVX] +// * VMINPS ymm, ymm, ymm [AVX] +// * VMINPS m256, ymm, ymm [AVX] +// * VMINPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VMINPS {sae}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VMINPS zmm, zmm, zmm{k}{z} [AVX512F] +// * VMINPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMINPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMINPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMINPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMINPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMINPS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VMINPS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VMINPS takes 3 or 4 operands") + } + // VMINPS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMINPS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMINPS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMINPS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMINPS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VMINPS {sae}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VMINPS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMINPS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && 
isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VMINPS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMINPS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VMINPS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMINPS") + } + return p +} + +// VMINSD performs "Return Minimum Scalar Double-Precision Floating-Point Value". 
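+//
+// Editor's sketch: the {sae} form suppresses floating-point exceptions and
+// takes the SAE indicator as the first of four operands; the token name below
+// is hypothetical, not confirmed from this package:
+//
+//	p.VMINSD(SAE, XMM2, XMM1, XMM0) // SAE token name assumed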
+// +// Mnemonic : VMINSD +// Supported forms : (5 forms) +// +// * VMINSD xmm, xmm, xmm [AVX] +// * VMINSD m64, xmm, xmm [AVX] +// * VMINSD m64, xmm, xmm{k}{z} [AVX512F] +// * VMINSD {sae}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VMINSD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VMINSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMINSD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VMINSD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VMINSD takes 3 or 4 operands") + } + // VMINSD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMINSD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMINSD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VMINSD {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xff ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VMINSD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMINSD") + } + return p +} + +// VMINSS performs "Return Minimum Scalar Single-Precision Floating-Point Value". 
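+//
+// Editor's sketch: scalar results can be chained through the destination,
+// e.g. a three-way minimum (register constants assumed):
+//
+//	p.VMINSS(XMM2, XMM1, XMM0) // XMM0 = min(XMM1, XMM2)
+//	p.VMINSS(XMM3, XMM0, XMM0) // XMM0 = min(XMM0, XMM3)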
+// +// Mnemonic : VMINSS +// Supported forms : (5 forms) +// +// * VMINSS xmm, xmm, xmm [AVX] +// * VMINSS m32, xmm, xmm [AVX] +// * VMINSS m32, xmm, xmm{k}{z} [AVX512F] +// * VMINSS {sae}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VMINSS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VMINSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMINSS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VMINSS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VMINSS takes 3 or 4 operands") + } + // VMINSS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMINSS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMINSS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x5d) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VMINSS {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7e ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VMINSS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMINSS") + } + return p +} + +// VMOVAPD performs "Move Aligned Packed Double-Precision Floating-Point Values". 
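+//
+// Editor's sketch: the aligned moves fault if the memory operand is not
+// 16/32/64-byte aligned for the xmm/ymm/zmm widths respectively, and load
+// versus store is distinguished purely by operand order (Ptr helper assumed):
+//
+//	p.VMOVAPD(Ptr(RAX, 0), XMM0) // load:  XMM0 = [RAX]
+//	p.VMOVAPD(XMM0, Ptr(RAX, 0)) // store: [RAX] = XMM0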
+// +// Mnemonic : VMOVAPD +// Supported forms : (15 forms) +// +// * VMOVAPD xmm, xmm [AVX] +// * VMOVAPD m128, xmm [AVX] +// * VMOVAPD ymm, ymm [AVX] +// * VMOVAPD m256, ymm [AVX] +// * VMOVAPD xmm, m128 [AVX] +// * VMOVAPD ymm, m256 [AVX] +// * VMOVAPD zmm, m512{k}{z} [AVX512F] +// * VMOVAPD zmm, zmm{k}{z} [AVX512F] +// * VMOVAPD m512, zmm{k}{z} [AVX512F] +// * VMOVAPD xmm, m128{k}{z} [AVX512F,AVX512VL] +// * VMOVAPD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVAPD ymm, m256{k}{z} [AVX512F,AVX512VL] +// * VMOVAPD ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMOVAPD m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVAPD m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMOVAPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVAPD", 2, Operands { v0, v1 }) + // VMOVAPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), v[0], 0) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), v[1], 0) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVAPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), addr(v[0]), 0) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVAPD ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), v[0], 0) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[0]), v[1], 0) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVAPD m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), addr(v[0]), 0) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVAPD xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), addr(v[1]), 0) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVAPD ymm, m256 + if isYMM(v0) && isM256(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[0]), addr(v[1]), 0) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVAPD zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 64) + }) + } + // VMOVAPD zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 
3 | lcode(v[1])) + }) + } + // VMOVAPD m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVAPD xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VMOVAPD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVAPD ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VMOVAPD ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVAPD m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVAPD m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVAPD") + } + return p +} + +// VMOVAPS performs "Move Aligned Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VMOVAPS +// Supported forms : (15 forms) +// +// * VMOVAPS xmm, xmm [AVX] +// * VMOVAPS m128, xmm [AVX] +// * VMOVAPS ymm, ymm [AVX] +// * VMOVAPS m256, ymm [AVX] +// * VMOVAPS xmm, m128 [AVX] +// * VMOVAPS ymm, m256 [AVX] +// * VMOVAPS zmm, m512{k}{z} [AVX512F] +// * VMOVAPS zmm, zmm{k}{z} [AVX512F] +// * VMOVAPS m512, zmm{k}{z} [AVX512F] +// * VMOVAPS xmm, m128{k}{z} [AVX512F,AVX512VL] +// * VMOVAPS xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVAPS ymm, m256{k}{z} [AVX512F,AVX512VL] +// * VMOVAPS ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMOVAPS m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVAPS m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMOVAPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVAPS", 2, Operands { v0, v1 }) + // VMOVAPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), v[0], 0) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[0]), v[1], 0) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVAPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), addr(v[0]), 0) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVAPS ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), v[0], 0) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[0]), v[1], 0) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVAPS m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), addr(v[0]), 0) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVAPS xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[0]), addr(v[1]), 0) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVAPS ymm, m256 + if isYMM(v0) && isM256(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[0]), addr(v[1]), 0) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVAPS zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 64) + }) + } + // VMOVAPS zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 
3 | lcode(v[1])) + }) + } + // VMOVAPS m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVAPS xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VMOVAPS xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVAPS ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x29) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VMOVAPS ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x29) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVAPS m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVAPS m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x28) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVAPS") + } + return p +} + +// VMOVD performs "Move Doubleword". 
+// +// Mnemonic : VMOVD +// Supported forms : (8 forms) +// +// * VMOVD xmm, r32 [AVX] +// * VMOVD r32, xmm [AVX] +// * VMOVD m32, xmm [AVX] +// * VMOVD xmm, m32 [AVX] +// * VMOVD xmm, r32 [AVX512F] +// * VMOVD r32, xmm [AVX512F] +// * VMOVD m32, xmm [AVX512F] +// * VMOVD xmm, m32 [AVX512F] +// +func (self *Program) VMOVD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVD", 2, Operands { v0, v1 }) + // VMOVD xmm, r32 + if isXMM(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), v[1], 0) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVD r32, xmm + if isReg32(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), v[0], 0) + m.emit(0x6e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVD m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), addr(v[0]), 0) + m.emit(0x6e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVD xmm, m32 + if isXMM(v0) && isM32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), addr(v[1]), 0) + m.emit(0x7e) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVD xmm, r32 + if isEVEXXMM(v0) && isReg32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7d) + m.emit(0x08) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVD r32, xmm + if isReg32(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit(0x08) + m.emit(0x6e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVD m32, xmm + if isM32(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x6e) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VMOVD xmm, m32 + if isEVEXXMM(v0) && isM32(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0x7e) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVD") + } + return p +} + +// VMOVDDUP performs "Move One Double-FP and Duplicate". 
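+//
+// Editor's sketch: VMOVDDUP copies the low double of each 128-bit lane into
+// both halves of that lane, so the m64 form doubles as a scalar broadcast
+// load (Ptr helper assumed):
+//
+//	p.VMOVDDUP(Ptr(RAX, 0), XMM0) // XMM0 = {[RAX], [RAX]}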
+// +// Mnemonic : VMOVDDUP +// Supported forms : (10 forms) +// +// * VMOVDDUP xmm, xmm [AVX] +// * VMOVDDUP m64, xmm [AVX] +// * VMOVDDUP ymm, ymm [AVX] +// * VMOVDDUP m256, ymm [AVX] +// * VMOVDDUP zmm, zmm{k}{z} [AVX512F] +// * VMOVDDUP m512, zmm{k}{z} [AVX512F] +// * VMOVDDUP xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVDDUP ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMOVDDUP m64, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVDDUP m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMOVDDUP(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVDDUP", 2, Operands { v0, v1 }) + // VMOVDDUP xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[1]), v[0], 0) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVDDUP m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[1]), addr(v[0]), 0) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVDDUP ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[1]), v[0], 0) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVDDUP m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[1]), addr(v[0]), 0) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVDDUP zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVDDUP m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVDDUP xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVDDUP ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVDDUP m64, xmm{k}{z} + if isM64(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VMOVDDUP m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL 
| ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVDDUP") + } + return p +} + +// VMOVDQA performs "Move Aligned Double Quadword". +// +// Mnemonic : VMOVDQA +// Supported forms : (6 forms) +// +// * VMOVDQA xmm, xmm [AVX] +// * VMOVDQA m128, xmm [AVX] +// * VMOVDQA ymm, ymm [AVX] +// * VMOVDQA m256, ymm [AVX] +// * VMOVDQA xmm, m128 [AVX] +// * VMOVDQA ymm, m256 [AVX] +// +func (self *Program) VMOVDQA(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVDQA", 2, Operands { v0, v1 }) + // VMOVDQA xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), v[0], 0) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), v[1], 0) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQA m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), addr(v[0]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVDQA ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), v[0], 0) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[0]), v[1], 0) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQA m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), addr(v[0]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVDQA xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), addr(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVDQA ymm, m256 + if isYMM(v0) && isM256(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[0]), addr(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVDQA") + } + return p +} + +// VMOVDQA32 performs "Move Aligned Doubleword Values". 
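+//
+// Editor's note: VMOVDQA32 is the EVEX counterpart of VMOVDQA with 32-bit
+// element granularity for {k}{z} masking; a bare register or memory operand
+// is assumed to be accepted where a masked form is listed:
+//
+//	p.VMOVDQA32(Ptr(RAX, 0), ZMM0) // 64-byte-aligned load, AVX-512F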
+// +// Mnemonic : VMOVDQA32 +// Supported forms : (9 forms) +// +// * VMOVDQA32 zmm, m512{k}{z} [AVX512F] +// * VMOVDQA32 zmm, zmm{k}{z} [AVX512F] +// * VMOVDQA32 m512, zmm{k}{z} [AVX512F] +// * VMOVDQA32 xmm, m128{k}{z} [AVX512F,AVX512VL] +// * VMOVDQA32 xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVDQA32 ymm, m256{k}{z} [AVX512F,AVX512VL] +// * VMOVDQA32 ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMOVDQA32 m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVDQA32 m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMOVDQA32(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVDQA32", 2, Operands { v0, v1 }) + // VMOVDQA32 zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 64) + }) + } + // VMOVDQA32 zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQA32 m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVDQA32 xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VMOVDQA32 xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQA32 ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VMOVDQA32 ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 
4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQA32 m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVDQA32 m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVDQA32") + } + return p +} + +// VMOVDQA64 performs "Move Aligned Quadword Values". +// +// Mnemonic : VMOVDQA64 +// Supported forms : (9 forms) +// +// * VMOVDQA64 zmm, m512{k}{z} [AVX512F] +// * VMOVDQA64 zmm, zmm{k}{z} [AVX512F] +// * VMOVDQA64 m512, zmm{k}{z} [AVX512F] +// * VMOVDQA64 xmm, m128{k}{z} [AVX512F,AVX512VL] +// * VMOVDQA64 xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVDQA64 ymm, m256{k}{z} [AVX512F,AVX512VL] +// * VMOVDQA64 ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMOVDQA64 m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVDQA64 m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMOVDQA64(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVDQA64", 2, Operands { v0, v1 }) + // VMOVDQA64 zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 64) + }) + } + // VMOVDQA64 zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQA64 m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVDQA64 xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VMOVDQA64 xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + 
self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQA64 ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VMOVDQA64 ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQA64 m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVDQA64 m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVDQA64") + } + return p +} + +// VMOVDQU performs "Move Unaligned Double Quadword". 
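+//
+// Editor's note: unlike VMOVDQA, the unaligned form never faults on operand
+// alignment, at a possible cost when a load or store splits a cache line:
+//
+//	p.VMOVDQU(Ptr(RAX, 1), XMM0) // legal despite the odd address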
+// +// Mnemonic : VMOVDQU +// Supported forms : (6 forms) +// +// * VMOVDQU xmm, xmm [AVX] +// * VMOVDQU m128, xmm [AVX] +// * VMOVDQU ymm, ymm [AVX] +// * VMOVDQU m256, ymm [AVX] +// * VMOVDQU xmm, m128 [AVX] +// * VMOVDQU ymm, m256 [AVX] +// +func (self *Program) VMOVDQU(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVDQU", 2, Operands { v0, v1 }) + // VMOVDQU xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), v[0], 0) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[0]), v[1], 0) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), addr(v[0]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVDQU ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[1]), v[0], 0) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[0]), v[1], 0) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[1]), addr(v[0]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVDQU xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[0]), addr(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVDQU ymm, m256 + if isYMM(v0) && isM256(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[0]), addr(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVDQU") + } + return p +} + +// VMOVDQU16 performs "Move Unaligned Word Values". 
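+// +// The "16" suffix denotes 16-bit (word) element granularity, which matters +// only when an opmask is supplied: each {k} bit then governs one word lane, +// and word-granular masking is what raises the ISA requirement to AVX-512BW.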
+// +// Mnemonic : VMOVDQU16 +// Supported forms : (9 forms) +// +// * VMOVDQU16 zmm, m512{k}{z} [AVX512BW] +// * VMOVDQU16 zmm, zmm{k}{z} [AVX512BW] +// * VMOVDQU16 m512, zmm{k}{z} [AVX512BW] +// * VMOVDQU16 xmm, m128{k}{z} [AVX512BW,AVX512VL] +// * VMOVDQU16 xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VMOVDQU16 ymm, m256{k}{z} [AVX512BW,AVX512VL] +// * VMOVDQU16 ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VMOVDQU16 m128, xmm{k}{z} [AVX512BW,AVX512VL] +// * VMOVDQU16 m256, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VMOVDQU16(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVDQU16", 2, Operands { v0, v1 }) + // VMOVDQU16 zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 64) + }) + } + // VMOVDQU16 zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU16 m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVDQU16 xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VMOVDQU16 xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU16 ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VMOVDQU16 ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[1]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xff) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU16 m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVDQU16 m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVDQU16") + } + return p +} + +// VMOVDQU32 performs "Move Unaligned Doubleword Values". +// +// Mnemonic : VMOVDQU32 +// Supported forms : (9 forms) +// +// * VMOVDQU32 zmm, m512{k}{z} [AVX512F] +// * VMOVDQU32 zmm, zmm{k}{z} [AVX512F] +// * VMOVDQU32 m512, zmm{k}{z} [AVX512F] +// * VMOVDQU32 xmm, m128{k}{z} [AVX512F,AVX512VL] +// * VMOVDQU32 xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVDQU32 ymm, m256{k}{z} [AVX512F,AVX512VL] +// * VMOVDQU32 ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMOVDQU32 m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVDQU32 m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMOVDQU32(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVDQU32", 2, Operands { v0, v1 }) + // VMOVDQU32 zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 64) + }) + } + // VMOVDQU32 zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU32 m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVDQU32 xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VMOVDQU32 xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + 
self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU32 ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VMOVDQU32 ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU32 m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVDQU32 m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVDQU32") + } + return p +} + +// VMOVDQU64 performs "Move Unaligned Quadword Values". 
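+// +// Absent masking, VMOVDQU8/16/32/64 all move exactly the same bits; the +// element width only decides how {k}{z} masking partitions the vector (one +// mask bit per quadword here, which is why plain AVX-512F suffices).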
+// +// Mnemonic : VMOVDQU64 +// Supported forms : (9 forms) +// +// * VMOVDQU64 zmm, m512{k}{z} [AVX512F] +// * VMOVDQU64 zmm, zmm{k}{z} [AVX512F] +// * VMOVDQU64 m512, zmm{k}{z} [AVX512F] +// * VMOVDQU64 xmm, m128{k}{z} [AVX512F,AVX512VL] +// * VMOVDQU64 xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVDQU64 ymm, m256{k}{z} [AVX512F,AVX512VL] +// * VMOVDQU64 ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMOVDQU64 m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVDQU64 m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMOVDQU64(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVDQU64", 2, Operands { v0, v1 }) + // VMOVDQU64 zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 64) + }) + } + // VMOVDQU64 zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU64 m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVDQU64 xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VMOVDQU64 xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU64 ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VMOVDQU64 ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 
4))) + m.emit(0xfe) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfe) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU64 m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVDQU64 m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVDQU64") + } + return p +} + +// VMOVDQU8 performs "Move Unaligned Byte Values". +// +// Mnemonic : VMOVDQU8 +// Supported forms : (9 forms) +// +// * VMOVDQU8 zmm, m512{k}{z} [AVX512BW] +// * VMOVDQU8 zmm, zmm{k}{z} [AVX512BW] +// * VMOVDQU8 m512, zmm{k}{z} [AVX512BW] +// * VMOVDQU8 xmm, m128{k}{z} [AVX512BW,AVX512VL] +// * VMOVDQU8 xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VMOVDQU8 ymm, m256{k}{z} [AVX512BW,AVX512VL] +// * VMOVDQU8 ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VMOVDQU8 m128, xmm{k}{z} [AVX512BW,AVX512VL] +// * VMOVDQU8 m256, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VMOVDQU8(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVDQU8", 2, Operands { v0, v1 }) + // VMOVDQU8 zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 64) + }) + } + // VMOVDQU8 zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU8 m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVDQU8 xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VMOVDQU8 xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | 
ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU8 ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x7f) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VMOVDQU8 ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x6f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVDQU8 m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVDQU8 m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x6f) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVDQU8") + } + return p +} + +// VMOVHLPS performs "Move Packed Single-Precision Floating-Point Values High to Low". 
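+// +// Operands follow this package's source-first order, so the last operand is +// the destination: its low quadword is loaded from the high quadword of the +// first operand and its high quadword from the second operand. A common idiom +// pairs this with VADDPS to finish horizontal reductions.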
+// +// Mnemonic : VMOVHLPS +// Supported forms : (2 forms) +// +// * VMOVHLPS xmm, xmm, xmm [AVX] +// * VMOVHLPS xmm, xmm, xmm [AVX512F] +// +func (self *Program) VMOVHLPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VMOVHLPS", 3, Operands { v0, v1, v2 }) + // VMOVHLPS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x12) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMOVHLPS xmm, xmm, xmm + if isEVEXXMM(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x00) + m.emit(0x12) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVHLPS") + } + return p +} + +// VMOVHPD performs "Move High Packed Double-Precision Floating-Point Value". +// +// Mnemonic : VMOVHPD +// Supported forms : (4 forms) +// +// * VMOVHPD xmm, m64 [AVX] +// * VMOVHPD m64, xmm, xmm [AVX] +// * VMOVHPD xmm, m64 [AVX512F] +// * VMOVHPD m64, xmm, xmm [AVX512F] +// +func (self *Program) VMOVHPD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMOVHPD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VMOVHPD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VMOVHPD takes 2 or 3 operands") + } + // VMOVHPD xmm, m64 + if len(vv) == 0 && isXMM(v0) && isM64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), addr(v[1]), 0) + m.emit(0x17) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVHPD m64, xmm, xmm + if len(vv) == 1 && isM64(v0) && isXMM(v1) && isXMM(vv[0]) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x16) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMOVHPD xmm, m64 + if len(vv) == 0 && isEVEXXMM(v0) && isM64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0x17) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VMOVHPD m64, xmm, xmm + if len(vv) == 1 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0x16) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVHPD") + } + return p +} + +// VMOVHPS performs "Move High Packed Single-Precision Floating-Point Values". 
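+// +// The two-operand form stores the upper 64 bits of an XMM register to memory; +// the three-operand form loads 64 bits from memory into the upper half of the +// destination while the lower half is copied from the register source.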
+// +// Mnemonic : VMOVHPS +// Supported forms : (4 forms) +// +// * VMOVHPS xmm, m64 [AVX] +// * VMOVHPS m64, xmm, xmm [AVX] +// * VMOVHPS xmm, m64 [AVX512F] +// * VMOVHPS m64, xmm, xmm [AVX512F] +// +func (self *Program) VMOVHPS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMOVHPS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VMOVHPS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VMOVHPS takes 2 or 3 operands") + } + // VMOVHPS xmm, m64 + if len(vv) == 0 && isXMM(v0) && isM64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[0]), addr(v[1]), 0) + m.emit(0x17) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVHPS m64, xmm, xmm + if len(vv) == 1 && isM64(v0) && isXMM(v1) && isXMM(vv[0]) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x16) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMOVHPS xmm, m64 + if len(vv) == 0 && isEVEXXMM(v0) && isM64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0x17) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VMOVHPS m64, xmm, xmm + if len(vv) == 1 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0x16) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVHPS") + } + return p +} + +// VMOVLHPS performs "Move Packed Single-Precision Floating-Point Values Low to High". +// +// Mnemonic : VMOVLHPS +// Supported forms : (2 forms) +// +// * VMOVLHPS xmm, xmm, xmm [AVX] +// * VMOVLHPS xmm, xmm, xmm [AVX512F] +// +func (self *Program) VMOVLHPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VMOVLHPS", 3, Operands { v0, v1, v2 }) + // VMOVLHPS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x16) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMOVLHPS xmm, xmm, xmm + if isEVEXXMM(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x00) + m.emit(0x16) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVLHPS") + } + return p +} + +// VMOVLPD performs "Move Low Packed Double-Precision Floating-Point Value". 
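+// +// The mirror image of VMOVHPD: the two-operand form stores the low 64 bits to +// memory, while the three-operand form replaces only the low 64 bits of the +// destination and takes the upper half from the register source.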
+// +// Mnemonic : VMOVLPD +// Supported forms : (4 forms) +// +// * VMOVLPD xmm, m64 [AVX] +// * VMOVLPD m64, xmm, xmm [AVX] +// * VMOVLPD xmm, m64 [AVX512F] +// * VMOVLPD m64, xmm, xmm [AVX512F] +// +func (self *Program) VMOVLPD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMOVLPD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VMOVLPD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VMOVLPD takes 2 or 3 operands") + } + // VMOVLPD xmm, m64 + if len(vv) == 0 && isXMM(v0) && isM64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), addr(v[1]), 0) + m.emit(0x13) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVLPD m64, xmm, xmm + if len(vv) == 1 && isM64(v0) && isXMM(v1) && isXMM(vv[0]) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x12) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMOVLPD xmm, m64 + if len(vv) == 0 && isEVEXXMM(v0) && isM64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0x13) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VMOVLPD m64, xmm, xmm + if len(vv) == 1 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0x12) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVLPD") + } + return p +} + +// VMOVLPS performs "Move Low Packed Single-Precision Floating-Point Values". 
+// +// Mnemonic : VMOVLPS +// Supported forms : (4 forms) +// +// * VMOVLPS xmm, m64 [AVX] +// * VMOVLPS m64, xmm, xmm [AVX] +// * VMOVLPS xmm, m64 [AVX512F] +// * VMOVLPS m64, xmm, xmm [AVX512F] +// +func (self *Program) VMOVLPS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMOVLPS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VMOVLPS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VMOVLPS takes 2 or 3 operands") + } + // VMOVLPS xmm, m64 + if len(vv) == 0 && isXMM(v0) && isM64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[0]), addr(v[1]), 0) + m.emit(0x13) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVLPS m64, xmm, xmm + if len(vv) == 1 && isM64(v0) && isXMM(v1) && isXMM(vv[0]) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x12) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMOVLPS xmm, m64 + if len(vv) == 0 && isEVEXXMM(v0) && isM64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0x13) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VMOVLPS m64, xmm, xmm + if len(vv) == 1 && isM64(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0x12) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVLPS") + } + return p +} + +// VMOVMSKPD performs "Extract Packed Double-Precision Floating-Point Sign Mask". +// +// Mnemonic : VMOVMSKPD +// Supported forms : (2 forms) +// +// * VMOVMSKPD xmm, r32 [AVX] +// * VMOVMSKPD ymm, r32 [AVX] +// +func (self *Program) VMOVMSKPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVMSKPD", 2, Operands { v0, v1 }) + // VMOVMSKPD xmm, r32 + if isXMM(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), v[0], 0) + m.emit(0x50) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVMSKPD ymm, r32 + if isYMM(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), v[0], 0) + m.emit(0x50) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVMSKPD") + } + return p +} + +// VMOVMSKPS performs "Extract Packed Single-Precision Floating-Point Sign Mask". 
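+// +// The sign bit of every single-precision lane is packed into the low bits of +// the destination GPR (4 bits from an XMM source, 8 from a YMM source), which +// is convenient for branching on packed-compare results. A hedged sketch, +// assuming this package's XMM0/EAX register constants: +// +// p.VMOVMSKPS(XMM0, EAX) // EAX[3:0] = the four lane sign bits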
+// +// Mnemonic : VMOVMSKPS +// Supported forms : (2 forms) +// +// * VMOVMSKPS xmm, r32 [AVX] +// * VMOVMSKPS ymm, r32 [AVX] +// +func (self *Program) VMOVMSKPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVMSKPS", 2, Operands { v0, v1 }) + // VMOVMSKPS xmm, r32 + if isXMM(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), v[0], 0) + m.emit(0x50) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVMSKPS ymm, r32 + if isYMM(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), v[0], 0) + m.emit(0x50) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVMSKPS") + } + return p +} + +// VMOVNTDQ performs "Store Double Quadword Using Non-Temporal Hint". +// +// Mnemonic : VMOVNTDQ +// Supported forms : (5 forms) +// +// * VMOVNTDQ xmm, m128 [AVX] +// * VMOVNTDQ ymm, m256 [AVX] +// * VMOVNTDQ zmm, m512 [AVX512F] +// * VMOVNTDQ xmm, m128 [AVX512F,AVX512VL] +// * VMOVNTDQ ymm, m256 [AVX512F,AVX512VL] +// +func (self *Program) VMOVNTDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVNTDQ", 2, Operands { v0, v1 }) + // VMOVNTDQ xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), addr(v[1]), 0) + m.emit(0xe7) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVNTDQ ymm, m256 + if isYMM(v0) && isM256(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[0]), addr(v[1]), 0) + m.emit(0xe7) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVNTDQ zmm, m512 + if isZMM(v0) && isM512(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0xe7) + m.mrsd(lcode(v[0]), addr(v[1]), 64) + }) + } + // VMOVNTDQ xmm, m128 + if isEVEXXMM(v0) && isM128(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0xe7) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VMOVNTDQ ymm, m256 + if isEVEXYMM(v0) && isM256(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0xe7) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVNTDQ") + } + return p +} + +// VMOVNTDQA performs "Load Double Quadword Non-Temporal Aligned Hint". 
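+// +// This is the streaming-load counterpart of VMOVNTDQ: the hint lets reads +// bypass the cache hierarchy where the hardware supports it (chiefly on +// write-combining memory), and the memory operand must still be aligned.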
+// +// Mnemonic : VMOVNTDQA +// Supported forms : (5 forms) +// +// * VMOVNTDQA m128, xmm [AVX] +// * VMOVNTDQA m256, ymm [AVX2] +// * VMOVNTDQA m512, zmm [AVX512F] +// * VMOVNTDQA m128, xmm [AVX512F,AVX512VL] +// * VMOVNTDQA m256, ymm [AVX512F,AVX512VL] +// +func (self *Program) VMOVNTDQA(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVNTDQA", 2, Operands { v0, v1 }) + // VMOVNTDQA m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVNTDQA m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVNTDQA m512, zmm + if isM512(v0) && isZMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVNTDQA m128, xmm + if isM128(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVNTDQA m256, ymm + if isM256(v0) && isEVEXYMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2a) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVNTDQA") + } + return p +} + +// VMOVNTPD performs "Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint". 
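+// +// Non-temporal stores are weakly ordered with respect to other stores; code +// that publishes the written buffer to another agent conventionally ends a +// run of them with an SFENCE.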
+// +// Mnemonic : VMOVNTPD +// Supported forms : (5 forms) +// +// * VMOVNTPD xmm, m128 [AVX] +// * VMOVNTPD ymm, m256 [AVX] +// * VMOVNTPD zmm, m512 [AVX512F] +// * VMOVNTPD xmm, m128 [AVX512F,AVX512VL] +// * VMOVNTPD ymm, m256 [AVX512F,AVX512VL] +// +func (self *Program) VMOVNTPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVNTPD", 2, Operands { v0, v1 }) + // VMOVNTPD xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), addr(v[1]), 0) + m.emit(0x2b) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVNTPD ymm, m256 + if isYMM(v0) && isM256(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[0]), addr(v[1]), 0) + m.emit(0x2b) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVNTPD zmm, m512 + if isZMM(v0) && isM512(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0x2b) + m.mrsd(lcode(v[0]), addr(v[1]), 64) + }) + } + // VMOVNTPD xmm, m128 + if isEVEXXMM(v0) && isM128(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0x2b) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VMOVNTPD ymm, m256 + if isEVEXYMM(v0) && isM256(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0x2b) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVNTPD") + } + return p +} + +// VMOVNTPS performs "Store Packed Single-Precision Floating-Point Values Using Non-Temporal Hint". 
+// +// Mnemonic : VMOVNTPS +// Supported forms : (5 forms) +// +// * VMOVNTPS xmm, m128 [AVX] +// * VMOVNTPS ymm, m256 [AVX] +// * VMOVNTPS zmm, m512 [AVX512F] +// * VMOVNTPS xmm, m128 [AVX512F,AVX512VL] +// * VMOVNTPS ymm, m256 [AVX512F,AVX512VL] +// +func (self *Program) VMOVNTPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVNTPS", 2, Operands { v0, v1 }) + // VMOVNTPS xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[0]), addr(v[1]), 0) + m.emit(0x2b) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVNTPS ymm, m256 + if isYMM(v0) && isM256(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[0]), addr(v[1]), 0) + m.emit(0x2b) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVNTPS zmm, m512 + if isZMM(v0) && isM512(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0x2b) + m.mrsd(lcode(v[0]), addr(v[1]), 64) + }) + } + // VMOVNTPS xmm, m128 + if isEVEXXMM(v0) && isM128(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0x2b) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VMOVNTPS ymm, m256 + if isEVEXYMM(v0) && isM256(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0x2b) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVNTPS") + } + return p +} + +// VMOVQ performs "Move Quadword".
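+// +// A hedged sketch of the GPR<->vector forms (RAX/XMM0 are assumed to be this +// package's register constants; Program construction is omitted): +// +// p.VMOVQ(RAX, XMM0) // XMM0[63:0] = RAX, upper bits zeroed +// p.VMOVQ(XMM0, RAX) // RAX = XMM0[63:0]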
+// +// Mnemonic : VMOVQ +// Supported forms : (10 forms) +// +// * VMOVQ xmm, r64 [AVX] +// * VMOVQ r64, xmm [AVX] +// * VMOVQ xmm, xmm [AVX] +// * VMOVQ m64, xmm [AVX] +// * VMOVQ xmm, m64 [AVX] +// * VMOVQ xmm, r64 [AVX512F] +// * VMOVQ r64, xmm [AVX512F] +// * VMOVQ xmm, xmm [AVX512F] +// * VMOVQ m64, xmm [AVX512F] +// * VMOVQ xmm, m64 [AVX512F] +// +func (self *Program) VMOVQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVQ", 2, Operands { v0, v1 }) + // VMOVQ xmm, r64 + if isXMM(v0) && isReg64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1 ^ (hcode(v[0]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xf9) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVQ r64, xmm + if isReg64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe1 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9) + m.emit(0x6e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), v[0], 0) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), v[1], 0) + m.emit(0xd6) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVQ m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), addr(v[0]), 0) + m.emit(0x7e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b1, 0x81, hcode(v[1]), addr(v[0]), 0) + m.emit(0x6e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVQ xmm, m64 + if isXMM(v0) && isM64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), addr(v[1]), 0) + m.emit(0xd6) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b1, 0x81, hcode(v[0]), addr(v[1]), 0) + m.emit(0x7e) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVQ xmm, r64 + if isEVEXXMM(v0) && isReg64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit(0x08) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVQ r64, xmm + if isReg64(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit(0x08) + m.emit(0x6e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVQ xmm, xmm + if isEVEXXMM(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x08) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit(0x08) + 
m.emit(0xd6) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVQ m64, xmm + if isM64(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x6e) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x86, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x7e) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VMOVQ xmm, m64 + if isEVEXXMM(v0) && isM64(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0x7e) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, 0, 0, 0) + m.emit(0xd6) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVQ") + } + return p +} + +// VMOVSD performs "Move Scalar Double-Precision Floating-Point Value". +// +// Mnemonic : VMOVSD +// Supported forms : (6 forms) +// +// * VMOVSD m64, xmm [AVX] +// * VMOVSD xmm, m64 [AVX] +// * VMOVSD xmm, xmm, xmm [AVX] +// * VMOVSD xmm, m64{k} [AVX512F] +// * VMOVSD m64, xmm{k}{z} [AVX512F] +// * VMOVSD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VMOVSD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMOVSD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VMOVSD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VMOVSD takes 2 or 3 operands") + } + // VMOVSD m64, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[1]), addr(v[0]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVSD xmm, m64 + if len(vv) == 0 && isXMM(v0) && isM64(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[0]), addr(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVSD xmm, xmm, xmm + if len(vv) == 1 && isXMM(v0) && isXMM(v1) && isXMM(vv[0]) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x10) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[0]), v[2], hlcode(v[1])) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[2])) + }) + } + // VMOVSD xmm, m64{k} + if len(vv) == 0 && isEVEXXMM(v0) && isM64k(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VMOVSD m64, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isXMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VMOVSD xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + 
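// The 4-byte EVEX prefix is emitted inline here: after the 0x62 escape, the + // first byte carries the inverted R/X/B/R' register-extension bits plus the + // opcode map, the second the W, vvvv and pp fields, and the third the z, + // L'L, V' and mask (aaa) fields. +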
m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x10) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xff ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[2])) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVSD") + } + return p +} + +// VMOVSHDUP performs "Move Packed Single-FP High and Duplicate". +// +// Mnemonic : VMOVSHDUP +// Supported forms : (10 forms) +// +// * VMOVSHDUP xmm, xmm [AVX] +// * VMOVSHDUP m128, xmm [AVX] +// * VMOVSHDUP ymm, ymm [AVX] +// * VMOVSHDUP m256, ymm [AVX] +// * VMOVSHDUP zmm, zmm{k}{z} [AVX512F] +// * VMOVSHDUP m512, zmm{k}{z} [AVX512F] +// * VMOVSHDUP xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVSHDUP ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMOVSHDUP m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVSHDUP m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMOVSHDUP(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVSHDUP", 2, Operands { v0, v1 }) + // VMOVSHDUP xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), v[0], 0) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVSHDUP m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), addr(v[0]), 0) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVSHDUP ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[1]), v[0], 0) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVSHDUP m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[1]), addr(v[0]), 0) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVSHDUP zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVSHDUP m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVSHDUP xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVSHDUP ymm, ymm{k}{z} + if 
isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVSHDUP m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVSHDUP m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVSHDUP") + } + return p +} + +// VMOVSLDUP performs "Move Packed Single-FP Low and Duplicate". +// +// Mnemonic : VMOVSLDUP +// Supported forms : (10 forms) +// +// * VMOVSLDUP xmm, xmm [AVX] +// * VMOVSLDUP m128, xmm [AVX] +// * VMOVSLDUP ymm, ymm [AVX] +// * VMOVSLDUP m256, ymm [AVX] +// * VMOVSLDUP zmm, zmm{k}{z} [AVX512F] +// * VMOVSLDUP m512, zmm{k}{z} [AVX512F] +// * VMOVSLDUP xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVSLDUP ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMOVSLDUP m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVSLDUP m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMOVSLDUP(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVSLDUP", 2, Operands { v0, v1 }) + // VMOVSLDUP xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), v[0], 0) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVSLDUP m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), addr(v[0]), 0) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVSLDUP ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[1]), v[0], 0) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVSLDUP m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[1]), addr(v[0]), 0) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVSLDUP zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVSLDUP m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVSLDUP xmm, xmm{k}{z} + if 
isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVSLDUP ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x12) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VMOVSLDUP m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVSLDUP m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x12) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVSLDUP") + } + return p +} + +// VMOVSS performs "Move Scalar Single-Precision Floating-Point Values". +// +// Mnemonic : VMOVSS +// Supported forms : (6 forms) +// +// * VMOVSS m32, xmm [AVX] +// * VMOVSS xmm, m32 [AVX] +// * VMOVSS xmm, xmm, xmm [AVX] +// * VMOVSS xmm, m32{k} [AVX512F] +// * VMOVSS m32, xmm{k}{z} [AVX512F] +// * VMOVSS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VMOVSS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMOVSS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VMOVSS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VMOVSS takes 2 or 3 operands") + } + // VMOVSS m32, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[1]), addr(v[0]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVSS xmm, m32 + if len(vv) == 0 && isXMM(v0) && isM32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[0]), addr(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVSS xmm, xmm, xmm + if len(vv) == 1 && isXMM(v0) && isXMM(v1) && isXMM(vv[0]) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x10) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[0]), v[2], hlcode(v[1])) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[2])) + }) + } + // VMOVSS xmm, m32{k} + if len(vv) == 0 && isEVEXXMM(v0) && isM32k(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VMOVSS m32, xmm{k}{z} + if len(vv) == 0 && 
isM32(v0) && isXMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VMOVSS xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x10) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[2])) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVSS") + } + return p +} + +// VMOVUPD performs "Move Unaligned Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VMOVUPD +// Supported forms : (15 forms) +// +// * VMOVUPD xmm, xmm [AVX] +// * VMOVUPD m128, xmm [AVX] +// * VMOVUPD ymm, ymm [AVX] +// * VMOVUPD m256, ymm [AVX] +// * VMOVUPD xmm, m128 [AVX] +// * VMOVUPD ymm, m256 [AVX] +// * VMOVUPD zmm, m512{k}{z} [AVX512F] +// * VMOVUPD zmm, zmm{k}{z} [AVX512F] +// * VMOVUPD m512, zmm{k}{z} [AVX512F] +// * VMOVUPD xmm, m128{k}{z} [AVX512F,AVX512VL] +// * VMOVUPD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVUPD ymm, m256{k}{z} [AVX512F,AVX512VL] +// * VMOVUPD ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMOVUPD m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVUPD m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMOVUPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVUPD", 2, Operands { v0, v1 }) + // VMOVUPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), v[0], 0) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), v[1], 0) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVUPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), addr(v[0]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVUPD ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), v[0], 0) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[0]), v[1], 0) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVUPD m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), addr(v[0]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVUPD xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[0]), addr(v[1]), 0) + 
m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVUPD ymm, m256 + if isYMM(v0) && isM256(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[0]), addr(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVUPD zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 64) + }) + } + // VMOVUPD zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVUPD m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVUPD xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VMOVUPD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVUPD ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VMOVUPD ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) 
| 0x28) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVUPD m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVUPD m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVUPD") + } + return p +} + +// VMOVUPS performs "Move Unaligned Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VMOVUPS +// Supported forms : (15 forms) +// +// * VMOVUPS xmm, xmm [AVX] +// * VMOVUPS m128, xmm [AVX] +// * VMOVUPS ymm, ymm [AVX] +// * VMOVUPS m256, ymm [AVX] +// * VMOVUPS xmm, m128 [AVX] +// * VMOVUPS ymm, m256 [AVX] +// * VMOVUPS zmm, m512{k}{z} [AVX512F] +// * VMOVUPS zmm, zmm{k}{z} [AVX512F] +// * VMOVUPS m512, zmm{k}{z} [AVX512F] +// * VMOVUPS xmm, m128{k}{z} [AVX512F,AVX512VL] +// * VMOVUPS xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVUPS ymm, m256{k}{z} [AVX512F,AVX512VL] +// * VMOVUPS ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMOVUPS m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VMOVUPS m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMOVUPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VMOVUPS", 2, Operands { v0, v1 }) + // VMOVUPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), v[0], 0) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[0]), v[1], 0) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVUPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), addr(v[0]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVUPS ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), v[0], 0) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[0]), v[1], 0) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVUPS m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), addr(v[0]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VMOVUPS xmm, m128 + if isXMM(v0) && isM128(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[0]), addr(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVUPS ymm, m256 + if isYMM(v0) && isM256(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[0]), addr(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + // VMOVUPS zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { 
+ self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 64) + }) + } + // VMOVUPS zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVUPS m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VMOVUPS xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VMOVUPS xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVUPS ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VMOVUPS ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x10) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VMOVUPS m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x10) + 
m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VMOVUPS m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x10) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VMOVUPS") + } + return p +} + +// VMPSADBW performs "Compute Multiple Packed Sums of Absolute Difference". +// +// Mnemonic : VMPSADBW +// Supported forms : (4 forms) +// +// * VMPSADBW imm8, xmm, xmm, xmm [AVX] +// * VMPSADBW imm8, m128, xmm, xmm [AVX] +// * VMPSADBW imm8, ymm, ymm, ymm [AVX2] +// * VMPSADBW imm8, m256, ymm, ymm [AVX2] +// +func (self *Program) VMPSADBW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VMPSADBW", 4, Operands { v0, v1, v2, v3 }) + // VMPSADBW imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x42) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VMPSADBW imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x42) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VMPSADBW imm8, ymm, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x42) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VMPSADBW imm8, m256, ymm, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x42) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMPSADBW") + } + return p +} + +// VMULPD performs "Multiply Packed Double-Precision Floating-Point Values". 
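+//
+// As a hedged usage sketch only (p is a hypothetical *Program value, and
+// XMM0-XMM2 are assumed to be this package's register constants), the encoder
+// picks the form from the operand types, with sources first and the
+// destination last:
+//
+//     p.VMULPD(XMM0, XMM1, XMM2)    // xmm2 := xmm1 * xmm0 (the AVX "xmm, xmm, xmm" form)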
+// +// Mnemonic : VMULPD +// Supported forms : (11 forms) +// +// * VMULPD xmm, xmm, xmm [AVX] +// * VMULPD m128, xmm, xmm [AVX] +// * VMULPD ymm, ymm, ymm [AVX] +// * VMULPD m256, ymm, ymm [AVX] +// * VMULPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VMULPD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VMULPD zmm, zmm, zmm{k}{z} [AVX512F] +// * VMULPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMULPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMULPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMULPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMULPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMULPD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VMULPD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VMULPD takes 3 or 4 operands") + } + // VMULPD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMULPD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMULPD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMULPD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMULPD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VMULPD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x59) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VMULPD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMULPD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && 
isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VMULPD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMULPD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VMULPD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMULPD") + } + return p +} + +// VMULPS performs "Multiply Packed Single-Precision Floating-Point Values". 
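+//
+// The trailing variadic parameter exists for the four-operand {er} form:
+// passing a rounding-control operand first selects
+// "VMULPS {er}, zmm, zmm, zmm{k}{z}". A minimal three-operand sketch, under
+// the same assumed p and register constants as above:
+//
+//     p.VMULPS(YMM0, YMM1, YMM2)    // ymm2 := ymm1 * ymm0 (the AVX "ymm, ymm, ymm" form)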
+// +// Mnemonic : VMULPS +// Supported forms : (11 forms) +// +// * VMULPS xmm, xmm, xmm [AVX] +// * VMULPS m128, xmm, xmm [AVX] +// * VMULPS ymm, ymm, ymm [AVX] +// * VMULPS m256, ymm, ymm [AVX] +// * VMULPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VMULPS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VMULPS zmm, zmm, zmm{k}{z} [AVX512F] +// * VMULPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMULPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VMULPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VMULPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VMULPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMULPS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VMULPS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VMULPS takes 3 or 4 operands") + } + // VMULPS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMULPS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMULPS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMULPS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMULPS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VMULPS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x59) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VMULPS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMULPS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && 
isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VMULPS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMULPS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VMULPS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMULPS") + } + return p +} + +// VMULSD performs "Multiply Scalar Double-Precision Floating-Point Values". 
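+//
+// For illustration (hypothetical caller, assumed XMM constants): only the low
+// 64-bit lane is multiplied; the upper bits of the destination are copied
+// from the middle operand.
+//
+//     p.VMULSD(XMM3, XMM1, XMM0)    // xmm0[63:0] := xmm1[63:0] * xmm3[63:0]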
+// +// Mnemonic : VMULSD +// Supported forms : (5 forms) +// +// * VMULSD xmm, xmm, xmm [AVX] +// * VMULSD m64, xmm, xmm [AVX] +// * VMULSD m64, xmm, xmm{k}{z} [AVX512F] +// * VMULSD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VMULSD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VMULSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMULSD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VMULSD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VMULSD takes 3 or 4 operands") + } + // VMULSD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMULSD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMULSD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VMULSD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xff ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x59) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VMULSD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMULSD") + } + return p +} + +// VMULSS performs "Multiply Scalar Single-Precision Floating-Point Values". 
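+//
+// Sketch under the same assumptions: the low 32-bit lane is multiplied and
+// the rest of the destination is copied from the middle operand.
+//
+//     p.VMULSS(XMM2, XMM1, XMM0)    // xmm0[31:0] := xmm1[31:0] * xmm2[31:0]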
+// +// Mnemonic : VMULSS +// Supported forms : (5 forms) +// +// * VMULSS xmm, xmm, xmm [AVX] +// * VMULSS m32, xmm, xmm [AVX] +// * VMULSS m32, xmm, xmm{k}{z} [AVX512F] +// * VMULSS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VMULSS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VMULSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VMULSS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VMULSS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VMULSS takes 3 or 4 operands") + } + // VMULSS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VMULSS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VMULSS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x59) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VMULSS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7e ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x59) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VMULSS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x59) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VMULSS") + } + return p +} + +// VORPD performs "Bitwise Logical OR of Double-Precision Floating-Point Values". 
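+//
+// Hedged example (hypothetical p, assumed register constants):
+//
+//     p.VORPD(XMM1, XMM2, XMM3)    // xmm3 := xmm2 | xmm1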
+// +// Mnemonic : VORPD +// Supported forms : (10 forms) +// +// * VORPD xmm, xmm, xmm [AVX] +// * VORPD m128, xmm, xmm [AVX] +// * VORPD ymm, ymm, ymm [AVX] +// * VORPD m256, ymm, ymm [AVX] +// * VORPD m512/m64bcst, zmm, zmm{k}{z} [AVX512DQ] +// * VORPD zmm, zmm, zmm{k}{z} [AVX512DQ] +// * VORPD m128/m64bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VORPD xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VORPD m256/m64bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VORPD ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VORPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VORPD", 3, Operands { v0, v1, v2 }) + // VORPD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VORPD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VORPD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VORPD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VORPD m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VORPD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VORPD m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VORPD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VORPD m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && 
isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VORPD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VORPD") + } + return p +} + +// VORPS performs "Bitwise Logical OR of Single-Precision Floating-Point Values". +// +// Mnemonic : VORPS +// Supported forms : (10 forms) +// +// * VORPS xmm, xmm, xmm [AVX] +// * VORPS m128, xmm, xmm [AVX] +// * VORPS ymm, ymm, ymm [AVX] +// * VORPS m256, ymm, ymm [AVX] +// * VORPS m512/m32bcst, zmm, zmm{k}{z} [AVX512DQ] +// * VORPS zmm, zmm, zmm{k}{z} [AVX512DQ] +// * VORPS m128/m32bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VORPS xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VORPS m256/m32bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VORPS ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VORPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VORPS", 3, Operands { v0, v1, v2 }) + // VORPS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VORPS m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VORPS ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VORPS m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VORPS m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VORPS zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // 
VORPS m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VORPS xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VORPS m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x56) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VORPS ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x56) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VORPS") + } + return p +} + +// VPABSB performs "Packed Absolute Value of Byte Integers". 
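+//
+// The unary forms take the source first and the destination second. A sketch
+// under the same assumptions as the examples above:
+//
+//     p.VPABSB(XMM0, XMM1)    // xmm1 := per-byte |xmm0|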
+// +// Mnemonic : VPABSB +// Supported forms : (10 forms) +// +// * VPABSB xmm, xmm [AVX] +// * VPABSB m128, xmm [AVX] +// * VPABSB ymm, ymm [AVX2] +// * VPABSB m256, ymm [AVX2] +// * VPABSB zmm, zmm{k}{z} [AVX512BW] +// * VPABSB m512, zmm{k}{z} [AVX512BW] +// * VPABSB xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPABSB ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPABSB m128, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPABSB m256, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPABSB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPABSB", 2, Operands { v0, v1 }) + // VPABSB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x1c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSB m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x1c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPABSB ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x1c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSB m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x1c) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPABSB zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x1c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSB m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x1c) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VPABSB xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x1c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSB ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x1c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSB m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x1c) + m.mrsd(lcode(v[1]), 
addr(v[0]), 16) + }) + } + // VPABSB m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x1c) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPABSB") + } + return p +} + +// VPABSD performs "Packed Absolute Value of Doubleword Integers". +// +// Mnemonic : VPABSD +// Supported forms : (10 forms) +// +// * VPABSD xmm, xmm [AVX] +// * VPABSD m128, xmm [AVX] +// * VPABSD ymm, ymm [AVX2] +// * VPABSD m256, ymm [AVX2] +// * VPABSD m512/m32bcst, zmm{k}{z} [AVX512F] +// * VPABSD zmm, zmm{k}{z} [AVX512F] +// * VPABSD m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPABSD m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPABSD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPABSD ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPABSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPABSD", 2, Operands { v0, v1 }) + // VPABSD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x1e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPABSD ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSD m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x1e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPABSD m512/m32bcst, zmm{k}{z} + if isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x1e) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VPABSD zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSD m128/m32bcst, xmm{k}{z} + if isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x1e) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VPABSD m256/m32bcst, ymm{k}{z} + if isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), 
addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x1e) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VPABSD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSD ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPABSD") + } + return p +} + +// VPABSQ performs "Packed Absolute Value of Quadword Integers". +// +// Mnemonic : VPABSQ +// Supported forms : (6 forms) +// +// * VPABSQ m512/m64bcst, zmm{k}{z} [AVX512F] +// * VPABSQ zmm, zmm{k}{z} [AVX512F] +// * VPABSQ m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPABSQ m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPABSQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPABSQ ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPABSQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPABSQ", 2, Operands { v0, v1 }) + // VPABSQ m512/m64bcst, zmm{k}{z} + if isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x1f) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VPABSQ zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x1f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSQ m128/m64bcst, xmm{k}{z} + if isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x1f) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VPABSQ m256/m64bcst, ymm{k}{z} + if isM256M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x1f) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VPABSQ xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x1f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSQ ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m 
*_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x1f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPABSQ") + } + return p +} + +// VPABSW performs "Packed Absolute Value of Word Integers". +// +// Mnemonic : VPABSW +// Supported forms : (10 forms) +// +// * VPABSW xmm, xmm [AVX] +// * VPABSW m128, xmm [AVX] +// * VPABSW ymm, ymm [AVX2] +// * VPABSW m256, ymm [AVX2] +// * VPABSW zmm, zmm{k}{z} [AVX512BW] +// * VPABSW m512, zmm{k}{z} [AVX512BW] +// * VPABSW xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPABSW ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPABSW m128, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPABSW m256, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPABSW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPABSW", 2, Operands { v0, v1 }) + // VPABSW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x1d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPABSW ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSW m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x1d) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPABSW zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSW m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x1d) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VPABSW xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSW ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + 
m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x1d) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPABSW m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x1d) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VPABSW m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x1d) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPABSW") + } + return p +} + +// VPACKSSDW performs "Pack Doublewords into Words with Signed Saturation". +// +// Mnemonic : VPACKSSDW +// Supported forms : (10 forms) +// +// * VPACKSSDW xmm, xmm, xmm [AVX] +// * VPACKSSDW m128, xmm, xmm [AVX] +// * VPACKSSDW ymm, ymm, ymm [AVX2] +// * VPACKSSDW m256, ymm, ymm [AVX2] +// * VPACKSSDW m512/m32bcst, zmm, zmm{k}{z} [AVX512BW] +// * VPACKSSDW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPACKSSDW m128/m32bcst, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPACKSSDW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPACKSSDW m256/m32bcst, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPACKSSDW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPACKSSDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPACKSSDW", 3, Operands { v0, v1, v2 }) + // VPACKSSDW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x6b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKSSDW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x6b) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPACKSSDW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x6b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKSSDW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x6b) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPACKSSDW m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x6b) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPACKSSDW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x6b) + 
m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKSSDW m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x6b) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPACKSSDW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x6b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKSSDW m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x6b) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPACKSSDW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x6b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPACKSSDW") + } + return p +} + +// VPACKSSWB performs "Pack Words into Bytes with Signed Saturation". 
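+//
+// Per lane, the operation clamps each signed 16-bit source element into the
+// signed 8-bit range before packing. A minimal scalar sketch of that
+// saturation step in plain Go (the helper name is illustrative, not part of
+// this package):
+//
+//	func packssClamp(x int16) int8 {
+//		if x > 127 {
+//			return 127
+//		}
+//		if x < -128 {
+//			return -128
+//		}
+//		return int8(x)
+//	}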
+// +// Mnemonic : VPACKSSWB +// Supported forms : (10 forms) +// +// * VPACKSSWB xmm, xmm, xmm [AVX] +// * VPACKSSWB m128, xmm, xmm [AVX] +// * VPACKSSWB ymm, ymm, ymm [AVX2] +// * VPACKSSWB m256, ymm, ymm [AVX2] +// * VPACKSSWB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPACKSSWB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPACKSSWB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPACKSSWB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPACKSSWB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPACKSSWB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPACKSSWB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPACKSSWB", 3, Operands { v0, v1, v2 }) + // VPACKSSWB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x63) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKSSWB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x63) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPACKSSWB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x63) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKSSWB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x63) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPACKSSWB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x63) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKSSWB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x63) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPACKSSWB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x63) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKSSWB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x63) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPACKSSWB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) 
&& isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x63) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKSSWB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x63) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPACKSSWB") + } + return p +} + +// VPACKUSDW performs "Pack Doublewords into Words with Unsigned Saturation". +// +// Mnemonic : VPACKUSDW +// Supported forms : (10 forms) +// +// * VPACKUSDW xmm, xmm, xmm [AVX] +// * VPACKUSDW m128, xmm, xmm [AVX] +// * VPACKUSDW ymm, ymm, ymm [AVX2] +// * VPACKUSDW m256, ymm, ymm [AVX2] +// * VPACKUSDW m512/m32bcst, zmm, zmm{k}{z} [AVX512BW] +// * VPACKUSDW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPACKUSDW m128/m32bcst, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPACKUSDW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPACKUSDW m256/m32bcst, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPACKUSDW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPACKUSDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPACKUSDW", 3, Operands { v0, v1, v2 }) + // VPACKUSDW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x2b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKUSDW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x2b) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPACKUSDW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x2b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKUSDW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x2b) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPACKUSDW m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x2b) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPACKUSDW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | 
(ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x2b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKUSDW m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x2b) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPACKUSDW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x2b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKUSDW m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x2b) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPACKUSDW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x2b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPACKUSDW") + } + return p +} + +// VPACKUSWB performs "Pack Words into Bytes with Unsigned Saturation". 
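+//
+// Note the asymmetry with VPACKSSWB above: the source elements are still
+// read as signed 16-bit values, but the clamp targets the unsigned 8-bit
+// range, so negative inputs saturate to zero. A scalar sketch (helper name
+// illustrative, not part of this package):
+//
+//	func packusClamp(x int16) uint8 {
+//		if x < 0 {
+//			return 0
+//		}
+//		if x > 255 {
+//			return 255
+//		}
+//		return uint8(x)
+//	}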
+// +// Mnemonic : VPACKUSWB +// Supported forms : (10 forms) +// +// * VPACKUSWB xmm, xmm, xmm [AVX] +// * VPACKUSWB m128, xmm, xmm [AVX] +// * VPACKUSWB ymm, ymm, ymm [AVX2] +// * VPACKUSWB m256, ymm, ymm [AVX2] +// * VPACKUSWB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPACKUSWB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPACKUSWB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPACKUSWB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPACKUSWB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPACKUSWB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPACKUSWB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPACKUSWB", 3, Operands { v0, v1, v2 }) + // VPACKUSWB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x67) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKUSWB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x67) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPACKUSWB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x67) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKUSWB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x67) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPACKUSWB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x67) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKUSWB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x67) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPACKUSWB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x67) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKUSWB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x67) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPACKUSWB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) 
&& isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x67) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPACKUSWB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x67) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPACKUSWB") + } + return p +} + +// VPADDB performs "Add Packed Byte Integers". +// +// Mnemonic : VPADDB +// Supported forms : (10 forms) +// +// * VPADDB xmm, xmm, xmm [AVX] +// * VPADDB m128, xmm, xmm [AVX] +// * VPADDB ymm, ymm, ymm [AVX2] +// * VPADDB m256, ymm, ymm [AVX2] +// * VPADDB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPADDB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPADDB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPADDB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPADDB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPADDB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPADDB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPADDB", 3, Operands { v0, v1, v2 }) + // VPADDB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xfc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xfc) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xfc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xfc) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xfc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xfc) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPADDB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && 
isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xfc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xfc) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPADDB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xfc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xfc) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPADDB") + } + return p +} + +// VPADDD performs "Add Packed Doubleword Integers". +// +// Mnemonic : VPADDD +// Supported forms : (10 forms) +// +// * VPADDD xmm, xmm, xmm [AVX] +// * VPADDD m128, xmm, xmm [AVX] +// * VPADDD ymm, ymm, ymm [AVX2] +// * VPADDD m256, ymm, ymm [AVX2] +// * VPADDD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPADDD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPADDD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPADDD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPADDD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPADDD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPADDD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPADDD", 3, Operands { v0, v1, v2 }) + // VPADDD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xfe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xfe) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xfe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xfe) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDD 
m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xfe) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPADDD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xfe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xfe) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPADDD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xfe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xfe) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPADDD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xfe) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPADDD") + } + return p +} + +// VPADDQ performs "Add Packed Quadword Integers". 
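+//
+// A hedged usage sketch, assuming an already-built *Program p and this
+// package's exported register values (XMM0, YMM0, ...); which "Supported
+// forms" entry the operands match decides the encoding path (VEX for the
+// AVX/AVX2 forms, EVEX for the AVX-512 forms):
+//
+//	p.VPADDQ(XMM1, XMM2, XMM0) // VPADDQ xmm, xmm, xmm [AVX]
+//	p.VPADDQ(YMM1, YMM2, YMM0) // VPADDQ ymm, ymm, ymm [AVX2]
+//
+// As throughout this file, operands are listed sources first, destination
+// last, mirroring the form lists above.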
+// +// Mnemonic : VPADDQ +// Supported forms : (10 forms) +// +// * VPADDQ xmm, xmm, xmm [AVX] +// * VPADDQ m128, xmm, xmm [AVX] +// * VPADDQ ymm, ymm, ymm [AVX2] +// * VPADDQ m256, ymm, ymm [AVX2] +// * VPADDQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPADDQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPADDQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPADDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPADDQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPADDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPADDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPADDQ", 3, Operands { v0, v1, v2 }) + // VPADDQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd4) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDQ ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd4) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xd4) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPADDQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xd4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xd4) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPADDQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xd4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && 
isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xd4) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPADDQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xd4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPADDQ") + } + return p +} + +// VPADDSB performs "Add Packed Signed Byte Integers with Signed Saturation". +// +// Mnemonic : VPADDSB +// Supported forms : (10 forms) +// +// * VPADDSB xmm, xmm, xmm [AVX] +// * VPADDSB m128, xmm, xmm [AVX] +// * VPADDSB ymm, ymm, ymm [AVX2] +// * VPADDSB m256, ymm, ymm [AVX2] +// * VPADDSB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPADDSB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPADDSB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPADDSB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPADDSB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPADDSB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPADDSB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPADDSB", 3, Operands { v0, v1, v2 }) + // VPADDSB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xec) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDSB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xec) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDSB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xec) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDSB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xec) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDSB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xec) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDSB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xec) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + 
// VPADDSB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xec) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDSB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xec) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPADDSB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xec) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDSB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xec) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPADDSB") + } + return p +} + +// VPADDSW performs "Add Packed Signed Word Integers with Signed Saturation". 
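+//
+// The per-lane semantics are a signed saturating 16-bit add. A minimal
+// scalar sketch in plain Go (helper name illustrative, not part of this
+// package):
+//
+//	func addsw(a, b int16) int16 {
+//		s := int32(a) + int32(b)
+//		if s > 32767 {
+//			return 32767
+//		}
+//		if s < -32768 {
+//			return -32768
+//		}
+//		return int16(s)
+//	}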
+// +// Mnemonic : VPADDSW +// Supported forms : (10 forms) +// +// * VPADDSW xmm, xmm, xmm [AVX] +// * VPADDSW m128, xmm, xmm [AVX] +// * VPADDSW ymm, ymm, ymm [AVX2] +// * VPADDSW m256, ymm, ymm [AVX2] +// * VPADDSW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPADDSW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPADDSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPADDSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPADDSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPADDSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPADDSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPADDSW", 3, Operands { v0, v1, v2 }) + // VPADDSW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xed) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDSW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xed) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDSW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xed) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDSW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xed) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDSW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xed) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDSW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xed) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPADDSW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xed) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDSW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xed) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPADDSW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL 
| ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xed) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDSW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xed) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPADDSW") + } + return p +} + +// VPADDUSB performs "Add Packed Unsigned Byte Integers with Unsigned Saturation". +// +// Mnemonic : VPADDUSB +// Supported forms : (10 forms) +// +// * VPADDUSB xmm, xmm, xmm [AVX] +// * VPADDUSB m128, xmm, xmm [AVX] +// * VPADDUSB ymm, ymm, ymm [AVX2] +// * VPADDUSB m256, ymm, ymm [AVX2] +// * VPADDUSB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPADDUSB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPADDUSB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPADDUSB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPADDUSB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPADDUSB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPADDUSB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPADDUSB", 3, Operands { v0, v1, v2 }) + // VPADDUSB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xdc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDUSB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xdc) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDUSB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xdc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDUSB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xdc) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDUSB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xdc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDUSB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xdc) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPADDUSB xmm, xmm, xmm{k}{z} + if 
isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xdc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDUSB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xdc) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPADDUSB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xdc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDUSB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xdc) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPADDUSB") + } + return p +} + +// VPADDUSW performs "Add Packed Unsigned Word Integers with Unsigned Saturation". 
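+//
+// The per-lane semantics are an unsigned saturating 16-bit add: any sum
+// above 0xFFFF clamps to 0xFFFF. A scalar sketch (helper name illustrative,
+// not part of this package):
+//
+//	func addusw(a, b uint16) uint16 {
+//		if s := uint32(a) + uint32(b); s <= 0xFFFF {
+//			return uint16(s)
+//		}
+//		return 0xFFFF
+//	}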
+// +// Mnemonic : VPADDUSW +// Supported forms : (10 forms) +// +// * VPADDUSW xmm, xmm, xmm [AVX] +// * VPADDUSW m128, xmm, xmm [AVX] +// * VPADDUSW ymm, ymm, ymm [AVX2] +// * VPADDUSW m256, ymm, ymm [AVX2] +// * VPADDUSW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPADDUSW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPADDUSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPADDUSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPADDUSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPADDUSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPADDUSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPADDUSW", 3, Operands { v0, v1, v2 }) + // VPADDUSW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xdd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDUSW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xdd) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDUSW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xdd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDUSW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xdd) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDUSW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xdd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDUSW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xdd) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPADDUSW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xdd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDUSW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xdd) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPADDUSW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + 
self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xdd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDUSW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xdd) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPADDUSW") + } + return p +} + +// VPADDW performs "Add Packed Word Integers". +// +// Mnemonic : VPADDW +// Supported forms : (10 forms) +// +// * VPADDW xmm, xmm, xmm [AVX] +// * VPADDW m128, xmm, xmm [AVX] +// * VPADDW ymm, ymm, ymm [AVX2] +// * VPADDW m256, ymm, ymm [AVX2] +// * VPADDW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPADDW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPADDW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPADDW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPADDW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPADDW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPADDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPADDW", 3, Operands { v0, v1, v2 }) + // VPADDW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xfd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xfd) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xfd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xfd) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPADDW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xfd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xfd) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPADDW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + 
self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xfd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xfd) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPADDW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xfd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPADDW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xfd) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPADDW") + } + return p +} + +// VPALIGNR performs "Packed Align Right". +// +// Mnemonic : VPALIGNR +// Supported forms : (10 forms) +// +// * VPALIGNR imm8, xmm, xmm, xmm [AVX] +// * VPALIGNR imm8, m128, xmm, xmm [AVX] +// * VPALIGNR imm8, ymm, ymm, ymm [AVX2] +// * VPALIGNR imm8, m256, ymm, ymm [AVX2] +// * VPALIGNR imm8, zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPALIGNR imm8, m512, zmm, zmm{k}{z} [AVX512BW] +// * VPALIGNR imm8, xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPALIGNR imm8, m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPALIGNR imm8, ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPALIGNR imm8, m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPALIGNR(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPALIGNR", 4, Operands { v0, v1, v2, v3 }) + // VPALIGNR imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPALIGNR imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x0f) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPALIGNR imm8, ymm, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[3]) << 
3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPALIGNR imm8, m256, ymm, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x0f) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPALIGNR imm8, zmm, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPALIGNR imm8, m512, zmm, zmm{k}{z} + if isImm8(v0) && isM512(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x0f) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPALIGNR imm8, xmm, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPALIGNR imm8, m128, xmm, xmm{k}{z} + if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x0f) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPALIGNR imm8, ymm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPALIGNR imm8, m256, ymm, ymm{k}{z} + if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x0f) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPALIGNR") + } + return p +} + +// VPAND performs "Packed Bitwise Logical AND". 
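+//
+// Unlike most instructions in this file, VPAND has VEX forms only: AVX-512
+// does not extend it, and instead provides the element-sized VPANDD/VPANDQ
+// (below) so that masking and broadcast have a defined element width. For
+// the register-register forms here, the last byte emitted is the
+// register-direct ModR/M byte; a sketch of that computation, assuming the
+// 3-bit low register codes produced by lcode:
+//
+//	func modRMDirect(reg, rm byte) byte {
+//		return 0xc0 | reg<<3 | rm // mod=0b11 selects register operands
+//	}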
+// +// Mnemonic : VPAND +// Supported forms : (4 forms) +// +// * VPAND xmm, xmm, xmm [AVX] +// * VPAND m128, xmm, xmm [AVX] +// * VPAND ymm, ymm, ymm [AVX2] +// * VPAND m256, ymm, ymm [AVX2] +// +func (self *Program) VPAND(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPAND", 3, Operands { v0, v1, v2 }) + // VPAND xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPAND m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xdb) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPAND ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPAND m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xdb) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPAND") + } + return p +} + +// VPANDD performs "Bitwise Logical AND of Packed Doubleword Integers". +// +// Mnemonic : VPANDD +// Supported forms : (6 forms) +// +// * VPANDD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPANDD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPANDD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPANDD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPANDD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPANDD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPANDD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPANDD", 3, Operands { v0, v1, v2 }) + // VPANDD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xdb) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPANDD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPANDD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xdb) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPANDD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPANDD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xdb) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPANDD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPANDD") + } + return p +} + +// VPANDN performs "Packed Bitwise Logical AND NOT". +// +// Mnemonic : VPANDN +// Supported forms : (4 forms) +// +// * VPANDN xmm, xmm, xmm [AVX] +// * VPANDN m128, xmm, xmm [AVX] +// * VPANDN ymm, ymm, ymm [AVX2] +// * VPANDN m256, ymm, ymm [AVX2] +// +func (self *Program) VPANDN(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPANDN", 3, Operands { v0, v1, v2 }) + // VPANDN xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPANDN m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xdf) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPANDN ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPANDN m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xdf) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPANDN") + } + return p +} + +// VPANDND performs "Bitwise Logical AND NOT of Packed Doubleword Integers". 
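+// +// A hedged sketch of the register form, assuming this package's ZMM +// constants; the destination (last operand) may also carry the opmask and +// zeroing decorations shown in the {k}{z} forms below: +// +//     p.VPANDND(ZMM1, ZMM2, ZMM0)    // zmm0 = (NOT zmm2) AND zmm1, per dword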
+// +// Mnemonic : VPANDND +// Supported forms : (6 forms) +// +// * VPANDND m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPANDND zmm, zmm, zmm{k}{z} [AVX512F] +// * VPANDND m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPANDND xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPANDND m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPANDND ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPANDND(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPANDND", 3, Operands { v0, v1, v2 }) + // VPANDND m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xdf) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPANDND zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPANDND m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xdf) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPANDND xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPANDND m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xdf) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPANDND ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPANDND") + } + return p +} + +// VPANDNQ performs "Bitwise Logical AND NOT of Packed Quadword Integers". 
+// +// Mnemonic : VPANDNQ +// Supported forms : (6 forms) +// +// * VPANDNQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPANDNQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPANDNQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPANDNQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPANDNQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPANDNQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPANDNQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPANDNQ", 3, Operands { v0, v1, v2 }) + // VPANDNQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xdf) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPANDNQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPANDNQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xdf) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPANDNQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPANDNQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xdf) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPANDNQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xdf) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPANDNQ") + } + return p +} + +// VPANDQ performs "Bitwise Logical AND of Packed Quadword Integers". 
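+// +// In the m64bcst forms the memory operand is either a full-width vector +// load or a single quadword broadcast to every lane (EVEX.b, taken from +// the operand's broadcast flavor via bcode below). A plain-register +// sketch, assuming this package's ZMM constants: +// +//     p.VPANDQ(ZMM1, ZMM2, ZMM0)    // zmm0 = zmm2 AND zmm1, per qword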
+// +// Mnemonic : VPANDQ +// Supported forms : (6 forms) +// +// * VPANDQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPANDQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPANDQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPANDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPANDQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPANDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPANDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPANDQ", 3, Operands { v0, v1, v2 }) + // VPANDQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xdb) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPANDQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPANDQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xdb) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPANDQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPANDQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xdb) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPANDQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPANDQ") + } + return p +} + +// VPAVGB performs "Average Packed Byte Integers". 
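+// +// Each unsigned byte pair is averaged with rounding, (a + b + 1) >> 1. +// A short sketch, assuming this package's YMM register constants: +// +//     p.VPAVGB(YMM1, YMM2, YMM0)    // ymm0[i] = (ymm2[i] + ymm1[i] + 1) >> 1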
+// +// Mnemonic : VPAVGB +// Supported forms : (10 forms) +// +// * VPAVGB xmm, xmm, xmm [AVX] +// * VPAVGB m128, xmm, xmm [AVX] +// * VPAVGB ymm, ymm, ymm [AVX2] +// * VPAVGB m256, ymm, ymm [AVX2] +// * VPAVGB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPAVGB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPAVGB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPAVGB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPAVGB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPAVGB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPAVGB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPAVGB", 3, Operands { v0, v1, v2 }) + // VPAVGB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe0) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPAVGB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe0) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPAVGB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe0) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPAVGB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe0) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPAVGB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xe0) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPAVGB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe0) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPAVGB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xe0) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPAVGB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe0) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPAVGB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + 
p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xe0) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPAVGB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe0) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPAVGB") + } + return p +} + +// VPAVGW performs "Average Packed Word Integers". +// +// Mnemonic : VPAVGW +// Supported forms : (10 forms) +// +// * VPAVGW xmm, xmm, xmm [AVX] +// * VPAVGW m128, xmm, xmm [AVX] +// * VPAVGW ymm, ymm, ymm [AVX2] +// * VPAVGW m256, ymm, ymm [AVX2] +// * VPAVGW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPAVGW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPAVGW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPAVGW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPAVGW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPAVGW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPAVGW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPAVGW", 3, Operands { v0, v1, v2 }) + // VPAVGW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPAVGW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe3) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPAVGW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPAVGW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe3) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPAVGW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xe3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPAVGW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe3) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPAVGW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + 
p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xe3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPAVGW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe3) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPAVGW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xe3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPAVGW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe3) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPAVGW") + } + return p +} + +// VPBLENDD performs "Blend Packed Doublewords". +// +// Mnemonic : VPBLENDD +// Supported forms : (4 forms) +// +// * VPBLENDD imm8, xmm, xmm, xmm [AVX2] +// * VPBLENDD imm8, m128, xmm, xmm [AVX2] +// * VPBLENDD imm8, ymm, ymm, ymm [AVX2] +// * VPBLENDD imm8, m256, ymm, ymm [AVX2] +// +func (self *Program) VPBLENDD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPBLENDD", 4, Operands { v0, v1, v2, v3 }) + // VPBLENDD imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x02) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPBLENDD imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x02) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPBLENDD imm8, ymm, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x02) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPBLENDD imm8, m256, ymm, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x02) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + 
}) + } + if p.len == 0 { + panic("invalid operands for VPBLENDD") + } + return p +} + +// VPBLENDMB performs "Blend Byte Vectors Using an OpMask Control". +// +// Mnemonic : VPBLENDMB +// Supported forms : (6 forms) +// +// * VPBLENDMB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPBLENDMB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPBLENDMB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPBLENDMB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPBLENDMB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPBLENDMB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPBLENDMB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPBLENDMB", 3, Operands { v0, v1, v2 }) + // VPBLENDMB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPBLENDMB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPBLENDMB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPBLENDMB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPBLENDMB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPBLENDMB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPBLENDMB") + } + return p +} + +// VPBLENDMD performs "Blend Doubleword Vectors Using an OpMask Control". 
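+// +// A per-dword select driven by the destination's opmask: lanes whose mask +// bit is set take the first source operand, the rest take the second. A +// sketch with plain registers, assuming this package's ZMM constants +// (with no mask attached, every lane takes the first source): +// +//     p.VPBLENDMD(ZMM1, ZMM2, ZMM0)    // zmm0[i] = k[i] ? zmm1[i] : zmm2[i]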
+// +// Mnemonic : VPBLENDMD +// Supported forms : (6 forms) +// +// * VPBLENDMD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPBLENDMD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPBLENDMD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPBLENDMD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPBLENDMD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPBLENDMD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPBLENDMD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPBLENDMD", 3, Operands { v0, v1, v2 }) + // VPBLENDMD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x64) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPBLENDMD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x64) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPBLENDMD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x64) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPBLENDMD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x64) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPBLENDMD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x64) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPBLENDMD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x64) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPBLENDMD") + } + return p +} + +// VPBLENDMQ performs "Blend Quadword Vectors Using an OpMask Control". 
+// +// Mnemonic : VPBLENDMQ +// Supported forms : (6 forms) +// +// * VPBLENDMQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPBLENDMQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPBLENDMQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPBLENDMQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPBLENDMQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPBLENDMQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPBLENDMQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPBLENDMQ", 3, Operands { v0, v1, v2 }) + // VPBLENDMQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x64) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPBLENDMQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x64) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPBLENDMQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x64) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPBLENDMQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x64) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPBLENDMQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x64) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPBLENDMQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x64) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPBLENDMQ") + } + return p +} + +// VPBLENDMW performs "Blend Word Vectors Using an OpMask Control". 
+// +// Mnemonic : VPBLENDMW +// Supported forms : (6 forms) +// +// * VPBLENDMW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPBLENDMW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPBLENDMW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPBLENDMW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPBLENDMW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPBLENDMW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPBLENDMW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPBLENDMW", 3, Operands { v0, v1, v2 }) + // VPBLENDMW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPBLENDMW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPBLENDMW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPBLENDMW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPBLENDMW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPBLENDMW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPBLENDMW") + } + return p +} + +// VPBLENDVB performs "Variable Blend Packed Bytes". 
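+// +// The first operand is a byte-mask vector: bit 7 of each mask byte picks +// between the two sources. A sketch, assuming XMM register constants: +// +//     p.VPBLENDVB(XMM3, XMM1, XMM2, XMM0)    // xmm0[i] = msb(xmm3[i]) ? xmm1[i] : xmm2[i]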
+// +// Mnemonic : VPBLENDVB +// Supported forms : (4 forms) +// +// * VPBLENDVB xmm, xmm, xmm, xmm [AVX] +// * VPBLENDVB xmm, m128, xmm, xmm [AVX] +// * VPBLENDVB ymm, ymm, ymm, ymm [AVX2] +// * VPBLENDVB ymm, m256, ymm, ymm [AVX2] +// +func (self *Program) VPBLENDVB(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPBLENDVB", 4, Operands { v0, v1, v2, v3 }) + // VPBLENDVB xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x4c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPBLENDVB xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x4c) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPBLENDVB ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x4c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPBLENDVB ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x4c) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPBLENDVB") + } + return p +} + +// VPBLENDW performs "Blend Packed Words". 
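+// +// imm8 selects per word: a set bit i takes word i from the first source, +// a clear bit takes it from the second; the 256-bit forms reuse the same +// imm8 for the upper lane. A sketch, assuming XMM register constants: +// +//     p.VPBLENDW(0x55, XMM1, XMM2, XMM0)    // even words from xmm1, odd words from xmm2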
+// +// Mnemonic : VPBLENDW +// Supported forms : (4 forms) +// +// * VPBLENDW imm8, xmm, xmm, xmm [AVX] +// * VPBLENDW imm8, m128, xmm, xmm [AVX] +// * VPBLENDW imm8, ymm, ymm, ymm [AVX2] +// * VPBLENDW imm8, m256, ymm, ymm [AVX2] +// +func (self *Program) VPBLENDW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPBLENDW", 4, Operands { v0, v1, v2, v3 }) + // VPBLENDW imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x0e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPBLENDW imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x0e) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPBLENDW imm8, ymm, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x0e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPBLENDW imm8, m256, ymm, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x0e) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPBLENDW") + } + return p +} + +// VPBROADCASTB performs "Broadcast Byte Integer". 
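+// +// Replicates the lowest byte of the source (an xmm, an m8 load, or the +// low byte of an r32 in the EVEX forms) into every byte of the +// destination. A sketch, assuming this package's register constants: +// +//     p.VPBROADCASTB(XMM1, YMM0)    // every byte of ymm0 = byte 0 of xmm1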
+// +// Mnemonic : VPBROADCASTB +// Supported forms : (13 forms) +// +// * VPBROADCASTB xmm, xmm [AVX2] +// * VPBROADCASTB m8, xmm [AVX2] +// * VPBROADCASTB xmm, ymm [AVX2] +// * VPBROADCASTB m8, ymm [AVX2] +// * VPBROADCASTB r32, zmm{k}{z} [AVX512BW] +// * VPBROADCASTB xmm, zmm{k}{z} [AVX512BW] +// * VPBROADCASTB m8, zmm{k}{z} [AVX512BW] +// * VPBROADCASTB r32, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPBROADCASTB r32, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPBROADCASTB xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPBROADCASTB xmm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPBROADCASTB m8, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPBROADCASTB m8, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPBROADCASTB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPBROADCASTB", 2, Operands { v0, v1 }) + // VPBROADCASTB xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTB m8, xmm + if isM8(v0) && isXMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPBROADCASTB xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTB m8, ymm + if isM8(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPBROADCASTB r32, zmm{k}{z} + if isReg32(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTB xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTB m8, zmm{k}{z} + if isM8(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPBROADCASTB r32, xmm{k}{z} + if isReg32(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTB r32, ymm{k}{z} + if isReg32(v0) && 
isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTB xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTB xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x78) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTB m8, xmm{k}{z} + if isM8(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPBROADCASTB m8, ymm{k}{z} + if isM8(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x78) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPBROADCASTB") + } + return p +} + +// VPBROADCASTD performs "Broadcast Doubleword Integer". 
+// +// Mnemonic : VPBROADCASTD +// Supported forms : (13 forms) +// +// * VPBROADCASTD xmm, xmm [AVX2] +// * VPBROADCASTD m32, xmm [AVX2] +// * VPBROADCASTD xmm, ymm [AVX2] +// * VPBROADCASTD m32, ymm [AVX2] +// * VPBROADCASTD r32, zmm{k}{z} [AVX512F] +// * VPBROADCASTD xmm, zmm{k}{z} [AVX512F] +// * VPBROADCASTD m32, zmm{k}{z} [AVX512F] +// * VPBROADCASTD r32, xmm{k}{z} [AVX512F,AVX512VL] +// * VPBROADCASTD r32, ymm{k}{z} [AVX512F,AVX512VL] +// * VPBROADCASTD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPBROADCASTD xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPBROADCASTD m32, xmm{k}{z} [AVX512F,AVX512VL] +// * VPBROADCASTD m32, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPBROADCASTD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPBROADCASTD", 2, Operands { v0, v1 }) + // VPBROADCASTD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x58) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTD m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x58) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPBROADCASTD xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x58) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTD m32, ymm + if isM32(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x58) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPBROADCASTD r32, zmm{k}{z} + if isReg32(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTD xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x58) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTD m32, zmm{k}{z} + if isM32(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x58) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VPBROADCASTD r32, xmm{k}{z} + if isReg32(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTD r32, ymm{k}{z} + if isReg32(v0) && 
isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x58) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTD xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x58) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTD m32, xmm{k}{z} + if isM32(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x58) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VPBROADCASTD m32, ymm{k}{z} + if isM32(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x58) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPBROADCASTD") + } + return p +} + +// VPBROADCASTMB2Q performs "Broadcast Low Byte of Mask Register to Packed Quadword Values". 
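+// +// Moves opmask state into vector lanes: every quadword of the destination +// receives the zero-extended low byte of the k register. A sketch, +// assuming this package's K and ZMM register constants: +// +//     p.VPBROADCASTMB2Q(K1, ZMM0)    // each qword of zmm0 = uint64(k1) & 0xff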
+// +// Mnemonic : VPBROADCASTMB2Q +// Supported forms : (3 forms) +// +// * VPBROADCASTMB2Q k, xmm [AVX512CD,AVX512VL] +// * VPBROADCASTMB2Q k, ymm [AVX512CD,AVX512VL] +// * VPBROADCASTMB2Q k, zmm [AVX512CD] +// +func (self *Program) VPBROADCASTMB2Q(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPBROADCASTMB2Q", 2, Operands { v0, v1 }) + // VPBROADCASTMB2Q k, xmm + if isK(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x08) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTMB2Q k, ymm + if isK(v0) && isEVEXYMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x28) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTMB2Q k, zmm + if isK(v0) && isZMM(v1) { + self.require(ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x48) + m.emit(0x2a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPBROADCASTMB2Q") + } + return p +} + +// VPBROADCASTMW2D performs "Broadcast Low Word of Mask Register to Packed Doubleword Values". +// +// Mnemonic : VPBROADCASTMW2D +// Supported forms : (3 forms) +// +// * VPBROADCASTMW2D k, xmm [AVX512CD,AVX512VL] +// * VPBROADCASTMW2D k, ymm [AVX512CD,AVX512VL] +// * VPBROADCASTMW2D k, zmm [AVX512CD] +// +func (self *Program) VPBROADCASTMW2D(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPBROADCASTMW2D", 2, Operands { v0, v1 }) + // VPBROADCASTMW2D k, xmm + if isK(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x08) + m.emit(0x3a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTMW2D k, ymm + if isK(v0) && isEVEXYMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x28) + m.emit(0x3a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTMW2D k, zmm + if isK(v0) && isZMM(v1) { + self.require(ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x48) + m.emit(0x3a) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPBROADCASTMW2D") + } + return p +} + +// VPBROADCASTQ performs "Broadcast Quadword Integer". 
+// +// Mnemonic : VPBROADCASTQ +// Supported forms : (13 forms) +// +// * VPBROADCASTQ xmm, xmm [AVX2] +// * VPBROADCASTQ m64, xmm [AVX2] +// * VPBROADCASTQ xmm, ymm [AVX2] +// * VPBROADCASTQ m64, ymm [AVX2] +// * VPBROADCASTQ r64, zmm{k}{z} [AVX512F] +// * VPBROADCASTQ xmm, zmm{k}{z} [AVX512F] +// * VPBROADCASTQ m64, zmm{k}{z} [AVX512F] +// * VPBROADCASTQ r64, xmm{k}{z} [AVX512F,AVX512VL] +// * VPBROADCASTQ r64, ymm{k}{z} [AVX512F,AVX512VL] +// * VPBROADCASTQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPBROADCASTQ xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPBROADCASTQ m64, xmm{k}{z} [AVX512F,AVX512VL] +// * VPBROADCASTQ m64, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPBROADCASTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPBROADCASTQ", 2, Operands { v0, v1 }) + // VPBROADCASTQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x59) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTQ m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x59) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPBROADCASTQ xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x59) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTQ m64, ymm + if isM64(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x59) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPBROADCASTQ r64, zmm{k}{z} + if isReg64(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTQ xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x59) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTQ m64, zmm{k}{z} + if isM64(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x59) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPBROADCASTQ r64, xmm{k}{z} + if isReg64(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTQ r64, ymm{k}{z} + if isReg64(v0) && 
isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7c) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTQ xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x59) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTQ xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x59) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTQ m64, xmm{k}{z} + if isM64(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x59) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPBROADCASTQ m64, ymm{k}{z} + if isM64(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x59) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPBROADCASTQ") + } + return p +} + +// VPBROADCASTW performs "Broadcast Word Integer". 
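+//
+// A short sketch (illustrative registers, *Program value p as above):
+//
+//	p.VPBROADCASTW(XMM3, YMM1) // AVX2: replicate the low word of XMM3 across all sixteen words of YMM1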
+// +// Mnemonic : VPBROADCASTW +// Supported forms : (13 forms) +// +// * VPBROADCASTW xmm, xmm [AVX2] +// * VPBROADCASTW m16, xmm [AVX2] +// * VPBROADCASTW xmm, ymm [AVX2] +// * VPBROADCASTW m16, ymm [AVX2] +// * VPBROADCASTW r32, zmm{k}{z} [AVX512BW] +// * VPBROADCASTW xmm, zmm{k}{z} [AVX512BW] +// * VPBROADCASTW m16, zmm{k}{z} [AVX512BW] +// * VPBROADCASTW r32, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPBROADCASTW r32, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPBROADCASTW xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPBROADCASTW xmm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPBROADCASTW m16, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPBROADCASTW m16, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPBROADCASTW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPBROADCASTW", 2, Operands { v0, v1 }) + // VPBROADCASTW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTW m16, xmm + if isM16(v0) && isXMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPBROADCASTW xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTW m16, ymm + if isM16(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPBROADCASTW r32, zmm{k}{z} + if isReg32(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTW xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTW m16, zmm{k}{z} + if isM16(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 2) + }) + } + // VPBROADCASTW r32, xmm{k}{z} + if isReg32(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTW r32, ymm{k}{z} + if 
isReg32(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x7b) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTW xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTW xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x79) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPBROADCASTW m16, xmm{k}{z} + if isM16(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 2) + }) + } + // VPBROADCASTW m16, ymm{k}{z} + if isM16(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x79) + m.mrsd(lcode(v[1]), addr(v[0]), 2) + }) + } + if p.len == 0 { + panic("invalid operands for VPBROADCASTW") + } + return p +} + +// VPCLMULQDQ performs "Carry-Less Quadword Multiplication". +// +// Mnemonic : VPCLMULQDQ +// Supported forms : (2 forms) +// +// * VPCLMULQDQ imm8, xmm, xmm, xmm [AVX,PCLMULQDQ] +// * VPCLMULQDQ imm8, m128, xmm, xmm [AVX,PCLMULQDQ] +// +func (self *Program) VPCLMULQDQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCLMULQDQ", 4, Operands { v0, v1, v2, v3 }) + // VPCLMULQDQ imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX | ISA_PCLMULQDQ) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x44) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCLMULQDQ imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX | ISA_PCLMULQDQ) + p.domain = DomainCrypto + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x44) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCLMULQDQ") + } + return p +} + +// VPCMOV performs "Packed Conditional Move". 
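+//
+// A sketch of the all-register XOP form (illustrative registers). If the
+// reversed-Intel operand convention used throughout this file holds, the
+// first operand is the bit selector and the last receives the result:
+//
+//	p.VPCMOV(XMM0, XMM1, XMM2, XMM3) // XMM3 = (XMM2 & XMM0) | (XMM1 &^ XMM0), per AMD's XOP reference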
+// +// Mnemonic : VPCMOV +// Supported forms : (6 forms) +// +// * VPCMOV xmm, xmm, xmm, xmm [XOP] +// * VPCMOV m128, xmm, xmm, xmm [XOP] +// * VPCMOV xmm, m128, xmm, xmm [XOP] +// * VPCMOV ymm, ymm, ymm, ymm [XOP] +// * VPCMOV m256, ymm, ymm, ymm [XOP] +// * VPCMOV ymm, m256, ymm, ymm [XOP] +// +func (self *Program) VPCMOV(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCMOV", 4, Operands { v0, v1, v2, v3 }) + // VPCMOV xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0xa2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[2]) << 3)) + m.emit(0xa2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + } + // VPCMOV m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x80, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0xa2) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VPCMOV xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xa2) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPCMOV ymm, ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit(0xa2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfc ^ (hlcode(v[2]) << 3)) + m.emit(0xa2) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + } + // VPCMOV m256, ymm, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x84, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0xa2) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VPCMOV ymm, m256, ymm, ymm + if isYMM(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x04, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xa2) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMOV") + } + return p +} + +// VPCMPB performs "Compare Packed Signed Byte Values". 
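+//
+// The imm8 operand selects the comparison predicate, using the standard
+// AVX-512 encodings: 0 = EQ, 1 = LT, 2 = LE, 3 = FALSE, 4 = NE, 5 = NLT (>=),
+// 6 = NLE (>), 7 = TRUE. A sketch with illustrative registers (sources first,
+// destination mask last):
+//
+//	p.VPCMPB(2, ZMM1, ZMM2, K1) // K1[i] = (byte i of ZMM2 <= byte i of ZMM1), signed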
+// +// Mnemonic : VPCMPB +// Supported forms : (6 forms) +// +// * VPCMPB imm8, zmm, zmm, k{k} [AVX512BW] +// * VPCMPB imm8, m512, zmm, k{k} [AVX512BW] +// * VPCMPB imm8, xmm, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPB imm8, m128, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPB imm8, ymm, ymm, k{k} [AVX512BW,AVX512VL] +// * VPCMPB imm8, m256, ymm, k{k} [AVX512BW,AVX512VL] +// +func (self *Program) VPCMPB(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCMPB", 4, Operands { v0, v1, v2, v3 }) + // VPCMPB imm8, zmm, zmm, k{k} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPB imm8, m512, zmm, k{k} + if isImm8(v0) && isM512(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0x3f) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPB imm8, xmm, xmm, k{k} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPB imm8, m128, xmm, k{k} + if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0x3f) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPB imm8, ymm, ymm, k{k} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPB imm8, m256, ymm, k{k} + if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0x3f) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPB") + } + return p +} + +// VPCMPD performs "Compare Packed Signed Doubleword Values". 
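+//
+// Same imm8 predicate scheme as VPCMPB above, applied per signed doubleword:
+//
+//	p.VPCMPD(6, ZMM4, ZMM5, K2) // imm8 6 = NLE: per-dword signed greater-than into mask K2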
+// +// Mnemonic : VPCMPD +// Supported forms : (6 forms) +// +// * VPCMPD imm8, m512/m32bcst, zmm, k{k} [AVX512F] +// * VPCMPD imm8, zmm, zmm, k{k} [AVX512F] +// * VPCMPD imm8, m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPD imm8, xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPD imm8, m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VPCMPD imm8, ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VPCMPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCMPD", 4, Operands { v0, v1, v2, v3 }) + // VPCMPD imm8, m512/m32bcst, zmm, k{k} + if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0x1f) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPD imm8, zmm, zmm, k{k} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x1f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPD imm8, m128/m32bcst, xmm, k{k} + if isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0x1f) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPD imm8, xmm, xmm, k{k} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x1f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPD imm8, m256/m32bcst, ymm, k{k} + if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0x1f) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPD imm8, ymm, ymm, k{k} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x1f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPD") + } + return p +} + +// VPCMPEQB performs "Compare Packed Byte Data for Equality". 
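+//
+// A sketch of both flavors (illustrative registers); equality is symmetric,
+// so the order of the two source operands does not matter:
+//
+//	p.VPCMPEQB(XMM1, XMM2, XMM3) // AVX: byte i of XMM3 becomes 0xFF where XMM1 and XMM2 match, else 0x00
+//	p.VPCMPEQB(ZMM1, ZMM2, K1)   // AVX-512BW: per-byte equality recorded as bits of mask K1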
+// +// Mnemonic : VPCMPEQB +// Supported forms : (10 forms) +// +// * VPCMPEQB xmm, xmm, xmm [AVX] +// * VPCMPEQB m128, xmm, xmm [AVX] +// * VPCMPEQB ymm, ymm, ymm [AVX2] +// * VPCMPEQB m256, ymm, ymm [AVX2] +// * VPCMPEQB zmm, zmm, k{k} [AVX512BW] +// * VPCMPEQB m512, zmm, k{k} [AVX512BW] +// * VPCMPEQB xmm, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPEQB m128, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPEQB ymm, ymm, k{k} [AVX512BW,AVX512VL] +// * VPCMPEQB m256, ymm, k{k} [AVX512BW,AVX512VL] +// +func (self *Program) VPCMPEQB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPCMPEQB", 3, Operands { v0, v1, v2 }) + // VPCMPEQB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x74) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x74) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPEQB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x74) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x74) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPEQB zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x74) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQB m512, zmm, k{k} + if isM512(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x74) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPCMPEQB xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x74) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQB m128, xmm, k{k} + if isM128(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x74) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPCMPEQB ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + 
m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x74) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQB m256, ymm, k{k} + if isM256(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x74) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPEQB") + } + return p +} + +// VPCMPEQD performs "Compare Packed Doubleword Data for Equality". +// +// Mnemonic : VPCMPEQD +// Supported forms : (10 forms) +// +// * VPCMPEQD xmm, xmm, xmm [AVX] +// * VPCMPEQD m128, xmm, xmm [AVX] +// * VPCMPEQD ymm, ymm, ymm [AVX2] +// * VPCMPEQD m256, ymm, ymm [AVX2] +// * VPCMPEQD m512/m32bcst, zmm, k{k} [AVX512F] +// * VPCMPEQD zmm, zmm, k{k} [AVX512F] +// * VPCMPEQD m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPEQD xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPEQD m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VPCMPEQD ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VPCMPEQD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPCMPEQD", 3, Operands { v0, v1, v2 }) + // VPCMPEQD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x76) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x76) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPEQD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x76) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x76) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPEQD m512/m32bcst, zmm, k{k} + if isM512M32bcst(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x76) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPCMPEQD zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x76) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQD m128/m32bcst, xmm, k{k} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, 
ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x76) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPCMPEQD xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x76) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQD m256/m32bcst, ymm, k{k} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x76) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPCMPEQD ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x76) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPEQD") + } + return p +} + +// VPCMPEQQ performs "Compare Packed Quadword Data for Equality". +// +// Mnemonic : VPCMPEQQ +// Supported forms : (10 forms) +// +// * VPCMPEQQ xmm, xmm, xmm [AVX] +// * VPCMPEQQ m128, xmm, xmm [AVX] +// * VPCMPEQQ ymm, ymm, ymm [AVX2] +// * VPCMPEQQ m256, ymm, ymm [AVX2] +// * VPCMPEQQ m512/m64bcst, zmm, k{k} [AVX512F] +// * VPCMPEQQ zmm, zmm, k{k} [AVX512F] +// * VPCMPEQQ m128/m64bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPEQQ xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPEQQ m256/m64bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VPCMPEQQ ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VPCMPEQQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPCMPEQQ", 3, Operands { v0, v1, v2 }) + // VPCMPEQQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x29) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x29) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPEQQ ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x29) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x29) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPEQQ m512/m64bcst, zmm, k{k} + 
if isM512M64bcst(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x29) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPCMPEQQ zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x29) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQQ m128/m64bcst, xmm, k{k} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x29) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPCMPEQQ xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x29) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQQ m256/m64bcst, ymm, k{k} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x29) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPCMPEQQ ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x29) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPEQQ") + } + return p +} + +// VPCMPEQW performs "Compare Packed Word Data for Equality". 
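+//
+// Analogous to VPCMPEQB above, at word granularity:
+//
+//	p.VPCMPEQW(YMM1, YMM2, YMM3) // AVX2: word i of YMM3 becomes 0xFFFF on equality, else 0x0000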
+// +// Mnemonic : VPCMPEQW +// Supported forms : (10 forms) +// +// * VPCMPEQW xmm, xmm, xmm [AVX] +// * VPCMPEQW m128, xmm, xmm [AVX] +// * VPCMPEQW ymm, ymm, ymm [AVX2] +// * VPCMPEQW m256, ymm, ymm [AVX2] +// * VPCMPEQW zmm, zmm, k{k} [AVX512BW] +// * VPCMPEQW m512, zmm, k{k} [AVX512BW] +// * VPCMPEQW xmm, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPEQW m128, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPEQW ymm, ymm, k{k} [AVX512BW,AVX512VL] +// * VPCMPEQW m256, ymm, k{k} [AVX512BW,AVX512VL] +// +func (self *Program) VPCMPEQW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPCMPEQW", 3, Operands { v0, v1, v2 }) + // VPCMPEQW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x75) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x75) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPEQW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x75) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x75) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPEQW zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x75) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQW m512, zmm, k{k} + if isM512(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x75) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPCMPEQW xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x75) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQW m128, xmm, k{k} + if isM128(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x75) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPCMPEQW ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + 
m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x75) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPEQW m256, ymm, k{k} + if isM256(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x75) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPEQW") + } + return p +} + +// VPCMPESTRI performs "Packed Compare Explicit Length Strings, Return Index". +// +// Mnemonic : VPCMPESTRI +// Supported forms : (2 forms) +// +// * VPCMPESTRI imm8, xmm, xmm [AVX] +// * VPCMPESTRI imm8, m128, xmm [AVX] +// +func (self *Program) VPCMPESTRI(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPCMPESTRI", 3, Operands { v0, v1, v2 }) + // VPCMPESTRI imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79) + m.emit(0x61) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPESTRI imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0) + m.emit(0x61) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPESTRI") + } + return p +} + +// VPCMPESTRM performs "Packed Compare Explicit Length Strings, Return Mask". +// +// Mnemonic : VPCMPESTRM +// Supported forms : (2 forms) +// +// * VPCMPESTRM imm8, xmm, xmm [AVX] +// * VPCMPESTRM imm8, m128, xmm [AVX] +// +func (self *Program) VPCMPESTRM(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPCMPESTRM", 3, Operands { v0, v1, v2 }) + // VPCMPESTRM imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79) + m.emit(0x60) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPESTRM imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0) + m.emit(0x60) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPESTRM") + } + return p +} + +// VPCMPGTB performs "Compare Packed Signed Byte Integers for Greater Than". 
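+//
+// A sketch (illustrative registers). Unlike the equality forms, operand order
+// matters here; under the source-first convention of this package, the second
+// source is tested against the first:
+//
+//	p.VPCMPGTB(XMM1, XMM2, XMM3) // byte i of XMM3 becomes 0xFF where XMM2 > XMM1 (signed), else 0x00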
+// +// Mnemonic : VPCMPGTB +// Supported forms : (10 forms) +// +// * VPCMPGTB xmm, xmm, xmm [AVX] +// * VPCMPGTB m128, xmm, xmm [AVX] +// * VPCMPGTB ymm, ymm, ymm [AVX2] +// * VPCMPGTB m256, ymm, ymm [AVX2] +// * VPCMPGTB zmm, zmm, k{k} [AVX512BW] +// * VPCMPGTB m512, zmm, k{k} [AVX512BW] +// * VPCMPGTB xmm, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPGTB m128, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPGTB ymm, ymm, k{k} [AVX512BW,AVX512VL] +// * VPCMPGTB m256, ymm, k{k} [AVX512BW,AVX512VL] +// +func (self *Program) VPCMPGTB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPCMPGTB", 3, Operands { v0, v1, v2 }) + // VPCMPGTB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x64) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x64) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPGTB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x64) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x64) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPGTB zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x64) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTB m512, zmm, k{k} + if isM512(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x64) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPCMPGTB xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x64) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTB m128, xmm, k{k} + if isM128(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x64) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPCMPGTB ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + 
m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x64) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTB m256, ymm, k{k} + if isM256(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x64) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPGTB") + } + return p +} + +// VPCMPGTD performs "Compare Packed Signed Doubleword Integers for Greater Than". +// +// Mnemonic : VPCMPGTD +// Supported forms : (10 forms) +// +// * VPCMPGTD xmm, xmm, xmm [AVX] +// * VPCMPGTD m128, xmm, xmm [AVX] +// * VPCMPGTD ymm, ymm, ymm [AVX2] +// * VPCMPGTD m256, ymm, ymm [AVX2] +// * VPCMPGTD m512/m32bcst, zmm, k{k} [AVX512F] +// * VPCMPGTD zmm, zmm, k{k} [AVX512F] +// * VPCMPGTD m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPGTD xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPGTD m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VPCMPGTD ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VPCMPGTD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPCMPGTD", 3, Operands { v0, v1, v2 }) + // VPCMPGTD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPGTD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPGTD m512/m32bcst, zmm, k{k} + if isM512M32bcst(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPCMPGTD zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTD m128/m32bcst, xmm, k{k} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 
0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPCMPGTD xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTD m256/m32bcst, ymm, k{k} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x66) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPCMPGTD ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x66) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPGTD") + } + return p +} + +// VPCMPGTQ performs "Compare Packed Data for Greater Than". +// +// Mnemonic : VPCMPGTQ +// Supported forms : (10 forms) +// +// * VPCMPGTQ xmm, xmm, xmm [AVX] +// * VPCMPGTQ m128, xmm, xmm [AVX] +// * VPCMPGTQ ymm, ymm, ymm [AVX2] +// * VPCMPGTQ m256, ymm, ymm [AVX2] +// * VPCMPGTQ m512/m64bcst, zmm, k{k} [AVX512F] +// * VPCMPGTQ zmm, zmm, k{k} [AVX512F] +// * VPCMPGTQ m128/m64bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPGTQ xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPGTQ m256/m64bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VPCMPGTQ ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VPCMPGTQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPCMPGTQ", 3, Operands { v0, v1, v2 }) + // VPCMPGTQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x37) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x37) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPGTQ ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x37) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x37) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPGTQ m512/m64bcst, zmm, 
k{k} + if isM512M64bcst(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x37) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPCMPGTQ zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x37) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTQ m128/m64bcst, xmm, k{k} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x37) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPCMPGTQ xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x37) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTQ m256/m64bcst, ymm, k{k} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x37) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPCMPGTQ ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x37) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPGTQ") + } + return p +} + +// VPCMPGTW performs "Compare Packed Signed Word Integers for Greater Than". 
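+//
+// Word-granularity counterpart of VPCMPGTB above:
+//
+//	p.VPCMPGTW(YMM1, YMM2, K1) // AVX-512BW+VL: per-word signed greater-than recorded in mask K1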
+// +// Mnemonic : VPCMPGTW +// Supported forms : (10 forms) +// +// * VPCMPGTW xmm, xmm, xmm [AVX] +// * VPCMPGTW m128, xmm, xmm [AVX] +// * VPCMPGTW ymm, ymm, ymm [AVX2] +// * VPCMPGTW m256, ymm, ymm [AVX2] +// * VPCMPGTW zmm, zmm, k{k} [AVX512BW] +// * VPCMPGTW m512, zmm, k{k} [AVX512BW] +// * VPCMPGTW xmm, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPGTW m128, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPGTW ymm, ymm, k{k} [AVX512BW,AVX512VL] +// * VPCMPGTW m256, ymm, k{k} [AVX512BW,AVX512VL] +// +func (self *Program) VPCMPGTW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPCMPGTW", 3, Operands { v0, v1, v2 }) + // VPCMPGTW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x65) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x65) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPGTW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x65) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x65) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPCMPGTW zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x65) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTW m512, zmm, k{k} + if isM512(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x65) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPCMPGTW xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x65) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTW m128, xmm, k{k} + if isM128(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x65) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPCMPGTW ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + 
m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x65) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPCMPGTW m256, ymm, k{k} + if isM256(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x65) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPGTW") + } + return p +} + +// VPCMPISTRI performs "Packed Compare Implicit Length Strings, Return Index". +// +// Mnemonic : VPCMPISTRI +// Supported forms : (2 forms) +// +// * VPCMPISTRI imm8, xmm, xmm [AVX] +// * VPCMPISTRI imm8, m128, xmm [AVX] +// +func (self *Program) VPCMPISTRI(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPCMPISTRI", 3, Operands { v0, v1, v2 }) + // VPCMPISTRI imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79) + m.emit(0x63) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPISTRI imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0) + m.emit(0x63) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPISTRI") + } + return p +} + +// VPCMPISTRM performs "Packed Compare Implicit Length Strings, Return Mask". +// +// Mnemonic : VPCMPISTRM +// Supported forms : (2 forms) +// +// * VPCMPISTRM imm8, xmm, xmm [AVX] +// * VPCMPISTRM imm8, m128, xmm [AVX] +// +func (self *Program) VPCMPISTRM(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPCMPISTRM", 3, Operands { v0, v1, v2 }) + // VPCMPISTRM imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79) + m.emit(0x62) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPISTRM imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0) + m.emit(0x62) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPISTRM") + } + return p +} + +// VPCMPQ performs "Compare Packed Signed Quadword Values". 
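+//
+// Quadword member of the predicate-based compare family; see VPCMPB above for
+// the imm8 predicate encodings. A sketch:
+//
+//	p.VPCMPQ(0, ZMM1, ZMM2, K1) // imm8 0 = EQ: per-quadword signed equality into mask K1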
+// +// Mnemonic : VPCMPQ +// Supported forms : (6 forms) +// +// * VPCMPQ imm8, m512/m64bcst, zmm, k{k} [AVX512F] +// * VPCMPQ imm8, zmm, zmm, k{k} [AVX512F] +// * VPCMPQ imm8, m128/m64bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPQ imm8, xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPQ imm8, m256/m64bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VPCMPQ imm8, ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VPCMPQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCMPQ", 4, Operands { v0, v1, v2, v3 }) + // VPCMPQ imm8, m512/m64bcst, zmm, k{k} + if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0x1f) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPQ imm8, zmm, zmm, k{k} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x1f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPQ imm8, m128/m64bcst, xmm, k{k} + if isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0x1f) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPQ imm8, xmm, xmm, k{k} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x1f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPQ imm8, m256/m64bcst, ymm, k{k} + if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0x1f) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPQ imm8, ymm, ymm, k{k} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x1f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPQ") + } + return p +} + +// VPCMPUB performs "Compare Packed Unsigned Byte Values". 
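+//
+// Sketch (editorial, with the same operand-order and register assumptions as
+// for VPCMPQ above; predicate 6 = "not less or equal", i.e. unsigned >):
+//
+//     p.VPCMPUB(6, ZMM2, ZMM1, K1)    // K1 <- unsigned byte > mask (sketch)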
+// +// Mnemonic : VPCMPUB +// Supported forms : (6 forms) +// +// * VPCMPUB imm8, zmm, zmm, k{k} [AVX512BW] +// * VPCMPUB imm8, m512, zmm, k{k} [AVX512BW] +// * VPCMPUB imm8, xmm, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPUB imm8, m128, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPUB imm8, ymm, ymm, k{k} [AVX512BW,AVX512VL] +// * VPCMPUB imm8, m256, ymm, k{k} [AVX512BW,AVX512VL] +// +func (self *Program) VPCMPUB(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCMPUB", 4, Operands { v0, v1, v2, v3 }) + // VPCMPUB imm8, zmm, zmm, k{k} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x3e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUB imm8, m512, zmm, k{k} + if isImm8(v0) && isM512(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0x3e) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUB imm8, xmm, xmm, k{k} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x3e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUB imm8, m128, xmm, k{k} + if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0x3e) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUB imm8, ymm, ymm, k{k} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x3e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUB imm8, m256, ymm, k{k} + if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0x3e) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPUB") + } + return p +} + +// VPCMPUD performs "Compare Packed Unsigned Doubleword Values". 
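+//
+// Sketch (editorial; exercises the 256-bit AVX512VL form, assuming the
+// package's exported YMM/K operands; predicate 2 = unsigned <=):
+//
+//     p.VPCMPUD(2, YMM2, YMM1, K2)    // sketch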
+// +// Mnemonic : VPCMPUD +// Supported forms : (6 forms) +// +// * VPCMPUD imm8, m512/m32bcst, zmm, k{k} [AVX512F] +// * VPCMPUD imm8, zmm, zmm, k{k} [AVX512F] +// * VPCMPUD imm8, m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPUD imm8, xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPUD imm8, m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VPCMPUD imm8, ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VPCMPUD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCMPUD", 4, Operands { v0, v1, v2, v3 }) + // VPCMPUD imm8, m512/m32bcst, zmm, k{k} + if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0x1e) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUD imm8, zmm, zmm, k{k} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUD imm8, m128/m32bcst, xmm, k{k} + if isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0x1e) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUD imm8, xmm, xmm, k{k} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUD imm8, m256/m32bcst, ymm, k{k} + if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0x1e) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUD imm8, ymm, ymm, k{k} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPUD") + } + return p +} + +// VPCMPUQ performs "Compare Packed Unsigned Quadword Values". 
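+//
+// Sketch (editorial; register form only -- the m64bcst forms instead take a
+// broadcast-capable memory operand; predicate 0 = equal):
+//
+//     p.VPCMPUQ(0, ZMM2, ZMM1, K3)    // sketch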
+// +// Mnemonic : VPCMPUQ +// Supported forms : (6 forms) +// +// * VPCMPUQ imm8, m512/m64bcst, zmm, k{k} [AVX512F] +// * VPCMPUQ imm8, zmm, zmm, k{k} [AVX512F] +// * VPCMPUQ imm8, m128/m64bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPUQ imm8, xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VPCMPUQ imm8, m256/m64bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VPCMPUQ imm8, ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VPCMPUQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCMPUQ", 4, Operands { v0, v1, v2, v3 }) + // VPCMPUQ imm8, m512/m64bcst, zmm, k{k} + if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0x1e) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUQ imm8, zmm, zmm, k{k} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUQ imm8, m128/m64bcst, xmm, k{k} + if isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0x1e) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUQ imm8, xmm, xmm, k{k} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUQ imm8, m256/m64bcst, ymm, k{k} + if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, bcode(v[1])) + m.emit(0x1e) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUQ imm8, ymm, ymm, k{k} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x1e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPUQ") + } + return p +} + +// VPCMPUW performs "Compare Packed Unsigned Word Values". 
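+//
+// Sketch (editorial; 128-bit AVX512VL form, predicate 4 = not-equal):
+//
+//     p.VPCMPUW(4, XMM2, XMM1, K1)    // sketch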
+// +// Mnemonic : VPCMPUW +// Supported forms : (6 forms) +// +// * VPCMPUW imm8, zmm, zmm, k{k} [AVX512BW] +// * VPCMPUW imm8, m512, zmm, k{k} [AVX512BW] +// * VPCMPUW imm8, xmm, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPUW imm8, m128, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPUW imm8, ymm, ymm, k{k} [AVX512BW,AVX512VL] +// * VPCMPUW imm8, m256, ymm, k{k} [AVX512BW,AVX512VL] +// +func (self *Program) VPCMPUW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCMPUW", 4, Operands { v0, v1, v2, v3 }) + // VPCMPUW imm8, zmm, zmm, k{k} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x3e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUW imm8, m512, zmm, k{k} + if isImm8(v0) && isM512(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0x3e) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUW imm8, xmm, xmm, k{k} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x3e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUW imm8, m128, xmm, k{k} + if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0x3e) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUW imm8, ymm, ymm, k{k} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x3e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPUW imm8, m256, ymm, k{k} + if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0x3e) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPUW") + } + return p +} + +// VPCMPW performs "Compare Packed Signed Word Values". 
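+//
+// Sketch (editorial; predicate 5 = "not less than", i.e. signed >=):
+//
+//     p.VPCMPW(5, YMM2, YMM1, K1)    // sketch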
+// +// Mnemonic : VPCMPW +// Supported forms : (6 forms) +// +// * VPCMPW imm8, zmm, zmm, k{k} [AVX512BW] +// * VPCMPW imm8, m512, zmm, k{k} [AVX512BW] +// * VPCMPW imm8, xmm, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPW imm8, m128, xmm, k{k} [AVX512BW,AVX512VL] +// * VPCMPW imm8, ymm, ymm, k{k} [AVX512BW,AVX512VL] +// * VPCMPW imm8, m256, ymm, k{k} [AVX512BW,AVX512VL] +// +func (self *Program) VPCMPW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCMPW", 4, Operands { v0, v1, v2, v3 }) + // VPCMPW imm8, zmm, zmm, k{k} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPW imm8, m512, zmm, k{k} + if isImm8(v0) && isM512(v1) && isZMM(v2) && isKk(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0x3f) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPW imm8, xmm, xmm, k{k} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPW imm8, m128, xmm, k{k} + if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0x3f) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPW imm8, ymm, ymm, k{k} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCMPW imm8, m256, ymm, k{k} + if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) && isKk(v3) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), 0, 0) + m.emit(0x3f) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCMPW") + } + return p +} + +// VPCOMB performs "Compare Packed Signed Byte Integers". 
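+//
+// Sketch (editorial; XOP VPCOM immediates encode 0=LT, 1=LE, 2=GT, 3=GE,
+// 4=EQ, 5=NEQ, 6=FALSE, 7=TRUE, and the result is a byte mask in the
+// destination XMM register rather than a k-mask):
+//
+//     p.VPCOMB(4, XMM2, XMM1, XMM0)    // XMM0 <- byte equality mask (sketch)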
+// +// Mnemonic : VPCOMB +// Supported forms : (2 forms) +// +// * VPCOMB imm8, xmm, xmm, xmm [XOP] +// * VPCOMB imm8, m128, xmm, xmm [XOP] +// +func (self *Program) VPCOMB(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCOMB", 4, Operands { v0, v1, v2, v3 }) + // VPCOMB imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0xcc) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCOMB imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xcc) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCOMB") + } + return p +} + +// VPCOMD performs "Compare Packed Signed Doubleword Integers". +// +// Mnemonic : VPCOMD +// Supported forms : (2 forms) +// +// * VPCOMD imm8, xmm, xmm, xmm [XOP] +// * VPCOMD imm8, m128, xmm, xmm [XOP] +// +func (self *Program) VPCOMD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCOMD", 4, Operands { v0, v1, v2, v3 }) + // VPCOMD imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0xce) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCOMD imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xce) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCOMD") + } + return p +} + +// VPCOMPRESSD performs "Store Sparse Packed Doubleword Integer Values into Dense Memory/Register". 
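+//
+// Sketch (editorial; the unmasked register form shown degenerates to a plain
+// copy -- the instruction is normally used with a {k} write-mask, whose
+// spelling in this package is not shown in this excerpt):
+//
+//     p.VPCOMPRESSD(ZMM1, ZMM2)    // sketch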
+// +// Mnemonic : VPCOMPRESSD +// Supported forms : (6 forms) +// +// * VPCOMPRESSD zmm, zmm{k}{z} [AVX512F] +// * VPCOMPRESSD zmm, m512{k}{z} [AVX512F] +// * VPCOMPRESSD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPCOMPRESSD xmm, m128{k}{z} [AVX512F,AVX512VL] +// * VPCOMPRESSD ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPCOMPRESSD ymm, m256{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPCOMPRESSD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPCOMPRESSD", 2, Operands { v0, v1 }) + // VPCOMPRESSD zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x8b) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPCOMPRESSD zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x8b) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VPCOMPRESSD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x8b) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPCOMPRESSD xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x8b) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VPCOMPRESSD ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x8b) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPCOMPRESSD ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x8b) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPCOMPRESSD") + } + return p +} + +// VPCOMPRESSQ performs "Store Sparse Packed Quadword Integer Values into Dense Memory/Register". 
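+//
+// Sketch (editorial; quadword counterpart of VPCOMPRESSD, here in the
+// 256-bit AVX512VL register form, again eliding the {k}{z} masking):
+//
+//     p.VPCOMPRESSQ(YMM1, YMM2)    // sketch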
+// +// Mnemonic : VPCOMPRESSQ +// Supported forms : (6 forms) +// +// * VPCOMPRESSQ zmm, zmm{k}{z} [AVX512F] +// * VPCOMPRESSQ zmm, m512{k}{z} [AVX512F] +// * VPCOMPRESSQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPCOMPRESSQ xmm, m128{k}{z} [AVX512F,AVX512VL] +// * VPCOMPRESSQ ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPCOMPRESSQ ymm, m256{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPCOMPRESSQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPCOMPRESSQ", 2, Operands { v0, v1 }) + // VPCOMPRESSQ zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x8b) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPCOMPRESSQ zmm, m512{k}{z} + if isZMM(v0) && isM512kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x8b) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPCOMPRESSQ xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x8b) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPCOMPRESSQ xmm, m128{k}{z} + if isEVEXXMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x8b) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPCOMPRESSQ ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x8b) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPCOMPRESSQ ymm, m256{k}{z} + if isEVEXYMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x8b) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPCOMPRESSQ") + } + return p +} + +// VPCOMQ performs "Compare Packed Signed Quadword Integers". 
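+//
+// Sketch (editorial; XOP immediate 2 = signed greater-than):
+//
+//     p.VPCOMQ(2, XMM2, XMM1, XMM0)    // sketch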
+// +// Mnemonic : VPCOMQ +// Supported forms : (2 forms) +// +// * VPCOMQ imm8, xmm, xmm, xmm [XOP] +// * VPCOMQ imm8, m128, xmm, xmm [XOP] +// +func (self *Program) VPCOMQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCOMQ", 4, Operands { v0, v1, v2, v3 }) + // VPCOMQ imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0xcf) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCOMQ imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xcf) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCOMQ") + } + return p +} + +// VPCOMUB performs "Compare Packed Unsigned Byte Integers". +// +// Mnemonic : VPCOMUB +// Supported forms : (2 forms) +// +// * VPCOMUB imm8, xmm, xmm, xmm [XOP] +// * VPCOMUB imm8, m128, xmm, xmm [XOP] +// +func (self *Program) VPCOMUB(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCOMUB", 4, Operands { v0, v1, v2, v3 }) + // VPCOMUB imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0xec) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCOMUB imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xec) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCOMUB") + } + return p +} + +// VPCOMUD performs "Compare Packed Unsigned Doubleword Integers". 
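+//
+// Sketch (editorial; XOP immediate 1 = unsigned less-or-equal):
+//
+//     p.VPCOMUD(1, XMM2, XMM1, XMM0)    // sketch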
+// +// Mnemonic : VPCOMUD +// Supported forms : (2 forms) +// +// * VPCOMUD imm8, xmm, xmm, xmm [XOP] +// * VPCOMUD imm8, m128, xmm, xmm [XOP] +// +func (self *Program) VPCOMUD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCOMUD", 4, Operands { v0, v1, v2, v3 }) + // VPCOMUD imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0xee) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCOMUD imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xee) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCOMUD") + } + return p +} + +// VPCOMUQ performs "Compare Packed Unsigned Quadword Integers". +// +// Mnemonic : VPCOMUQ +// Supported forms : (2 forms) +// +// * VPCOMUQ imm8, xmm, xmm, xmm [XOP] +// * VPCOMUQ imm8, m128, xmm, xmm [XOP] +// +func (self *Program) VPCOMUQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCOMUQ", 4, Operands { v0, v1, v2, v3 }) + // VPCOMUQ imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0xef) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCOMUQ imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xef) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCOMUQ") + } + return p +} + +// VPCOMUW performs "Compare Packed Unsigned Word Integers". 
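+//
+// Sketch (editorial; XOP immediate 0 = unsigned less-than):
+//
+//     p.VPCOMUW(0, XMM2, XMM1, XMM0)    // sketch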
+// +// Mnemonic : VPCOMUW +// Supported forms : (2 forms) +// +// * VPCOMUW imm8, xmm, xmm, xmm [XOP] +// * VPCOMUW imm8, m128, xmm, xmm [XOP] +// +func (self *Program) VPCOMUW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCOMUW", 4, Operands { v0, v1, v2, v3 }) + // VPCOMUW imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0xed) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCOMUW imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xed) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCOMUW") + } + return p +} + +// VPCOMW performs "Compare Packed Signed Word Integers". +// +// Mnemonic : VPCOMW +// Supported forms : (2 forms) +// +// * VPCOMW imm8, xmm, xmm, xmm [XOP] +// * VPCOMW imm8, m128, xmm, xmm [XOP] +// +func (self *Program) VPCOMW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPCOMW", 4, Operands { v0, v1, v2, v3 }) + // VPCOMW imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0xcd) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPCOMW imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xcd) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCOMW") + } + return p +} + +// VPCONFLICTD performs "Detect Conflicts Within a Vector of Packed Doubleword Values into Dense Memory/Register". 
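+//
+// Sketch (editorial; each destination dword receives a bit set for every
+// earlier lane of the source holding the same value):
+//
+//     p.VPCONFLICTD(ZMM1, ZMM2)    // ZMM2 <- conflict bitmaps (sketch)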
+// +// Mnemonic : VPCONFLICTD +// Supported forms : (6 forms) +// +// * VPCONFLICTD m128/m32bcst, xmm{k}{z} [AVX512CD,AVX512VL] +// * VPCONFLICTD m256/m32bcst, ymm{k}{z} [AVX512CD,AVX512VL] +// * VPCONFLICTD m512/m32bcst, zmm{k}{z} [AVX512CD] +// * VPCONFLICTD xmm, xmm{k}{z} [AVX512CD,AVX512VL] +// * VPCONFLICTD ymm, ymm{k}{z} [AVX512CD,AVX512VL] +// * VPCONFLICTD zmm, zmm{k}{z} [AVX512CD] +// +func (self *Program) VPCONFLICTD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPCONFLICTD", 2, Operands { v0, v1 }) + // VPCONFLICTD m128/m32bcst, xmm{k}{z} + if isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xc4) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VPCONFLICTD m256/m32bcst, ymm{k}{z} + if isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xc4) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VPCONFLICTD m512/m32bcst, zmm{k}{z} + if isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xc4) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VPCONFLICTD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0xc4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPCONFLICTD ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0xc4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPCONFLICTD zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0xc4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCONFLICTD") + } + return p +} + +// VPCONFLICTQ performs "Detect Conflicts Within a Vector of Packed Quadword Values into Dense Memory/Register". 
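+//
+// Sketch (editorial; quadword counterpart, 128-bit AVX512VL form):
+//
+//     p.VPCONFLICTQ(XMM1, XMM2)    // sketch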
+// +// Mnemonic : VPCONFLICTQ +// Supported forms : (6 forms) +// +// * VPCONFLICTQ m128/m64bcst, xmm{k}{z} [AVX512CD,AVX512VL] +// * VPCONFLICTQ m256/m64bcst, ymm{k}{z} [AVX512CD,AVX512VL] +// * VPCONFLICTQ m512/m64bcst, zmm{k}{z} [AVX512CD] +// * VPCONFLICTQ xmm, xmm{k}{z} [AVX512CD,AVX512VL] +// * VPCONFLICTQ ymm, ymm{k}{z} [AVX512CD,AVX512VL] +// * VPCONFLICTQ zmm, zmm{k}{z} [AVX512CD] +// +func (self *Program) VPCONFLICTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPCONFLICTQ", 2, Operands { v0, v1 }) + // VPCONFLICTQ m128/m64bcst, xmm{k}{z} + if isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xc4) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VPCONFLICTQ m256/m64bcst, ymm{k}{z} + if isM256M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xc4) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VPCONFLICTQ m512/m64bcst, zmm{k}{z} + if isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xc4) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VPCONFLICTQ xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0xc4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPCONFLICTQ ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0xc4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPCONFLICTQ zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0xc4) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPCONFLICTQ") + } + return p +} + +// VPERM2F128 performs "Permute Floating-Point Values". 
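+//
+// Sketch (editorial; the imm8 selects a 128-bit lane of either source for
+// each half of the destination -- 0x20 concatenates the two low lanes):
+//
+//     p.VPERM2F128(0x20, YMM1, YMM2, YMM3)    // sketch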
+// +// Mnemonic : VPERM2F128 +// Supported forms : (2 forms) +// +// * VPERM2F128 imm8, ymm, ymm, ymm [AVX] +// * VPERM2F128 imm8, m256, ymm, ymm [AVX] +// +func (self *Program) VPERM2F128(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPERM2F128", 4, Operands { v0, v1, v2, v3 }) + // VPERM2F128 imm8, ymm, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x06) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERM2F128 imm8, m256, ymm, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x06) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERM2F128") + } + return p +} + +// VPERM2I128 performs "Permute 128-Bit Integer Values". +// +// Mnemonic : VPERM2I128 +// Supported forms : (2 forms) +// +// * VPERM2I128 imm8, ymm, ymm, ymm [AVX2] +// * VPERM2I128 imm8, m256, ymm, ymm [AVX2] +// +func (self *Program) VPERM2I128(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPERM2I128", 4, Operands { v0, v1, v2, v3 }) + // VPERM2I128 imm8, ymm, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit(0x46) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERM2I128 imm8, m256, ymm, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x46) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERM2I128") + } + return p +} + +// VPERMB performs "Permute Byte Integers". 
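+//
+// Sketch (editorial; bytes of the first operand are selected by the byte
+// indices held in the second, destination last):
+//
+//     p.VPERMB(ZMM1, ZMM2, ZMM3)    // ZMM3 <- ZMM1[ZMM2[i]] (sketch)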
+// +// Mnemonic : VPERMB +// Supported forms : (6 forms) +// +// * VPERMB xmm, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL] +// * VPERMB m128, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL] +// * VPERMB ymm, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL] +// * VPERMB m256, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL] +// * VPERMB zmm, zmm, zmm{k}{z} [AVX512VBMI] +// * VPERMB m512, zmm, zmm{k}{z} [AVX512VBMI] +// +func (self *Program) VPERMB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMB", 3, Operands { v0, v1, v2 }) + // VPERMB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x8d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x8d) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x8d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x8d) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x8d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x8d) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMB") + } + return p +} + +// VPERMD performs "Permute Doubleword Integers". 
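+//
+// Sketch (editorial; AVX2 form -- dwords of the first operand are selected
+// by the dword indices in the second):
+//
+//     p.VPERMD(YMM1, YMM2, YMM3)    // YMM3 <- YMM1[YMM2[i]] (sketch)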
+// +// Mnemonic : VPERMD +// Supported forms : (6 forms) +// +// * VPERMD ymm, ymm, ymm [AVX2] +// * VPERMD m256, ymm, ymm [AVX2] +// * VPERMD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMD", 3, Operands { v0, v1, v2 }) + // VPERMD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x36) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x36) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPERMD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x36) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x36) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x36) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x36) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMD") + } + return p +} + +// VPERMI2B performs "Full Permute of Bytes From Two Tables Overwriting the Index". 
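+//
+// Sketch (editorial; the destination supplies the byte indices on input and
+// is overwritten with bytes drawn from the two source tables):
+//
+//     p.VPERMI2B(ZMM1, ZMM2, ZMM3)    // ZMM3: indices in, result out (sketch)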
+// +// Mnemonic : VPERMI2B +// Supported forms : (6 forms) +// +// * VPERMI2B xmm, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL] +// * VPERMI2B m128, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL] +// * VPERMI2B ymm, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL] +// * VPERMI2B m256, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL] +// * VPERMI2B zmm, zmm, zmm{k}{z} [AVX512VBMI] +// * VPERMI2B m512, zmm, zmm{k}{z} [AVX512VBMI] +// +func (self *Program) VPERMI2B(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMI2B", 3, Operands { v0, v1, v2 }) + // VPERMI2B xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x75) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2B m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x75) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMI2B ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x75) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2B m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x75) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMI2B zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x75) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2B m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x75) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMI2B") + } + return p +} + +// VPERMI2D performs "Full Permute of Doublewords From Two Tables Overwriting the Index". 
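+//
+// Sketch (editorial; dword variant of the index-overwriting two-table
+// permute described for VPERMI2B above):
+//
+//     p.VPERMI2D(ZMM1, ZMM2, ZMM3)    // sketch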
+// +// Mnemonic : VPERMI2D +// Supported forms : (6 forms) +// +// * VPERMI2D m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMI2D zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMI2D m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMI2D xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMI2D m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMI2D ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMI2D(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMI2D", 3, Operands { v0, v1, v2 }) + // VPERMI2D m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x76) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMI2D zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x76) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2D m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x76) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMI2D xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x76) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2D m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x76) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMI2D ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x76) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMI2D") + } + return p +} + +// VPERMI2PD performs "Full Permute of Double-Precision Floating-Point Values From Two Tables Overwriting the Index". 
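+//
+// Sketch (editorial; same permute over packed double-precision elements):
+//
+//     p.VPERMI2PD(ZMM1, ZMM2, ZMM3)    // sketch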
+// +// Mnemonic : VPERMI2PD +// Supported forms : (6 forms) +// +// * VPERMI2PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMI2PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMI2PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMI2PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMI2PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMI2PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMI2PD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMI2PD", 3, Operands { v0, v1, v2 }) + // VPERMI2PD m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x77) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMI2PD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x77) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2PD m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x77) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMI2PD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x77) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2PD m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x77) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMI2PD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x77) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMI2PD") + } + return p +} + +// VPERMI2PS performs "Full Permute of Single-Precision Floating-Point Values From Two Tables Overwriting the Index". 
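+//
+// Sketch (editorial; single-precision variant, 256-bit AVX512VL form):
+//
+//     p.VPERMI2PS(YMM1, YMM2, YMM3)    // sketch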
+// +// Mnemonic : VPERMI2PS +// Supported forms : (6 forms) +// +// * VPERMI2PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMI2PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMI2PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMI2PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMI2PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMI2PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMI2PS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMI2PS", 3, Operands { v0, v1, v2 }) + // VPERMI2PS m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x77) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMI2PS zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x77) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2PS m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x77) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMI2PS xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x77) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2PS m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x77) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMI2PS ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x77) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMI2PS") + } + return p +} + +// VPERMI2Q performs "Full Permute of Quadwords From Two Tables Overwriting the Index". 
+// +// Mnemonic : VPERMI2Q +// Supported forms : (6 forms) +// +// * VPERMI2Q m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMI2Q zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMI2Q m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMI2Q xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMI2Q m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMI2Q ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMI2Q(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMI2Q", 3, Operands { v0, v1, v2 }) + // VPERMI2Q m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x76) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMI2Q zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x76) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2Q m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x76) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMI2Q xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x76) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2Q m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x76) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMI2Q ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x76) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMI2Q") + } + return p +} + +// VPERMI2W performs "Full Permute of Words From Two Tables Overwriting the Index". 
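+//
+// A hypothetical usage sketch (illustration only, with the same assumed p
+// and register constants as above; the index/destination register is last):
+//
+//     p.VPERMI2W(ZMM0, ZMM1, ZMM2)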
+// +// Mnemonic : VPERMI2W +// Supported forms : (6 forms) +// +// * VPERMI2W zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPERMI2W m512, zmm, zmm{k}{z} [AVX512BW] +// * VPERMI2W xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPERMI2W m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPERMI2W ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPERMI2W m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPERMI2W(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMI2W", 3, Operands { v0, v1, v2 }) + // VPERMI2W zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x75) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2W m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x75) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMI2W xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x75) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2W m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x75) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMI2W ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x75) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMI2W m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x75) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMI2W") + } + return p +} + +// VPERMIL2PD performs "Permute Two-Source Double-Precision Floating-Point Vectors". 
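+//
+// A hypothetical usage sketch (illustration only, assuming the same p and
+// the package's XMM register constants): the leading imm4 becomes the low
+// nibble of the instruction's control byte, and the trailing register
+// receives the result.
+//
+//     p.VPERMIL2PD(0, XMM0, XMM1, XMM2, XMM3)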
+// +// Mnemonic : VPERMIL2PD +// Supported forms : (6 forms) +// +// * VPERMIL2PD imm4, xmm, xmm, xmm, xmm [XOP] +// * VPERMIL2PD imm4, m128, xmm, xmm, xmm [XOP] +// * VPERMIL2PD imm4, xmm, m128, xmm, xmm [XOP] +// * VPERMIL2PD imm4, ymm, ymm, ymm, ymm [XOP] +// * VPERMIL2PD imm4, m256, ymm, ymm, ymm [XOP] +// * VPERMIL2PD imm4, ymm, m256, ymm, ymm [XOP] +// +func (self *Program) VPERMIL2PD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, v4 interface{}) *Instruction { + p := self.alloc("VPERMIL2PD", 5, Operands { v0, v1, v2, v3, v4 }) + // VPERMIL2PD imm4, xmm, xmm, xmm, xmm + if isImm4(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) && isXMM(v4) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0x79 ^ (hlcode(v[3]) << 3)) + m.emit(0x49) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.emit((hlcode(v[1]) << 4) | imml(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xf9 ^ (hlcode(v[3]) << 3)) + m.emit(0x49) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[1])) + m.emit((hlcode(v[2]) << 4) | imml(v[0])) + }) + } + // VPERMIL2PD imm4, m128, xmm, xmm, xmm + if isImm4(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) && isXMM(v4) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[4]), addr(v[1]), hlcode(v[3])) + m.emit(0x49) + m.mrsd(lcode(v[4]), addr(v[1]), 1) + m.emit((hlcode(v[2]) << 4) | imml(v[0])) + }) + } + // VPERMIL2PD imm4, xmm, m128, xmm, xmm + if isImm4(v0) && isXMM(v1) && isM128(v2) && isXMM(v3) && isXMM(v4) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[4]), addr(v[2]), hlcode(v[3])) + m.emit(0x49) + m.mrsd(lcode(v[4]), addr(v[2]), 1) + m.emit((hlcode(v[1]) << 4) | imml(v[0])) + }) + } + // VPERMIL2PD imm4, ymm, ymm, ymm, ymm + if isImm4(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) && isYMM(v4) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0x7d ^ (hlcode(v[3]) << 3)) + m.emit(0x49) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.emit((hlcode(v[1]) << 4) | imml(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[3]) << 3)) + m.emit(0x49) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[1])) + m.emit((hlcode(v[2]) << 4) | imml(v[0])) + }) + } + // VPERMIL2PD imm4, m256, ymm, ymm, ymm + if isImm4(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) && isYMM(v4) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[4]), addr(v[1]), hlcode(v[3])) + m.emit(0x49) + m.mrsd(lcode(v[4]), addr(v[1]), 1) + m.emit((hlcode(v[2]) << 4) | imml(v[0])) + }) + } + // VPERMIL2PD imm4, ymm, m256, ymm, ymm + if isImm4(v0) && isYMM(v1) && isM256(v2) && isYMM(v3) && isYMM(v4) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[4]), addr(v[2]), hlcode(v[3])) + m.emit(0x49) + m.mrsd(lcode(v[4]), addr(v[2]), 1) + m.emit((hlcode(v[1]) << 4) | imml(v[0])) + }) + } + if p.len == 0 { + 
panic("invalid operands for VPERMIL2PD") + } + return p +} + +// VPERMIL2PS performs "Permute Two-Source Single-Precision Floating-Point Vectors". +// +// Mnemonic : VPERMIL2PS +// Supported forms : (6 forms) +// +// * VPERMIL2PS imm4, xmm, xmm, xmm, xmm [XOP] +// * VPERMIL2PS imm4, m128, xmm, xmm, xmm [XOP] +// * VPERMIL2PS imm4, xmm, m128, xmm, xmm [XOP] +// * VPERMIL2PS imm4, ymm, ymm, ymm, ymm [XOP] +// * VPERMIL2PS imm4, m256, ymm, ymm, ymm [XOP] +// * VPERMIL2PS imm4, ymm, m256, ymm, ymm [XOP] +// +func (self *Program) VPERMIL2PS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, v4 interface{}) *Instruction { + p := self.alloc("VPERMIL2PS", 5, Operands { v0, v1, v2, v3, v4 }) + // VPERMIL2PS imm4, xmm, xmm, xmm, xmm + if isImm4(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) && isXMM(v4) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0x79 ^ (hlcode(v[3]) << 3)) + m.emit(0x48) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.emit((hlcode(v[1]) << 4) | imml(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xf9 ^ (hlcode(v[3]) << 3)) + m.emit(0x48) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[1])) + m.emit((hlcode(v[2]) << 4) | imml(v[0])) + }) + } + // VPERMIL2PS imm4, m128, xmm, xmm, xmm + if isImm4(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) && isXMM(v4) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[4]), addr(v[1]), hlcode(v[3])) + m.emit(0x48) + m.mrsd(lcode(v[4]), addr(v[1]), 1) + m.emit((hlcode(v[2]) << 4) | imml(v[0])) + }) + } + // VPERMIL2PS imm4, xmm, m128, xmm, xmm + if isImm4(v0) && isXMM(v1) && isM128(v2) && isXMM(v3) && isXMM(v4) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[4]), addr(v[2]), hlcode(v[3])) + m.emit(0x48) + m.mrsd(lcode(v[4]), addr(v[2]), 1) + m.emit((hlcode(v[1]) << 4) | imml(v[0])) + }) + } + // VPERMIL2PS imm4, ymm, ymm, ymm, ymm + if isImm4(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) && isYMM(v4) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0x7d ^ (hlcode(v[3]) << 3)) + m.emit(0x48) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.emit((hlcode(v[1]) << 4) | imml(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[4]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[3]) << 3)) + m.emit(0x48) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[1])) + m.emit((hlcode(v[2]) << 4) | imml(v[0])) + }) + } + // VPERMIL2PS imm4, m256, ymm, ymm, ymm + if isImm4(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) && isYMM(v4) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[4]), addr(v[1]), hlcode(v[3])) + m.emit(0x48) + m.mrsd(lcode(v[4]), addr(v[1]), 1) + m.emit((hlcode(v[2]) << 4) | imml(v[0])) + }) + } + // VPERMIL2PS imm4, ymm, m256, ymm, ymm + if isImm4(v0) && isYMM(v1) && isM256(v2) && isYMM(v3) && isYMM(v4) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[4]), addr(v[2]), 
hlcode(v[3])) + m.emit(0x48) + m.mrsd(lcode(v[4]), addr(v[2]), 1) + m.emit((hlcode(v[1]) << 4) | imml(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMIL2PS") + } + return p +} + +// VPERMILPD performs "Permute Double-Precision Floating-Point Values". +// +// Mnemonic : VPERMILPD +// Supported forms : (20 forms) +// +// * VPERMILPD imm8, xmm, xmm [AVX] +// * VPERMILPD xmm, xmm, xmm [AVX] +// * VPERMILPD m128, xmm, xmm [AVX] +// * VPERMILPD imm8, m128, xmm [AVX] +// * VPERMILPD imm8, ymm, ymm [AVX] +// * VPERMILPD ymm, ymm, ymm [AVX] +// * VPERMILPD m256, ymm, ymm [AVX] +// * VPERMILPD imm8, m256, ymm [AVX] +// * VPERMILPD imm8, m512/m64bcst, zmm{k}{z} [AVX512F] +// * VPERMILPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMILPD imm8, zmm, zmm{k}{z} [AVX512F] +// * VPERMILPD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMILPD imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPD imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMILPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMILPD", 3, Operands { v0, v1, v2 }) + // VPERMILPD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79) + m.emit(0x05) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x0d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMILPD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x0d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPERMILPD imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0) + m.emit(0x05) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPD imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d) + m.emit(0x05) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x0d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMILPD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && 
isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x0d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPERMILPD imm8, m256, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[2]), addr(v[1]), 0) + m.emit(0x05) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPD imm8, m512/m64bcst, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x05) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPD m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x0d) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMILPD imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x05) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x0d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMILPD imm8, m128/m64bcst, xmm{k}{z} + if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x05) + m.mrsd(lcode(v[2]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPD imm8, m256/m64bcst, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x05) + m.mrsd(lcode(v[2]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPD m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x0d) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMILPD imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, 
v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08) + m.emit(0x05) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x0d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMILPD m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x0d) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMILPD imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x05) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x0d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMILPD") + } + return p +} + +// VPERMILPS performs "Permute Single-Precision Floating-Point Values". 
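+//
+// A hypothetical usage sketch (illustration only, assuming the same p and
+// register constants): in the imm8 forms each 2-bit field selects a source
+// lane, so 0b10110001 swaps adjacent pairs of single-precision elements
+// within each 128-bit lane.
+//
+//     p.VPERMILPS(0b10110001, XMM1, XMM2)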
+// +// Mnemonic : VPERMILPS +// Supported forms : (20 forms) +// +// * VPERMILPS imm8, xmm, xmm [AVX] +// * VPERMILPS xmm, xmm, xmm [AVX] +// * VPERMILPS m128, xmm, xmm [AVX] +// * VPERMILPS imm8, m128, xmm [AVX] +// * VPERMILPS imm8, ymm, ymm [AVX] +// * VPERMILPS ymm, ymm, ymm [AVX] +// * VPERMILPS m256, ymm, ymm [AVX] +// * VPERMILPS imm8, m256, ymm [AVX] +// * VPERMILPS imm8, m512/m32bcst, zmm{k}{z} [AVX512F] +// * VPERMILPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMILPS imm8, zmm, zmm{k}{z} [AVX512F] +// * VPERMILPS zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMILPS imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPS imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPS imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPS imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMILPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMILPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMILPS", 3, Operands { v0, v1, v2 }) + // VPERMILPS imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79) + m.emit(0x04) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x0c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMILPS m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x0c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPERMILPS imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0) + m.emit(0x04) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPS imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d) + m.emit(0x04) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPS ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x0c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMILPS m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x0c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPERMILPS imm8, m256, ymm + if 
isImm8(v0) && isM256(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[2]), addr(v[1]), 0) + m.emit(0x04) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPS imm8, m512/m32bcst, zmm{k}{z} + if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x04) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPS m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x0c) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMILPS imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x04) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPS zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x0c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMILPS imm8, m128/m32bcst, xmm{k}{z} + if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x04) + m.mrsd(lcode(v[2]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPS imm8, m256/m32bcst, ymm{k}{z} + if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x04) + m.mrsd(lcode(v[2]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPS m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x0c) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMILPS imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08) + m.emit(0x04) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + 
}) + } + // VPERMILPS xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x0c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMILPS m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x0c) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMILPS imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x04) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMILPS ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x0c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMILPS") + } + return p +} + +// VPERMPD performs "Permute Double-Precision Floating-Point Elements". 
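+//
+// A hypothetical usage sketch (illustration only, assuming the same p and
+// register constants): imm8 0x1b encodes the lane order 3,2,1,0, reversing
+// the four quadword elements of the source.
+//
+//     p.VPERMPD(0x1b, YMM1, YMM2)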
+// +// Mnemonic : VPERMPD +// Supported forms : (10 forms) +// +// * VPERMPD imm8, ymm, ymm [AVX2] +// * VPERMPD imm8, m256, ymm [AVX2] +// * VPERMPD imm8, m512/m64bcst, zmm{k}{z} [AVX512F] +// * VPERMPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMPD imm8, zmm, zmm{k}{z} [AVX512F] +// * VPERMPD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMPD imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMPD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMPD", 3, Operands { v0, v1, v2 }) + // VPERMPD imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xfd) + m.emit(0x01) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMPD imm8, m256, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[2]), addr(v[1]), 0) + m.emit(0x01) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMPD imm8, m512/m64bcst, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x01) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMPD m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x16) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMPD imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x01) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMPD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x16) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMPD imm8, m256/m64bcst, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x01) + m.mrsd(lcode(v[2]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMPD m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | 
ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x16) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMPD imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x01) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMPD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x16) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMPD") + } + return p +} + +// VPERMPS performs "Permute Single-Precision Floating-Point Elements". +// +// Mnemonic : VPERMPS +// Supported forms : (6 forms) +// +// * VPERMPS ymm, ymm, ymm [AVX2] +// * VPERMPS m256, ymm, ymm [AVX2] +// * VPERMPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMPS zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMPS", 3, Operands { v0, v1, v2 }) + // VPERMPS ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x16) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMPS m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x16) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPERMPS m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x16) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMPS zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x16) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMPS m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 
0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x16) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMPS ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x16) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMPS") + } + return p +} + +// VPERMQ performs "Permute Quadword Integers". +// +// Mnemonic : VPERMQ +// Supported forms : (10 forms) +// +// * VPERMQ imm8, ymm, ymm [AVX2] +// * VPERMQ imm8, m256, ymm [AVX2] +// * VPERMQ imm8, m512/m64bcst, zmm{k}{z} [AVX512F] +// * VPERMQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMQ imm8, zmm, zmm{k}{z} [AVX512F] +// * VPERMQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMQ imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMQ imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMQ", 3, Operands { v0, v1, v2 }) + // VPERMQ imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xfd) + m.emit(0x00) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMQ imm8, m256, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x85, hcode(v[2]), addr(v[1]), 0) + m.emit(0x00) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMQ imm8, m512/m64bcst, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x00) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x36) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMQ imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x00) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) 
| (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x36) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMQ imm8, m256/m64bcst, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x00) + m.mrsd(lcode(v[2]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x36) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMQ imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x00) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPERMQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x36) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMQ") + } + return p +} + +// VPERMT2B performs "Full Permute of Bytes From Two Tables Overwriting a Table". 
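+//
+// A hypothetical usage sketch (illustration only, assuming the same p and
+// register constants): the middle operand carries the byte indices, while
+// the last operand holds one of the two tables and is overwritten with the
+// result.
+//
+//     p.VPERMT2B(ZMM0, ZMM1, ZMM2)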
+// +// Mnemonic : VPERMT2B +// Supported forms : (6 forms) +// +// * VPERMT2B xmm, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL] +// * VPERMT2B m128, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL] +// * VPERMT2B ymm, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL] +// * VPERMT2B m256, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL] +// * VPERMT2B zmm, zmm, zmm{k}{z} [AVX512VBMI] +// * VPERMT2B m512, zmm, zmm{k}{z} [AVX512VBMI] +// +func (self *Program) VPERMT2B(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMT2B", 3, Operands { v0, v1, v2 }) + // VPERMT2B xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2B m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x7d) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMT2B ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2B m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x7d) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMT2B zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2B m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x7d) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMT2B") + } + return p +} + +// VPERMT2D performs "Full Permute of Doublewords From Two Tables Overwriting a Table". 
+// +// Mnemonic : VPERMT2D +// Supported forms : (6 forms) +// +// * VPERMT2D m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMT2D zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMT2D m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMT2D xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMT2D m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMT2D ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMT2D(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMT2D", 3, Operands { v0, v1, v2 }) + // VPERMT2D m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x7e) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMT2D zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2D m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x7e) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMT2D xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2D m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x7e) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMT2D ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMT2D") + } + return p +} + +// VPERMT2PD performs "Full Permute of Double-Precision Floating-Point Values From Two Tables Overwriting a Table". 
+// +// Mnemonic : VPERMT2PD +// Supported forms : (6 forms) +// +// * VPERMT2PD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMT2PD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMT2PD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMT2PD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMT2PD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMT2PD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMT2PD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMT2PD", 3, Operands { v0, v1, v2 }) + // VPERMT2PD m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x7f) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMT2PD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2PD m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x7f) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMT2PD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2PD m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x7f) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMT2PD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMT2PD") + } + return p +} + +// VPERMT2PS performs "Full Permute of Single-Precision Floating-Point Values From Two Tables Overwriting a Table". 
+// +// Mnemonic : VPERMT2PS +// Supported forms : (6 forms) +// +// * VPERMT2PS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMT2PS zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMT2PS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMT2PS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMT2PS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMT2PS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMT2PS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMT2PS", 3, Operands { v0, v1, v2 }) + // VPERMT2PS m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x7f) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMT2PS zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2PS m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x7f) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMT2PS xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2PS m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x7f) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMT2PS ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x7f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMT2PS") + } + return p +} + +// VPERMT2Q performs "Full Permute of Quadwords From Two Tables Overwriting a Table". 
+// +// Mnemonic : VPERMT2Q +// Supported forms : (6 forms) +// +// * VPERMT2Q m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPERMT2Q zmm, zmm, zmm{k}{z} [AVX512F] +// * VPERMT2Q m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMT2Q xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPERMT2Q m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPERMT2Q ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPERMT2Q(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMT2Q", 3, Operands { v0, v1, v2 }) + // VPERMT2Q m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x7e) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMT2Q zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2Q m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x7e) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMT2Q xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2Q m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x7e) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPERMT2Q ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x7e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMT2Q") + } + return p +} + +// VPERMT2W performs "Full Permute of Words From Two Tables Overwriting a Table". 
+// +// Mnemonic : VPERMT2W +// Supported forms : (6 forms) +// +// * VPERMT2W zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPERMT2W m512, zmm, zmm{k}{z} [AVX512BW] +// * VPERMT2W xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPERMT2W m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPERMT2W ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPERMT2W m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPERMT2W(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMT2W", 3, Operands { v0, v1, v2 }) + // VPERMT2W zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2W m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x7d) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMT2W xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2W m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x7d) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMT2W ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x7d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMT2W m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x7d) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMT2W") + } + return p +} + +// VPERMW performs "Permute Word Integers". 
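+//
+// VPERMW has no VEX encoding, so every form below requires AVX512BW (plus
+// AVX512VL for the xmm/ymm widths). A minimal sketch with placeholder
+// register operands, assuming this package's register constants:
+//
+//     p.VPERMW(ZMM2, ZMM1, ZMM0)    // zmm, zmm, zmm{k}{z} form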
+// +// Mnemonic : VPERMW +// Supported forms : (6 forms) +// +// * VPERMW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPERMW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPERMW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPERMW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPERMW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPERMW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPERMW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPERMW", 3, Operands { v0, v1, v2 }) + // VPERMW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x8d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x8d) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPERMW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x8d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x8d) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPERMW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x8d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPERMW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x8d) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPERMW") + } + return p +} + +// VPEXPANDD performs "Load Sparse Packed Doubleword Integer Values from Dense Memory/Register". 
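+//
+// A minimal usage sketch, assuming a *Program p and this package's register
+// constants (placeholder operands); expansion is mainly useful with a mask
+// on the destination, but the {k}{z} decoration is optional in this API:
+//
+//     p.VPEXPANDD(ZMM1, ZMM0)    // zmm, zmm{k}{z} form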
+// +// Mnemonic : VPEXPANDD +// Supported forms : (6 forms) +// +// * VPEXPANDD zmm, zmm{k}{z} [AVX512F] +// * VPEXPANDD m512, zmm{k}{z} [AVX512F] +// * VPEXPANDD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPEXPANDD ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPEXPANDD m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VPEXPANDD m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPEXPANDD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPEXPANDD", 2, Operands { v0, v1 }) + // VPEXPANDD zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x89) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPEXPANDD m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x89) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VPEXPANDD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x89) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPEXPANDD ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x89) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPEXPANDD m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x89) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VPEXPANDD m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x89) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPEXPANDD") + } + return p +} + +// VPEXPANDQ performs "Load Sparse Packed Quadword Integer Values from Dense Memory/Register". 
+// +// Mnemonic : VPEXPANDQ +// Supported forms : (6 forms) +// +// * VPEXPANDQ zmm, zmm{k}{z} [AVX512F] +// * VPEXPANDQ m512, zmm{k}{z} [AVX512F] +// * VPEXPANDQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPEXPANDQ ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPEXPANDQ m128, xmm{k}{z} [AVX512F,AVX512VL] +// * VPEXPANDQ m256, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPEXPANDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPEXPANDQ", 2, Operands { v0, v1 }) + // VPEXPANDQ zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x89) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPEXPANDQ m512, zmm{k}{z} + if isM512(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x89) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPEXPANDQ xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x89) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPEXPANDQ ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x89) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPEXPANDQ m128, xmm{k}{z} + if isM128(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x89) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPEXPANDQ m256, ymm{k}{z} + if isM256(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x89) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPEXPANDQ") + } + return p +} + +// VPEXTRB performs "Extract Byte". 
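+//
+// A minimal usage sketch, assuming a *Program p, this package's register
+// constants, and a plain Go integer for the imm8 lane selector (all
+// placeholders):
+//
+//     p.VPEXTRB(7, XMM0, EAX)    // imm8, xmm, r32 form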
+// +// Mnemonic : VPEXTRB +// Supported forms : (4 forms) +// +// * VPEXTRB imm8, xmm, r32 [AVX] +// * VPEXTRB imm8, xmm, m8 [AVX] +// * VPEXTRB imm8, xmm, r32 [AVX512BW] +// * VPEXTRB imm8, xmm, m8 [AVX512BW] +// +func (self *Program) VPEXTRB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPEXTRB", 3, Operands { v0, v1, v2 }) + // VPEXTRB imm8, xmm, r32 + if isImm8(v0) && isXMM(v1) && isReg32(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0x79) + m.emit(0x14) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VPEXTRB imm8, xmm, m8 + if isImm8(v0) && isXMM(v1) && isM8(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[1]), addr(v[2]), 0) + m.emit(0x14) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPEXTRB imm8, xmm, r32 + if isImm8(v0) && isEVEXXMM(v1) && isReg32(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit(0x08) + m.emit(0x14) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VPEXTRB imm8, xmm, m8 + if isImm8(v0) && isEVEXXMM(v1) && isM8(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[1]), addr(v[2]), 0, 0, 0, 0) + m.emit(0x14) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPEXTRB") + } + return p +} + +// VPEXTRD performs "Extract Doubleword". 
+// +// Mnemonic : VPEXTRD +// Supported forms : (4 forms) +// +// * VPEXTRD imm8, xmm, r32 [AVX] +// * VPEXTRD imm8, xmm, m32 [AVX] +// * VPEXTRD imm8, xmm, r32 [AVX512DQ] +// * VPEXTRD imm8, xmm, m32 [AVX512DQ] +// +func (self *Program) VPEXTRD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPEXTRD", 3, Operands { v0, v1, v2 }) + // VPEXTRD imm8, xmm, r32 + if isImm8(v0) && isXMM(v1) && isReg32(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0x79) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VPEXTRD imm8, xmm, m32 + if isImm8(v0) && isXMM(v1) && isM32(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[1]), addr(v[2]), 0) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPEXTRD imm8, xmm, r32 + if isImm8(v0) && isEVEXXMM(v1) && isReg32(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit(0x08) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VPEXTRD imm8, xmm, m32 + if isImm8(v0) && isEVEXXMM(v1) && isM32(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[1]), addr(v[2]), 0, 0, 0, 0) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[2]), 4) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPEXTRD") + } + return p +} + +// VPEXTRQ performs "Extract Quadword". 
+// +// Mnemonic : VPEXTRQ +// Supported forms : (4 forms) +// +// * VPEXTRQ imm8, xmm, r64 [AVX] +// * VPEXTRQ imm8, xmm, m64 [AVX] +// * VPEXTRQ imm8, xmm, r64 [AVX512DQ] +// * VPEXTRQ imm8, xmm, m64 [AVX512DQ] +// +func (self *Program) VPEXTRQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPEXTRQ", 3, Operands { v0, v1, v2 }) + // VPEXTRQ imm8, xmm, r64 + if isImm8(v0) && isXMM(v1) && isReg64(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0xf9) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VPEXTRQ imm8, xmm, m64 + if isImm8(v0) && isXMM(v1) && isM64(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[1]), addr(v[2]), 0) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPEXTRQ imm8, xmm, r64 + if isImm8(v0) && isEVEXXMM(v1) && isReg64(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit(0x08) + m.emit(0x16) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VPEXTRQ imm8, xmm, m64 + if isImm8(v0) && isEVEXXMM(v1) && isM64(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[1]), addr(v[2]), 0, 0, 0, 0) + m.emit(0x16) + m.mrsd(lcode(v[1]), addr(v[2]), 8) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPEXTRQ") + } + return p +} + +// VPEXTRW performs "Extract Word". 
+// +// Mnemonic : VPEXTRW +// Supported forms : (4 forms) +// +// * VPEXTRW imm8, xmm, r32 [AVX] +// * VPEXTRW imm8, xmm, m16 [AVX] +// * VPEXTRW imm8, xmm, r32 [AVX512BW] +// * VPEXTRW imm8, xmm, m16 [AVX512BW] +// +func (self *Program) VPEXTRW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPEXTRW", 3, Operands { v0, v1, v2 }) + // VPEXTRW imm8, xmm, r32 + if isImm8(v0) && isXMM(v1) && isReg32(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[1], 0) + m.emit(0xc5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[1]) << 7) ^ (hcode(v[2]) << 5)) + m.emit(0x79) + m.emit(0x15) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VPEXTRW imm8, xmm, m16 + if isImm8(v0) && isXMM(v1) && isM16(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[1]), addr(v[2]), 0) + m.emit(0x15) + m.mrsd(lcode(v[1]), addr(v[2]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPEXTRW imm8, xmm, r32 + if isImm8(v0) && isEVEXXMM(v1) && isReg32(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[1]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit(0x08) + m.emit(0x15) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit(0x08) + m.emit(0xc5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPEXTRW imm8, xmm, m16 + if isImm8(v0) && isEVEXXMM(v1) && isM16(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[1]), addr(v[2]), 0, 0, 0, 0) + m.emit(0x15) + m.mrsd(lcode(v[1]), addr(v[2]), 2) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPEXTRW") + } + return p +} + +// VPGATHERDD performs "Gather Packed Doubleword Values Using Signed Doubleword Indices". 
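+//
+// The method is variadic because the AVX2 forms take three operands (mask
+// vector, vm32 index, destination) while the EVEX forms take two (vm32
+// index, masked destination). A sketch, where vm32idx is a hypothetical
+// vector-indexed memory operand built with this package's addressing
+// helpers:
+//
+//     p.VPGATHERDD(XMM1, vm32idx, XMM0)    // AVX2 form: xmm, vm32x, xmm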
+// +// Mnemonic : VPGATHERDD +// Supported forms : (5 forms) +// +// * VPGATHERDD xmm, vm32x, xmm [AVX2] +// * VPGATHERDD ymm, vm32y, ymm [AVX2] +// * VPGATHERDD vm32z, zmm{k} [AVX512F] +// * VPGATHERDD vm32x, xmm{k} [AVX512F,AVX512VL] +// * VPGATHERDD vm32y, ymm{k} [AVX512F,AVX512VL] +// +func (self *Program) VPGATHERDD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VPGATHERDD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VPGATHERDD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VPGATHERDD takes 2 or 3 operands") + } + // VPGATHERDD xmm, vm32x, xmm + if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x90) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VPGATHERDD ymm, vm32y, ymm + if len(vv) == 1 && isYMM(v0) && isVMY(v1) && isYMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x90) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VPGATHERDD vm32z, zmm{k} + if len(vv) == 0 && isVMZ(v0) && isZMMk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x90) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VPGATHERDD vm32x, xmm{k} + if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x90) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VPGATHERDD vm32y, ymm{k} + if len(vv) == 0 && isEVEXVMY(v0) && isYMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x90) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPGATHERDD") + } + return p +} + +// VPGATHERDQ performs "Gather Packed Quadword Values Using Signed Doubleword Indices". 
+// +// Mnemonic : VPGATHERDQ +// Supported forms : (5 forms) +// +// * VPGATHERDQ xmm, vm32x, xmm [AVX2] +// * VPGATHERDQ ymm, vm32x, ymm [AVX2] +// * VPGATHERDQ vm32y, zmm{k} [AVX512F] +// * VPGATHERDQ vm32x, xmm{k} [AVX512F,AVX512VL] +// * VPGATHERDQ vm32x, ymm{k} [AVX512F,AVX512VL] +// +func (self *Program) VPGATHERDQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VPGATHERDQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VPGATHERDQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VPGATHERDQ takes 2 or 3 operands") + } + // VPGATHERDQ xmm, vm32x, xmm + if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x90) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VPGATHERDQ ymm, vm32x, ymm + if len(vv) == 1 && isYMM(v0) && isVMX(v1) && isYMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x90) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VPGATHERDQ vm32y, zmm{k} + if len(vv) == 0 && isEVEXVMY(v0) && isZMMk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x90) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPGATHERDQ vm32x, xmm{k} + if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x90) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPGATHERDQ vm32x, ymm{k} + if len(vv) == 0 && isEVEXVMX(v0) && isYMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x90) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPGATHERDQ") + } + return p +} + +// VPGATHERQD performs "Gather Packed Doubleword Values Using Signed Quadword Indices". 
+// +// Mnemonic : VPGATHERQD +// Supported forms : (5 forms) +// +// * VPGATHERQD xmm, vm64x, xmm [AVX2] +// * VPGATHERQD xmm, vm64y, xmm [AVX2] +// * VPGATHERQD vm64z, ymm{k} [AVX512F] +// * VPGATHERQD vm64x, xmm{k} [AVX512F,AVX512VL] +// * VPGATHERQD vm64y, xmm{k} [AVX512F,AVX512VL] +// +func (self *Program) VPGATHERQD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VPGATHERQD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VPGATHERQD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VPGATHERQD takes 2 or 3 operands") + } + // VPGATHERQD xmm, vm64x, xmm + if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x91) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VPGATHERQD xmm, vm64y, xmm + if len(vv) == 1 && isXMM(v0) && isVMY(v1) && isXMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x91) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VPGATHERQD vm64z, ymm{k} + if len(vv) == 0 && isVMZ(v0) && isYMMk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x91) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VPGATHERQD vm64x, xmm{k} + if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x91) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VPGATHERQD vm64y, xmm{k} + if len(vv) == 0 && isEVEXVMY(v0) && isXMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x91) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPGATHERQD") + } + return p +} + +// VPGATHERQQ performs "Gather Packed Quadword Values Using Signed Quadword Indices". 
+// +// Mnemonic : VPGATHERQQ +// Supported forms : (5 forms) +// +// * VPGATHERQQ xmm, vm64x, xmm [AVX2] +// * VPGATHERQQ ymm, vm64y, ymm [AVX2] +// * VPGATHERQQ vm64z, zmm{k} [AVX512F] +// * VPGATHERQQ vm64x, xmm{k} [AVX512F,AVX512VL] +// * VPGATHERQQ vm64y, ymm{k} [AVX512F,AVX512VL] +// +func (self *Program) VPGATHERQQ(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VPGATHERQQ", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VPGATHERQQ", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VPGATHERQQ takes 2 or 3 operands") + } + // VPGATHERQQ xmm, vm64x, xmm + if len(vv) == 1 && isXMM(v0) && isVMX(v1) && isXMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x91) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VPGATHERQQ ymm, vm64y, ymm + if len(vv) == 1 && isYMM(v0) && isVMY(v1) && isYMM(vv[0]) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x91) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + // VPGATHERQQ vm64z, zmm{k} + if len(vv) == 0 && isVMZ(v0) && isZMMk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x91) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPGATHERQQ vm64x, xmm{k} + if len(vv) == 0 && isEVEXVMX(v0) && isXMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x91) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPGATHERQQ vm64y, ymm{k} + if len(vv) == 0 && isEVEXVMY(v0) && isYMMk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 0, 0) + m.emit(0x91) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPGATHERQQ") + } + return p +} + +// VPHADDBD performs "Packed Horizontal Add Signed Byte to Signed Doubleword". +// +// Mnemonic : VPHADDBD +// Supported forms : (2 forms) +// +// * VPHADDBD xmm, xmm [XOP] +// * VPHADDBD m128, xmm [XOP] +// +func (self *Program) VPHADDBD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHADDBD", 2, Operands { v0, v1 }) + // VPHADDBD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHADDBD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xc2) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDBD") + } + return p +} + +// VPHADDBQ performs "Packed Horizontal Add Signed Byte to Signed Quadword". 
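+//
+// A minimal usage sketch, assuming a *Program p and this package's XMM
+// register constants (placeholders); note that XOP instructions execute
+// only on AMD processors that implement the XOP extension:
+//
+//     p.VPHADDBQ(XMM1, XMM0)    // xmm, xmm form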
+// +// Mnemonic : VPHADDBQ +// Supported forms : (2 forms) +// +// * VPHADDBQ xmm, xmm [XOP] +// * VPHADDBQ m128, xmm [XOP] +// +func (self *Program) VPHADDBQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHADDBQ", 2, Operands { v0, v1 }) + // VPHADDBQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xc3) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHADDBQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xc3) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDBQ") + } + return p +} + +// VPHADDBW performs "Packed Horizontal Add Signed Byte to Signed Word". +// +// Mnemonic : VPHADDBW +// Supported forms : (2 forms) +// +// * VPHADDBW xmm, xmm [XOP] +// * VPHADDBW m128, xmm [XOP] +// +func (self *Program) VPHADDBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHADDBW", 2, Operands { v0, v1 }) + // VPHADDBW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xc1) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHADDBW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xc1) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDBW") + } + return p +} + +// VPHADDD performs "Packed Horizontal Add Doubleword Integer". 
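+//
+// A minimal usage sketch, assuming a *Program p and this package's register
+// constants (placeholders); the xmm forms need only AVX, while the ymm
+// forms require AVX2:
+//
+//     p.VPHADDD(YMM2, YMM1, YMM0)    // ymm, ymm, ymm form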
+// +// Mnemonic : VPHADDD +// Supported forms : (4 forms) +// +// * VPHADDD xmm, xmm, xmm [AVX] +// * VPHADDD m128, xmm, xmm [AVX] +// * VPHADDD ymm, ymm, ymm [AVX2] +// * VPHADDD m256, ymm, ymm [AVX2] +// +func (self *Program) VPHADDD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPHADDD", 3, Operands { v0, v1, v2 }) + // VPHADDD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x02) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPHADDD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x02) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPHADDD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x02) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPHADDD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x02) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDD") + } + return p +} + +// VPHADDDQ performs "Packed Horizontal Add Signed Doubleword to Signed Quadword". +// +// Mnemonic : VPHADDDQ +// Supported forms : (2 forms) +// +// * VPHADDDQ xmm, xmm [XOP] +// * VPHADDDQ m128, xmm [XOP] +// +func (self *Program) VPHADDDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHADDDQ", 2, Operands { v0, v1 }) + // VPHADDDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xcb) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHADDDQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xcb) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDDQ") + } + return p +} + +// VPHADDSW performs "Packed Horizontal Add Signed Word Integers with Signed Saturation". 
+// +// Mnemonic : VPHADDSW +// Supported forms : (4 forms) +// +// * VPHADDSW xmm, xmm, xmm [AVX] +// * VPHADDSW m128, xmm, xmm [AVX] +// * VPHADDSW ymm, ymm, ymm [AVX2] +// * VPHADDSW m256, ymm, ymm [AVX2] +// +func (self *Program) VPHADDSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPHADDSW", 3, Operands { v0, v1, v2 }) + // VPHADDSW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x03) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPHADDSW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x03) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPHADDSW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x03) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPHADDSW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x03) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDSW") + } + return p +} + +// VPHADDUBD performs "Packed Horizontal Add Unsigned Byte to Doubleword". +// +// Mnemonic : VPHADDUBD +// Supported forms : (2 forms) +// +// * VPHADDUBD xmm, xmm [XOP] +// * VPHADDUBD m128, xmm [XOP] +// +func (self *Program) VPHADDUBD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHADDUBD", 2, Operands { v0, v1 }) + // VPHADDUBD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xd2) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHADDUBD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xd2) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDUBD") + } + return p +} + +// VPHADDUBQ performs "Packed Horizontal Add Unsigned Byte to Quadword". 
+// +// Mnemonic : VPHADDUBQ +// Supported forms : (2 forms) +// +// * VPHADDUBQ xmm, xmm [XOP] +// * VPHADDUBQ m128, xmm [XOP] +// +func (self *Program) VPHADDUBQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHADDUBQ", 2, Operands { v0, v1 }) + // VPHADDUBQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xd3) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHADDUBQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xd3) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDUBQ") + } + return p +} + +// VPHADDUBW performs "Packed Horizontal Add Unsigned Byte to Word". +// +// Mnemonic : VPHADDUBW +// Supported forms : (2 forms) +// +// * VPHADDUBW xmm, xmm [XOP] +// * VPHADDUBW m128, xmm [XOP] +// +func (self *Program) VPHADDUBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHADDUBW", 2, Operands { v0, v1 }) + // VPHADDUBW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xd1) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHADDUBW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xd1) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDUBW") + } + return p +} + +// VPHADDUDQ performs "Packed Horizontal Add Unsigned Doubleword to Quadword". +// +// Mnemonic : VPHADDUDQ +// Supported forms : (2 forms) +// +// * VPHADDUDQ xmm, xmm [XOP] +// * VPHADDUDQ m128, xmm [XOP] +// +func (self *Program) VPHADDUDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHADDUDQ", 2, Operands { v0, v1 }) + // VPHADDUDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xdb) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHADDUDQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xdb) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDUDQ") + } + return p +} + +// VPHADDUWD performs "Packed Horizontal Add Unsigned Word to Doubleword". 
+// +// Mnemonic : VPHADDUWD +// Supported forms : (2 forms) +// +// * VPHADDUWD xmm, xmm [XOP] +// * VPHADDUWD m128, xmm [XOP] +// +func (self *Program) VPHADDUWD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHADDUWD", 2, Operands { v0, v1 }) + // VPHADDUWD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xd6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHADDUWD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xd6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDUWD") + } + return p +} + +// VPHADDUWQ performs "Packed Horizontal Add Unsigned Word to Quadword". +// +// Mnemonic : VPHADDUWQ +// Supported forms : (2 forms) +// +// * VPHADDUWQ xmm, xmm [XOP] +// * VPHADDUWQ m128, xmm [XOP] +// +func (self *Program) VPHADDUWQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHADDUWQ", 2, Operands { v0, v1 }) + // VPHADDUWQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xd7) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHADDUWQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xd7) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDUWQ") + } + return p +} + +// VPHADDW performs "Packed Horizontal Add Word Integers". 
+// +// Mnemonic : VPHADDW +// Supported forms : (4 forms) +// +// * VPHADDW xmm, xmm, xmm [AVX] +// * VPHADDW m128, xmm, xmm [AVX] +// * VPHADDW ymm, ymm, ymm [AVX2] +// * VPHADDW m256, ymm, ymm [AVX2] +// +func (self *Program) VPHADDW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPHADDW", 3, Operands { v0, v1, v2 }) + // VPHADDW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPHADDW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPHADDW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x01) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPHADDW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x01) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDW") + } + return p +} + +// VPHADDWD performs "Packed Horizontal Add Signed Word to Signed Doubleword". +// +// Mnemonic : VPHADDWD +// Supported forms : (2 forms) +// +// * VPHADDWD xmm, xmm [XOP] +// * VPHADDWD m128, xmm [XOP] +// +func (self *Program) VPHADDWD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHADDWD", 2, Operands { v0, v1 }) + // VPHADDWD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHADDWD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xc6) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDWD") + } + return p +} + +// VPHADDWQ performs "Packed Horizontal Add Signed Word to Signed Quadword". 
+// +// Mnemonic : VPHADDWQ +// Supported forms : (2 forms) +// +// * VPHADDWQ xmm, xmm [XOP] +// * VPHADDWQ m128, xmm [XOP] +// +func (self *Program) VPHADDWQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHADDWQ", 2, Operands { v0, v1 }) + // VPHADDWQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xc7) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHADDWQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xc7) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHADDWQ") + } + return p +} + +// VPHMINPOSUW performs "Packed Horizontal Minimum of Unsigned Word Integers". +// +// Mnemonic : VPHMINPOSUW +// Supported forms : (2 forms) +// +// * VPHMINPOSUW xmm, xmm [AVX] +// * VPHMINPOSUW m128, xmm [AVX] +// +func (self *Program) VPHMINPOSUW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHMINPOSUW", 2, Operands { v0, v1 }) + // VPHMINPOSUW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x41) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHMINPOSUW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x41) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHMINPOSUW") + } + return p +} + +// VPHSUBBW performs "Packed Horizontal Subtract Signed Byte to Signed Word". +// +// Mnemonic : VPHSUBBW +// Supported forms : (2 forms) +// +// * VPHSUBBW xmm, xmm [XOP] +// * VPHSUBBW m128, xmm [XOP] +// +func (self *Program) VPHSUBBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHSUBBW", 2, Operands { v0, v1 }) + // VPHSUBBW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xe1) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHSUBBW m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xe1) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHSUBBW") + } + return p +} + +// VPHSUBD performs "Packed Horizontal Subtract Doubleword Integers". 
+// +// Mnemonic : VPHSUBD +// Supported forms : (4 forms) +// +// * VPHSUBD xmm, xmm, xmm [AVX] +// * VPHSUBD m128, xmm, xmm [AVX] +// * VPHSUBD ymm, ymm, ymm [AVX2] +// * VPHSUBD m256, ymm, ymm [AVX2] +// +func (self *Program) VPHSUBD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPHSUBD", 3, Operands { v0, v1, v2 }) + // VPHSUBD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x06) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPHSUBD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x06) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPHSUBD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x06) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPHSUBD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x06) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHSUBD") + } + return p +} + +// VPHSUBDQ performs "Packed Horizontal Subtract Signed Doubleword to Signed Quadword". +// +// Mnemonic : VPHSUBDQ +// Supported forms : (2 forms) +// +// * VPHSUBDQ xmm, xmm [XOP] +// * VPHSUBDQ m128, xmm [XOP] +// +func (self *Program) VPHSUBDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHSUBDQ", 2, Operands { v0, v1 }) + // VPHSUBDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xe3) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHSUBDQ m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xe3) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHSUBDQ") + } + return p +} + +// VPHSUBSW performs "Packed Horizontal Subtract Signed Word Integers with Signed Saturation". 
+// +// Mnemonic : VPHSUBSW +// Supported forms : (4 forms) +// +// * VPHSUBSW xmm, xmm, xmm [AVX] +// * VPHSUBSW m128, xmm, xmm [AVX] +// * VPHSUBSW ymm, ymm, ymm [AVX2] +// * VPHSUBSW m256, ymm, ymm [AVX2] +// +func (self *Program) VPHSUBSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPHSUBSW", 3, Operands { v0, v1, v2 }) + // VPHSUBSW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x07) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPHSUBSW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x07) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPHSUBSW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x07) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPHSUBSW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x07) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHSUBSW") + } + return p +} + +// VPHSUBW performs "Packed Horizontal Subtract Word Integers". 
+// +// Mnemonic : VPHSUBW +// Supported forms : (4 forms) +// +// * VPHSUBW xmm, xmm, xmm [AVX] +// * VPHSUBW m128, xmm, xmm [AVX] +// * VPHSUBW ymm, ymm, ymm [AVX2] +// * VPHSUBW m256, ymm, ymm [AVX2] +// +func (self *Program) VPHSUBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPHSUBW", 3, Operands { v0, v1, v2 }) + // VPHSUBW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x05) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPHSUBW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x05) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPHSUBW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x05) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPHSUBW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x05) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHSUBW") + } + return p +} + +// VPHSUBWD performs "Packed Horizontal Subtract Signed Word to Signed Doubleword". +// +// Mnemonic : VPHSUBWD +// Supported forms : (2 forms) +// +// * VPHSUBWD xmm, xmm [XOP] +// * VPHSUBWD m128, xmm [XOP] +// +func (self *Program) VPHSUBWD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPHSUBWD", 2, Operands { v0, v1 }) + // VPHSUBWD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x78) + m.emit(0xe2) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPHSUBWD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[1]), addr(v[0]), 0) + m.emit(0xe2) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPHSUBWD") + } + return p +} + +// VPINSRB performs "Insert Byte". 
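+//
+// Example (a minimal usage sketch): the lane-selecting immediate comes first
+// and the destination register last, so inserting the low byte of ECX into
+// byte lane 3, with XMM1 supplying the remaining lanes, would look like the
+// call below; ECX, XMM0 and XMM1 are this package's register values.
+//
+//     p.VPINSRB(3, ECX, XMM1, XMM0)    // AVX "imm8, r32, xmm, xmm" form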
+// +// Mnemonic : VPINSRB +// Supported forms : (4 forms) +// +// * VPINSRB imm8, r32, xmm, xmm [AVX] +// * VPINSRB imm8, m8, xmm, xmm [AVX] +// * VPINSRB imm8, r32, xmm, xmm [AVX512BW] +// * VPINSRB imm8, m8, xmm, xmm [AVX512BW] +// +func (self *Program) VPINSRB(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPINSRB", 4, Operands { v0, v1, v2, v3 }) + // VPINSRB imm8, r32, xmm, xmm + if isImm8(v0) && isReg32(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x20) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPINSRB imm8, m8, xmm, xmm + if isImm8(v0) && isM8(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x20) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPINSRB imm8, r32, xmm, xmm + if isImm8(v0) && isReg32(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00) + m.emit(0x20) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPINSRB imm8, m8, xmm, xmm + if isImm8(v0) && isM8(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), 0, 0, 0) + m.emit(0x20) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPINSRB") + } + return p +} + +// VPINSRD performs "Insert Doubleword". 
+// +// Mnemonic : VPINSRD +// Supported forms : (4 forms) +// +// * VPINSRD imm8, r32, xmm, xmm [AVX] +// * VPINSRD imm8, m32, xmm, xmm [AVX] +// * VPINSRD imm8, r32, xmm, xmm [AVX512DQ] +// * VPINSRD imm8, m32, xmm, xmm [AVX512DQ] +// +func (self *Program) VPINSRD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPINSRD", 4, Operands { v0, v1, v2, v3 }) + // VPINSRD imm8, r32, xmm, xmm + if isImm8(v0) && isReg32(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x22) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPINSRD imm8, m32, xmm, xmm + if isImm8(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x22) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPINSRD imm8, r32, xmm, xmm + if isImm8(v0) && isReg32(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00) + m.emit(0x22) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPINSRD imm8, m32, xmm, xmm + if isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), 0, 0, 0) + m.emit(0x22) + m.mrsd(lcode(v[3]), addr(v[1]), 4) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPINSRD") + } + return p +} + +// VPINSRQ performs "Insert Quadword". 
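+//
+// Example (a minimal usage sketch): same shape as VPINSRB/VPINSRD above, but
+// with a 64-bit general-purpose source, e.g. placing RAX into quadword lane 1
+// of the destination:
+//
+//     p.VPINSRQ(1, RAX, XMM1, XMM0)    // RAX and XMM0/XMM1 are package register values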
+// +// Mnemonic : VPINSRQ +// Supported forms : (4 forms) +// +// * VPINSRQ imm8, r64, xmm, xmm [AVX] +// * VPINSRQ imm8, m64, xmm, xmm [AVX] +// * VPINSRQ imm8, r64, xmm, xmm [AVX512DQ] +// * VPINSRQ imm8, m64, xmm, xmm [AVX512DQ] +// +func (self *Program) VPINSRQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPINSRQ", 4, Operands { v0, v1, v2, v3 }) + // VPINSRQ imm8, r64, xmm, xmm + if isImm8(v0) && isReg64(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0xf9 ^ (hlcode(v[2]) << 3)) + m.emit(0x22) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPINSRQ imm8, m64, xmm, xmm + if isImm8(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x81, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x22) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPINSRQ imm8, r64, xmm, xmm + if isImm8(v0) && isReg64(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00) + m.emit(0x22) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPINSRQ imm8, m64, xmm, xmm + if isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), 0, 0, 0) + m.emit(0x22) + m.mrsd(lcode(v[3]), addr(v[1]), 8) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPINSRQ") + } + return p +} + +// VPINSRW performs "Insert Word". 
+// +// Mnemonic : VPINSRW +// Supported forms : (4 forms) +// +// * VPINSRW imm8, r32, xmm, xmm [AVX] +// * VPINSRW imm8, m16, xmm, xmm [AVX] +// * VPINSRW imm8, r32, xmm, xmm [AVX512BW] +// * VPINSRW imm8, m16, xmm, xmm [AVX512BW] +// +func (self *Program) VPINSRW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPINSRW", 4, Operands { v0, v1, v2, v3 }) + // VPINSRW imm8, r32, xmm, xmm + if isImm8(v0) && isReg32(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[3]), v[1], hlcode(v[2])) + m.emit(0xc4) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPINSRW imm8, m16, xmm, xmm + if isImm8(v0) && isM16(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xc4) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPINSRW imm8, r32, xmm, xmm + if isImm8(v0) && isReg32(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00) + m.emit(0xc4) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPINSRW imm8, m16, xmm, xmm + if isImm8(v0) && isM16(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), 0, 0, 0) + m.emit(0xc4) + m.mrsd(lcode(v[3]), addr(v[1]), 2) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPINSRW") + } + return p +} + +// VPLZCNTD performs "Count the Number of Leading Zero Bits for Packed Doubleword Values". 
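+//
+// Example (a minimal usage sketch): the plain register-to-register form is
+// shown below; the {k}{z} decorations on the destination are optional opmask
+// and zeroing modifiers, and how a masked destination operand is constructed
+// is package-specific and omitted here.
+//
+//     p.VPLZCNTD(ZMM1, ZMM0)    // per-doubleword leading-zero counts of ZMM1 into ZMM0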
+// +// Mnemonic : VPLZCNTD +// Supported forms : (6 forms) +// +// * VPLZCNTD m128/m32bcst, xmm{k}{z} [AVX512CD,AVX512VL] +// * VPLZCNTD m256/m32bcst, ymm{k}{z} [AVX512CD,AVX512VL] +// * VPLZCNTD m512/m32bcst, zmm{k}{z} [AVX512CD] +// * VPLZCNTD xmm, xmm{k}{z} [AVX512CD,AVX512VL] +// * VPLZCNTD ymm, ymm{k}{z} [AVX512CD,AVX512VL] +// * VPLZCNTD zmm, zmm{k}{z} [AVX512CD] +// +func (self *Program) VPLZCNTD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPLZCNTD", 2, Operands { v0, v1 }) + // VPLZCNTD m128/m32bcst, xmm{k}{z} + if isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x44) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VPLZCNTD m256/m32bcst, ymm{k}{z} + if isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x44) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VPLZCNTD m512/m32bcst, zmm{k}{z} + if isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x44) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VPLZCNTD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPLZCNTD ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPLZCNTD zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPLZCNTD") + } + return p +} + +// VPLZCNTQ performs "Count the Number of Leading Zero Bits for Packed Quadword Values". 
+// +// Mnemonic : VPLZCNTQ +// Supported forms : (6 forms) +// +// * VPLZCNTQ m128/m64bcst, xmm{k}{z} [AVX512CD,AVX512VL] +// * VPLZCNTQ m256/m64bcst, ymm{k}{z} [AVX512CD,AVX512VL] +// * VPLZCNTQ m512/m64bcst, zmm{k}{z} [AVX512CD] +// * VPLZCNTQ xmm, xmm{k}{z} [AVX512CD,AVX512VL] +// * VPLZCNTQ ymm, ymm{k}{z} [AVX512CD,AVX512VL] +// * VPLZCNTQ zmm, zmm{k}{z} [AVX512CD] +// +func (self *Program) VPLZCNTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPLZCNTQ", 2, Operands { v0, v1 }) + // VPLZCNTQ m128/m64bcst, xmm{k}{z} + if isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x44) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VPLZCNTQ m256/m64bcst, ymm{k}{z} + if isM256M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x44) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VPLZCNTQ m512/m64bcst, zmm{k}{z} + if isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x44) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VPLZCNTQ xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPLZCNTQ ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPLZCNTQ zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512CD) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x44) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPLZCNTQ") + } + return p +} + +// VPMACSDD performs "Packed Multiply Accumulate Signed Doubleword to Signed Doubleword". 
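+//
+// Example (a minimal usage sketch): the XOP multiply-accumulate forms take
+// four operands, with the addend first and the destination last, so a
+// per-lane XMM3 = XMM1 * XMM2 + XMM0 would be written as:
+//
+//     p.VPMACSDD(XMM0, XMM1, XMM2, XMM3)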
+// +// Mnemonic : VPMACSDD +// Supported forms : (2 forms) +// +// * VPMACSDD xmm, xmm, xmm, xmm [XOP] +// * VPMACSDD xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPMACSDD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPMACSDD", 4, Operands { v0, v1, v2, v3 }) + // VPMACSDD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0x9e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPMACSDD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x9e) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMACSDD") + } + return p +} + +// VPMACSDQH performs "Packed Multiply Accumulate Signed High Doubleword to Signed Quadword". +// +// Mnemonic : VPMACSDQH +// Supported forms : (2 forms) +// +// * VPMACSDQH xmm, xmm, xmm, xmm [XOP] +// * VPMACSDQH xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPMACSDQH(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPMACSDQH", 4, Operands { v0, v1, v2, v3 }) + // VPMACSDQH xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0x9f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPMACSDQH xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x9f) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMACSDQH") + } + return p +} + +// VPMACSDQL performs "Packed Multiply Accumulate Signed Low Doubleword to Signed Quadword". 
+// +// Mnemonic : VPMACSDQL +// Supported forms : (2 forms) +// +// * VPMACSDQL xmm, xmm, xmm, xmm [XOP] +// * VPMACSDQL xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPMACSDQL(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPMACSDQL", 4, Operands { v0, v1, v2, v3 }) + // VPMACSDQL xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0x97) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPMACSDQL xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x97) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMACSDQL") + } + return p +} + +// VPMACSSDD performs "Packed Multiply Accumulate with Saturation Signed Doubleword to Signed Doubleword". +// +// Mnemonic : VPMACSSDD +// Supported forms : (2 forms) +// +// * VPMACSSDD xmm, xmm, xmm, xmm [XOP] +// * VPMACSSDD xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPMACSSDD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPMACSSDD", 4, Operands { v0, v1, v2, v3 }) + // VPMACSSDD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0x8e) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPMACSSDD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x8e) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMACSSDD") + } + return p +} + +// VPMACSSDQH performs "Packed Multiply Accumulate with Saturation Signed High Doubleword to Signed Quadword". 
+// +// Mnemonic : VPMACSSDQH +// Supported forms : (2 forms) +// +// * VPMACSSDQH xmm, xmm, xmm, xmm [XOP] +// * VPMACSSDQH xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPMACSSDQH(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPMACSSDQH", 4, Operands { v0, v1, v2, v3 }) + // VPMACSSDQH xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0x8f) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPMACSSDQH xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x8f) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMACSSDQH") + } + return p +} + +// VPMACSSDQL performs "Packed Multiply Accumulate with Saturation Signed Low Doubleword to Signed Quadword". +// +// Mnemonic : VPMACSSDQL +// Supported forms : (2 forms) +// +// * VPMACSSDQL xmm, xmm, xmm, xmm [XOP] +// * VPMACSSDQL xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPMACSSDQL(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPMACSSDQL", 4, Operands { v0, v1, v2, v3 }) + // VPMACSSDQL xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0x87) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPMACSSDQL xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x87) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMACSSDQL") + } + return p +} + +// VPMACSSWD performs "Packed Multiply Accumulate with Saturation Signed Word to Signed Doubleword". 
+// +// Mnemonic : VPMACSSWD +// Supported forms : (2 forms) +// +// * VPMACSSWD xmm, xmm, xmm, xmm [XOP] +// * VPMACSSWD xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPMACSSWD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPMACSSWD", 4, Operands { v0, v1, v2, v3 }) + // VPMACSSWD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0x86) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPMACSSWD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x86) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMACSSWD") + } + return p +} + +// VPMACSSWW performs "Packed Multiply Accumulate with Saturation Signed Word to Signed Word". +// +// Mnemonic : VPMACSSWW +// Supported forms : (2 forms) +// +// * VPMACSSWW xmm, xmm, xmm, xmm [XOP] +// * VPMACSSWW xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPMACSSWW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPMACSSWW", 4, Operands { v0, v1, v2, v3 }) + // VPMACSSWW xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0x85) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPMACSSWW xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x85) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMACSSWW") + } + return p +} + +// VPMACSWD performs "Packed Multiply Accumulate Signed Word to Signed Doubleword". 
+// +// Mnemonic : VPMACSWD +// Supported forms : (2 forms) +// +// * VPMACSWD xmm, xmm, xmm, xmm [XOP] +// * VPMACSWD xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPMACSWD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPMACSWD", 4, Operands { v0, v1, v2, v3 }) + // VPMACSWD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0x96) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPMACSWD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x96) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMACSWD") + } + return p +} + +// VPMACSWW performs "Packed Multiply Accumulate Signed Word to Signed Word". +// +// Mnemonic : VPMACSWW +// Supported forms : (2 forms) +// +// * VPMACSWW xmm, xmm, xmm, xmm [XOP] +// * VPMACSWW xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPMACSWW(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPMACSWW", 4, Operands { v0, v1, v2, v3 }) + // VPMACSWW xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0x95) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPMACSWW xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x95) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMACSWW") + } + return p +} + +// VPMADCSSWD performs "Packed Multiply Add Accumulate with Saturation Signed Word to Signed Doubleword". 
+// +// Mnemonic : VPMADCSSWD +// Supported forms : (2 forms) +// +// * VPMADCSSWD xmm, xmm, xmm, xmm [XOP] +// * VPMADCSSWD xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPMADCSSWD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPMADCSSWD", 4, Operands { v0, v1, v2, v3 }) + // VPMADCSSWD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0xa6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPMADCSSWD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xa6) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMADCSSWD") + } + return p +} + +// VPMADCSWD performs "Packed Multiply Add Accumulate Signed Word to Signed Doubleword". +// +// Mnemonic : VPMADCSWD +// Supported forms : (2 forms) +// +// * VPMADCSWD xmm, xmm, xmm, xmm [XOP] +// * VPMADCSWD xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPMADCSWD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPMADCSWD", 4, Operands { v0, v1, v2, v3 }) + // VPMADCSWD xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0xb6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + } + // VPMADCSWD xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xb6) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMADCSWD") + } + return p +} + +// VPMADD52HUQ performs "Packed Multiply of Unsigned 52-bit Unsigned Integers and Add High 52-bit Products to Quadword Accumulators". 
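+//
+// Example (a minimal usage sketch): the destination doubles as the quadword
+// accumulator, so each call folds the high halves of the 52-bit products into
+// the last operand:
+//
+//     p.VPMADD52HUQ(ZMM1, ZMM2, ZMM0)    // per qword: ZMM0 += (lo52(ZMM2) * lo52(ZMM1)) >> 52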
+// +// Mnemonic : VPMADD52HUQ +// Supported forms : (6 forms) +// +// * VPMADD52HUQ m128/m64bcst, xmm, xmm{k}{z} [AVX512IFMA,AVX512VL] +// * VPMADD52HUQ xmm, xmm, xmm{k}{z} [AVX512IFMA,AVX512VL] +// * VPMADD52HUQ m256/m64bcst, ymm, ymm{k}{z} [AVX512IFMA,AVX512VL] +// * VPMADD52HUQ ymm, ymm, ymm{k}{z} [AVX512IFMA,AVX512VL] +// * VPMADD52HUQ m512/m64bcst, zmm, zmm{k}{z} [AVX512IFMA] +// * VPMADD52HUQ zmm, zmm, zmm{k}{z} [AVX512IFMA] +// +func (self *Program) VPMADD52HUQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMADD52HUQ", 3, Operands { v0, v1, v2 }) + // VPMADD52HUQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512IFMA | ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb5) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMADD52HUQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512IFMA | ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xb5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADD52HUQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512IFMA | ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb5) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMADD52HUQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512IFMA | ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xb5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADD52HUQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512IFMA) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb5) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMADD52HUQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512IFMA) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xb5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMADD52HUQ") + } + return p +} + +// VPMADD52LUQ performs "Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit Products to Quadword Accumulators". 
+// +// Mnemonic : VPMADD52LUQ +// Supported forms : (6 forms) +// +// * VPMADD52LUQ m128/m64bcst, xmm, xmm{k}{z} [AVX512IFMA,AVX512VL] +// * VPMADD52LUQ xmm, xmm, xmm{k}{z} [AVX512IFMA,AVX512VL] +// * VPMADD52LUQ m256/m64bcst, ymm, ymm{k}{z} [AVX512IFMA,AVX512VL] +// * VPMADD52LUQ ymm, ymm, ymm{k}{z} [AVX512IFMA,AVX512VL] +// * VPMADD52LUQ m512/m64bcst, zmm, zmm{k}{z} [AVX512IFMA] +// * VPMADD52LUQ zmm, zmm, zmm{k}{z} [AVX512IFMA] +// +func (self *Program) VPMADD52LUQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMADD52LUQ", 3, Operands { v0, v1, v2 }) + // VPMADD52LUQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512IFMA | ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb4) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMADD52LUQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512IFMA | ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xb4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADD52LUQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512IFMA | ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb4) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMADD52LUQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512IFMA | ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xb4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADD52LUQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512IFMA) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xb4) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMADD52LUQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512IFMA) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xb4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMADD52LUQ") + } + return p +} + +// VPMADDUBSW performs "Multiply and Add Packed Signed and Unsigned Byte Integers". 
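+//
+// Example (a minimal usage sketch): a common building block for byte-wise dot
+// products, multiplying unsigned bytes from one source with signed bytes from
+// the other and saturating each adjacent pair of word sums into the
+// destination:
+//
+//     p.VPMADDUBSW(YMM1, YMM2, YMM0)    // AVX2 ymm form; destination last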
+// +// Mnemonic : VPMADDUBSW +// Supported forms : (10 forms) +// +// * VPMADDUBSW xmm, xmm, xmm [AVX] +// * VPMADDUBSW m128, xmm, xmm [AVX] +// * VPMADDUBSW ymm, ymm, ymm [AVX2] +// * VPMADDUBSW m256, ymm, ymm [AVX2] +// * VPMADDUBSW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMADDUBSW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMADDUBSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMADDUBSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMADDUBSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMADDUBSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMADDUBSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMADDUBSW", 3, Operands { v0, v1, v2 }) + // VPMADDUBSW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x04) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADDUBSW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x04) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMADDUBSW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x04) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADDUBSW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x04) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMADDUBSW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x04) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADDUBSW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x04) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMADDUBSW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x04) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADDUBSW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), 
addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x04) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMADDUBSW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x04) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADDUBSW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x04) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMADDUBSW") + } + return p +} + +// VPMADDWD performs "Multiply and Add Packed Signed Word Integers". +// +// Mnemonic : VPMADDWD +// Supported forms : (10 forms) +// +// * VPMADDWD xmm, xmm, xmm [AVX] +// * VPMADDWD m128, xmm, xmm [AVX] +// * VPMADDWD ymm, ymm, ymm [AVX2] +// * VPMADDWD m256, ymm, ymm [AVX2] +// * VPMADDWD zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMADDWD m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMADDWD xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMADDWD m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMADDWD ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMADDWD m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMADDWD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMADDWD", 3, Operands { v0, v1, v2 }) + // VPMADDWD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADDWD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf5) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMADDWD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADDWD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf5) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMADDWD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADDWD m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m 
*_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf5) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMADDWD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADDWD m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf5) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMADDWD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xf5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMADDWD m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf5) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMADDWD") + } + return p +} + +// VPMASKMOVD performs "Conditional Move Packed Doubleword Integers". 
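+//
+// Example (a minimal usage sketch): the memory side may be either the source
+// (masked load) or the destination (masked store), with the mask register in
+// the middle. Ptr is assumed here as a MemoryOperand constructor; the exact
+// helper name may differ in this package.
+//
+//     p.VPMASKMOVD(Ptr(RSI, 0), XMM1, XMM0)    // masked load:  XMM0 <- [RSI] in lanes selected by XMM1
+//     p.VPMASKMOVD(XMM0, XMM1, Ptr(RDI, 0))    // masked store: [RDI] <- XMM0 under the same mask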
+// +// Mnemonic : VPMASKMOVD +// Supported forms : (4 forms) +// +// * VPMASKMOVD m128, xmm, xmm [AVX2] +// * VPMASKMOVD m256, ymm, ymm [AVX2] +// * VPMASKMOVD xmm, xmm, m128 [AVX2] +// * VPMASKMOVD ymm, ymm, m256 [AVX2] +// +func (self *Program) VPMASKMOVD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMASKMOVD", 3, Operands { v0, v1, v2 }) + // VPMASKMOVD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x8c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMASKMOVD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x8c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMASKMOVD xmm, xmm, m128 + if isXMM(v0) && isXMM(v1) && isM128(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[0]), addr(v[2]), hlcode(v[1])) + m.emit(0x8e) + m.mrsd(lcode(v[0]), addr(v[2]), 1) + }) + } + // VPMASKMOVD ymm, ymm, m256 + if isYMM(v0) && isYMM(v1) && isM256(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[0]), addr(v[2]), hlcode(v[1])) + m.emit(0x8e) + m.mrsd(lcode(v[0]), addr(v[2]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPMASKMOVD") + } + return p +} + +// VPMASKMOVQ performs "Conditional Move Packed Quadword Integers". +// +// Mnemonic : VPMASKMOVQ +// Supported forms : (4 forms) +// +// * VPMASKMOVQ m128, xmm, xmm [AVX2] +// * VPMASKMOVQ m256, ymm, ymm [AVX2] +// * VPMASKMOVQ xmm, xmm, m128 [AVX2] +// * VPMASKMOVQ ymm, ymm, m256 [AVX2] +// +func (self *Program) VPMASKMOVQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMASKMOVQ", 3, Operands { v0, v1, v2 }) + // VPMASKMOVQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x8c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMASKMOVQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x8c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMASKMOVQ xmm, xmm, m128 + if isXMM(v0) && isXMM(v1) && isM128(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[0]), addr(v[2]), hlcode(v[1])) + m.emit(0x8e) + m.mrsd(lcode(v[0]), addr(v[2]), 1) + }) + } + // VPMASKMOVQ ymm, ymm, m256 + if isYMM(v0) && isYMM(v1) && isM256(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[0]), addr(v[2]), hlcode(v[1])) + m.emit(0x8e) + m.mrsd(lcode(v[0]), addr(v[2]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPMASKMOVQ") + } + return p +} + +// VPMAXSB performs "Maximum of Packed Signed Byte Integers". 
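+//
+// Example (a minimal usage sketch): a straightforward per-byte signed
+// maximum, destination last as in the other three-operand forms in this file:
+//
+//     p.VPMAXSB(XMM0, XMM1, XMM2)    // XMM2 <- max(XMM1, XMM0) per signed byte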
+// +// Mnemonic : VPMAXSB +// Supported forms : (10 forms) +// +// * VPMAXSB xmm, xmm, xmm [AVX] +// * VPMAXSB m128, xmm, xmm [AVX] +// * VPMAXSB ymm, ymm, ymm [AVX2] +// * VPMAXSB m256, ymm, ymm [AVX2] +// * VPMAXSB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMAXSB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMAXSB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMAXSB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMAXSB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMAXSB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMAXSB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMAXSB", 3, Operands { v0, v1, v2 }) + // VPMAXSB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x3c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x3c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMAXSB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x3c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x3c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMAXSB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x3c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x3c) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMAXSB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x3c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x3c) + 
m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMAXSB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x3c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x3c) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMAXSB") + } + return p +} + +// VPMAXSD performs "Maximum of Packed Signed Doubleword Integers". +// +// Mnemonic : VPMAXSD +// Supported forms : (10 forms) +// +// * VPMAXSD xmm, xmm, xmm [AVX] +// * VPMAXSD m128, xmm, xmm [AVX] +// * VPMAXSD ymm, ymm, ymm [AVX2] +// * VPMAXSD m256, ymm, ymm [AVX2] +// * VPMAXSD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPMAXSD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPMAXSD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMAXSD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMAXSD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMAXSD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMAXSD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMAXSD", 3, Operands { v0, v1, v2 }) + // VPMAXSD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x3d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x3d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMAXSD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x3d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x3d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMAXSD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3d) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMAXSD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + 
m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x3d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3d) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMAXSD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x3d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3d) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMAXSD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x3d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMAXSD") + } + return p +} + +// VPMAXSQ performs "Maximum of Packed Signed Quadword Integers". 
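+// Note that the quadword variant is EVEX-only: there is no VEX (AVX/AVX2)
+// encoding, so every form below requires AVX-512F.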
+// +// Mnemonic : VPMAXSQ +// Supported forms : (6 forms) +// +// * VPMAXSQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPMAXSQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPMAXSQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMAXSQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMAXSQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMAXSQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMAXSQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMAXSQ", 3, Operands { v0, v1, v2 }) + // VPMAXSQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3d) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMAXSQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x3d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3d) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMAXSQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x3d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3d) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMAXSQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x3d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMAXSQ") + } + return p +} + +// VPMAXSW performs "Maximum of Packed Signed Word Integers". 
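+// As a legacy 0F-map opcode (0xEE), its VEX forms use the compact two-byte
+// prefix (vex2) instead of the three-byte 0F38 encodings seen above.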
+// +// Mnemonic : VPMAXSW +// Supported forms : (10 forms) +// +// * VPMAXSW xmm, xmm, xmm [AVX] +// * VPMAXSW m128, xmm, xmm [AVX] +// * VPMAXSW ymm, ymm, ymm [AVX2] +// * VPMAXSW m256, ymm, ymm [AVX2] +// * VPMAXSW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMAXSW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMAXSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMAXSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMAXSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMAXSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMAXSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMAXSW", 3, Operands { v0, v1, v2 }) + // VPMAXSW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xee) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xee) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMAXSW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xee) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xee) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMAXSW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xee) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xee) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMAXSW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xee) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xee) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMAXSW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL 
| ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xee) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXSW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xee) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMAXSW") + } + return p +} + +// VPMAXUB performs "Maximum of Packed Unsigned Byte Integers". +// +// Mnemonic : VPMAXUB +// Supported forms : (10 forms) +// +// * VPMAXUB xmm, xmm, xmm [AVX] +// * VPMAXUB m128, xmm, xmm [AVX] +// * VPMAXUB ymm, ymm, ymm [AVX2] +// * VPMAXUB m256, ymm, ymm [AVX2] +// * VPMAXUB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMAXUB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMAXUB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMAXUB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMAXUB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMAXUB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMAXUB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMAXUB", 3, Operands { v0, v1, v2 }) + // VPMAXUB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xde) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xde) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMAXUB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xde) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xde) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMAXUB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xde) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xde) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMAXUB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) 
{ + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xde) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xde) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMAXUB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xde) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xde) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMAXUB") + } + return p +} + +// VPMAXUD performs "Maximum of Packed Unsigned Doubleword Integers". +// +// Mnemonic : VPMAXUD +// Supported forms : (10 forms) +// +// * VPMAXUD xmm, xmm, xmm [AVX] +// * VPMAXUD m128, xmm, xmm [AVX] +// * VPMAXUD ymm, ymm, ymm [AVX2] +// * VPMAXUD m256, ymm, ymm [AVX2] +// * VPMAXUD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPMAXUD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPMAXUD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMAXUD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMAXUD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMAXUD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMAXUD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMAXUD", 3, Operands { v0, v1, v2 }) + // VPMAXUD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x3f) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMAXUD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + 
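+ // The memory form uses vex3: a three-byte VEX prefix (escape 0xc4) is
+ // mandatory here because the two-byte form can only name the 0F opcode
+ // map, not the 0F38 map this instruction lives in.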
p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x3f) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMAXUD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3f) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMAXUD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3f) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMAXUD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3f) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMAXUD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMAXUD") + } + return p +} + +// VPMAXUQ performs "Maximum of Packed Unsigned Quadword Integers". 
+// +// Mnemonic : VPMAXUQ +// Supported forms : (6 forms) +// +// * VPMAXUQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPMAXUQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPMAXUQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMAXUQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMAXUQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMAXUQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMAXUQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMAXUQ", 3, Operands { v0, v1, v2 }) + // VPMAXUQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3f) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMAXUQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3f) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMAXUQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3f) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMAXUQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x3f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMAXUQ") + } + return p +} + +// VPMAXUW performs "Maximum of Packed Unsigned Word Integers". 
+// +// Mnemonic : VPMAXUW +// Supported forms : (10 forms) +// +// * VPMAXUW xmm, xmm, xmm [AVX] +// * VPMAXUW m128, xmm, xmm [AVX] +// * VPMAXUW ymm, ymm, ymm [AVX2] +// * VPMAXUW m256, ymm, ymm [AVX2] +// * VPMAXUW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMAXUW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMAXUW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMAXUW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMAXUW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMAXUW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMAXUW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMAXUW", 3, Operands { v0, v1, v2 }) + // VPMAXUW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x3e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x3e) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMAXUW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x3e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x3e) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMAXUW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x3e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x3e) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMAXUW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x3e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x3e) + 
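+ // Memory operands go through mrsd (ModRM + optional SIB + displacement);
+ // register operands instead emit a direct ModRM byte, 0xc0|reg<<3|rm.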
m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMAXUW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x3e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMAXUW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x3e) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMAXUW") + } + return p +} + +// VPMINSB performs "Minimum of Packed Signed Byte Integers". +// +// Mnemonic : VPMINSB +// Supported forms : (10 forms) +// +// * VPMINSB xmm, xmm, xmm [AVX] +// * VPMINSB m128, xmm, xmm [AVX] +// * VPMINSB ymm, ymm, ymm [AVX2] +// * VPMINSB m256, ymm, ymm [AVX2] +// * VPMINSB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMINSB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMINSB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMINSB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMINSB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMINSB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMINSB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMINSB", 3, Operands { v0, v1, v2 }) + // VPMINSB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x38) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x38) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMINSB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x38) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x38) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMINSB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x38) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + 
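+ // The full 512-bit forms need only AVX512BW; AVX512VL is required just
+ // for the 128- and 256-bit EVEX variants.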
self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x38) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMINSB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x38) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x38) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMINSB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x38) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x38) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMINSB") + } + return p +} + +// VPMINSD performs "Minimum of Packed Signed Doubleword Integers". 
+// +// Mnemonic : VPMINSD +// Supported forms : (10 forms) +// +// * VPMINSD xmm, xmm, xmm [AVX] +// * VPMINSD m128, xmm, xmm [AVX] +// * VPMINSD ymm, ymm, ymm [AVX2] +// * VPMINSD m256, ymm, ymm [AVX2] +// * VPMINSD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPMINSD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPMINSD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMINSD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMINSD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMINSD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMINSD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMINSD", 3, Operands { v0, v1, v2 }) + // VPMINSD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x39) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x39) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMINSD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x39) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x39) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMINSD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x39) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMINSD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x39) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x39) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMINSD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ 
(ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x39) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x39) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMINSD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x39) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMINSD") + } + return p +} + +// VPMINSQ performs "Minimum of Packed Signed Quadword Integers". +// +// Mnemonic : VPMINSQ +// Supported forms : (6 forms) +// +// * VPMINSQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPMINSQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPMINSQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMINSQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMINSQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMINSQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMINSQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMINSQ", 3, Operands { v0, v1, v2 }) + // VPMINSQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x39) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMINSQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x39) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x39) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMINSQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x39) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m 
*_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x39) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMINSQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x39) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMINSQ") + } + return p +} + +// VPMINSW performs "Minimum of Packed Signed Word Integers". +// +// Mnemonic : VPMINSW +// Supported forms : (10 forms) +// +// * VPMINSW xmm, xmm, xmm [AVX] +// * VPMINSW m128, xmm, xmm [AVX] +// * VPMINSW ymm, ymm, ymm [AVX2] +// * VPMINSW m256, ymm, ymm [AVX2] +// * VPMINSW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMINSW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMINSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMINSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMINSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMINSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMINSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMINSW", 3, Operands { v0, v1, v2 }) + // VPMINSW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xea) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xea) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMINSW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xea) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xea) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMINSW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xea) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xea) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMINSW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + 
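+ // Both ISA bits are OR-ed into a single requirement mask for this form.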
p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xea) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xea) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMINSW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xea) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINSW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xea) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMINSW") + } + return p +} + +// VPMINUB performs "Minimum of Packed Unsigned Byte Integers". +// +// Mnemonic : VPMINUB +// Supported forms : (10 forms) +// +// * VPMINUB xmm, xmm, xmm [AVX] +// * VPMINUB m128, xmm, xmm [AVX] +// * VPMINUB ymm, ymm, ymm [AVX2] +// * VPMINUB m256, ymm, ymm [AVX2] +// * VPMINUB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMINUB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMINUB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMINUB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMINUB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMINUB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMINUB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMINUB", 3, Operands { v0, v1, v2 }) + // VPMINUB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xda) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xda) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMINUB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xda) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xda) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMINUB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) 
&& isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xda) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xda) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMINUB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xda) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xda) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMINUB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xda) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xda) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMINUB") + } + return p +} + +// VPMINUD performs "Minimum of Packed Unsigned Doubleword Integers". 
+// +// Mnemonic : VPMINUD +// Supported forms : (10 forms) +// +// * VPMINUD xmm, xmm, xmm [AVX] +// * VPMINUD m128, xmm, xmm [AVX] +// * VPMINUD ymm, ymm, ymm [AVX2] +// * VPMINUD m256, ymm, ymm [AVX2] +// * VPMINUD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPMINUD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPMINUD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMINUD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMINUD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMINUD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMINUD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMINUD", 3, Operands { v0, v1, v2 }) + // VPMINUD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x3b) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMINUD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x3b) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMINUD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3b) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMINUD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3b) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMINUD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ 
(ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3b) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMINUD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMINUD") + } + return p +} + +// VPMINUQ performs "Minimum of Packed Unsigned Quadword Integers". +// +// Mnemonic : VPMINUQ +// Supported forms : (6 forms) +// +// * VPMINUQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPMINUQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPMINUQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMINUQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMINUQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMINUQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMINUQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMINUQ", 3, Operands { v0, v1, v2 }) + // VPMINUQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3b) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMINUQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3b) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMINUQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m 
*_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x3b) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMINUQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x3b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMINUQ") + } + return p +} + +// VPMINUW performs "Minimum of Packed Unsigned Word Integers". +// +// Mnemonic : VPMINUW +// Supported forms : (10 forms) +// +// * VPMINUW xmm, xmm, xmm [AVX] +// * VPMINUW m128, xmm, xmm [AVX] +// * VPMINUW ymm, ymm, ymm [AVX2] +// * VPMINUW m256, ymm, ymm [AVX2] +// * VPMINUW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMINUW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMINUW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMINUW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMINUW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMINUW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMINUW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMINUW", 3, Operands { v0, v1, v2 }) + // VPMINUW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x3a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x3a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMINUW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x3a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x3a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMINUW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x3a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x3a) + 
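+ // The disp8 compression factor is 64 here: one full ZMM-width memory operand.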
m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMINUW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x3a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x3a) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMINUW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x3a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMINUW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x3a) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMINUW") + } + return p +} + +// VPMOVB2M performs "Move Signs of Packed Byte Integers to Mask Register". +// +// Mnemonic : VPMOVB2M +// Supported forms : (3 forms) +// +// * VPMOVB2M zmm, k [AVX512BW] +// * VPMOVB2M xmm, k [AVX512BW,AVX512VL] +// * VPMOVB2M ymm, k [AVX512BW,AVX512VL] +// +func (self *Program) VPMOVB2M(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVB2M", 2, Operands { v0, v1 }) + // VPMOVB2M zmm, k + if isZMM(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x48) + m.emit(0x29) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVB2M xmm, k + if isEVEXXMM(v0) && isK(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x08) + m.emit(0x29) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVB2M ymm, k + if isEVEXYMM(v0) && isK(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x28) + m.emit(0x29) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVB2M") + } + return p +} + +// VPMOVD2M performs "Move Signs of Packed Doubleword Integers to Mask Register". 
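+//
+// Usage sketch (editor's addition, not mkasm output): assuming an already
+// constructed assembler program p (*Program) and this package's register
+// constants, the register form could be emitted as:
+//
+//	p.VPMOVD2M(ZMM3, K1)	// p is assumed; gathers the sign bit of each of the 16 dwords of ZMM3 into K1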
+// +// Mnemonic : VPMOVD2M +// Supported forms : (3 forms) +// +// * VPMOVD2M zmm, k [AVX512DQ] +// * VPMOVD2M xmm, k [AVX512DQ,AVX512VL] +// * VPMOVD2M ymm, k [AVX512DQ,AVX512VL] +// +func (self *Program) VPMOVD2M(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVD2M", 2, Operands { v0, v1 }) + // VPMOVD2M zmm, k + if isZMM(v0) && isK(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x48) + m.emit(0x39) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVD2M xmm, k + if isEVEXXMM(v0) && isK(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x08) + m.emit(0x39) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVD2M ymm, k + if isEVEXYMM(v0) && isK(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x28) + m.emit(0x39) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVD2M") + } + return p +} + +// VPMOVDB performs "Down Convert Packed Doubleword Values to Byte Values with Truncation". +// +// Mnemonic : VPMOVDB +// Supported forms : (6 forms) +// +// * VPMOVDB zmm, xmm{k}{z} [AVX512F] +// * VPMOVDB zmm, m128{k}{z} [AVX512F] +// * VPMOVDB xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVDB xmm, m32{k}{z} [AVX512F,AVX512VL] +// * VPMOVDB ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVDB ymm, m64{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVDB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVDB", 2, Operands { v0, v1 }) + // VPMOVDB zmm, xmm{k}{z} + if isZMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x31) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVDB zmm, m128{k}{z} + if isZMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x31) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VPMOVDB xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x31) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVDB xmm, m32{k}{z} + if isEVEXXMM(v0) && isM32kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x31) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VPMOVDB ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + 
self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x31) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVDB ymm, m64{k}{z} + if isEVEXYMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x31) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVDB") + } + return p +} + +// VPMOVDW performs "Down Convert Packed Doubleword Values to Word Values with Truncation". +// +// Mnemonic : VPMOVDW +// Supported forms : (6 forms) +// +// * VPMOVDW zmm, ymm{k}{z} [AVX512F] +// * VPMOVDW zmm, m256{k}{z} [AVX512F] +// * VPMOVDW xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVDW xmm, m64{k}{z} [AVX512F,AVX512VL] +// * VPMOVDW ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVDW ymm, m128{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVDW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVDW", 2, Operands { v0, v1 }) + // VPMOVDW zmm, ymm{k}{z} + if isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x33) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVDW zmm, m256{k}{z} + if isZMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x33) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VPMOVDW xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x33) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVDW xmm, m64{k}{z} + if isEVEXXMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x33) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPMOVDW ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x33) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVDW ymm, m128{k}{z} + if isEVEXYMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x33) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVDW") + } + 
return p +} + +// VPMOVM2B performs "Expand Bits of Mask Register to Packed Byte Integers". +// +// Mnemonic : VPMOVM2B +// Supported forms : (3 forms) +// +// * VPMOVM2B k, zmm [AVX512BW] +// * VPMOVM2B k, xmm [AVX512BW,AVX512VL] +// * VPMOVM2B k, ymm [AVX512BW,AVX512VL] +// +func (self *Program) VPMOVM2B(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVM2B", 2, Operands { v0, v1 }) + // VPMOVM2B k, zmm + if isK(v0) && isZMM(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x48) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVM2B k, xmm + if isK(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x08) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVM2B k, ymm + if isK(v0) && isEVEXYMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x28) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVM2B") + } + return p +} + +// VPMOVM2D performs "Expand Bits of Mask Register to Packed Doubleword Integers". +// +// Mnemonic : VPMOVM2D +// Supported forms : (3 forms) +// +// * VPMOVM2D k, zmm [AVX512DQ] +// * VPMOVM2D k, xmm [AVX512DQ,AVX512VL] +// * VPMOVM2D k, ymm [AVX512DQ,AVX512VL] +// +func (self *Program) VPMOVM2D(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVM2D", 2, Operands { v0, v1 }) + // VPMOVM2D k, zmm + if isK(v0) && isZMM(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x48) + m.emit(0x38) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVM2D k, xmm + if isK(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x08) + m.emit(0x38) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVM2D k, ymm + if isK(v0) && isEVEXYMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7e) + m.emit(0x28) + m.emit(0x38) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVM2D") + } + return p +} + +// VPMOVM2Q performs "Expand Bits of Mask Register to Packed Quadword Integers". 
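+//
+// Usage sketch (editor's addition, not mkasm output; p is an assumed *Program):
+//
+//	p.VPMOVM2Q(K2, ZMM0)	// each of the low 8 bits of K2 expands to an all-ones or all-zeros quadword lane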
+// +// Mnemonic : VPMOVM2Q +// Supported forms : (3 forms) +// +// * VPMOVM2Q k, zmm [AVX512DQ] +// * VPMOVM2Q k, xmm [AVX512DQ,AVX512VL] +// * VPMOVM2Q k, ymm [AVX512DQ,AVX512VL] +// +func (self *Program) VPMOVM2Q(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVM2Q", 2, Operands { v0, v1 }) + // VPMOVM2Q k, zmm + if isK(v0) && isZMM(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x48) + m.emit(0x38) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVM2Q k, xmm + if isK(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x08) + m.emit(0x38) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVM2Q k, ymm + if isK(v0) && isEVEXYMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x28) + m.emit(0x38) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVM2Q") + } + return p +} + +// VPMOVM2W performs "Expand Bits of Mask Register to Packed Word Integers". +// +// Mnemonic : VPMOVM2W +// Supported forms : (3 forms) +// +// * VPMOVM2W k, zmm [AVX512BW] +// * VPMOVM2W k, xmm [AVX512BW,AVX512VL] +// * VPMOVM2W k, ymm [AVX512BW,AVX512VL] +// +func (self *Program) VPMOVM2W(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVM2W", 2, Operands { v0, v1 }) + // VPMOVM2W k, zmm + if isK(v0) && isZMM(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x48) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVM2W k, xmm + if isK(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x08) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVM2W k, ymm + if isK(v0) && isEVEXYMM(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x28) + m.emit(0x28) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVM2W") + } + return p +} + +// VPMOVMSKB performs "Move Byte Mask". 
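+//
+// Usage sketch (editor's addition, not mkasm output; p is an assumed *Program):
+//
+//	p.VPMOVMSKB(XMM1, EAX)	// high bit of each of the 16 bytes of XMM1 -> low 16 bits of EAX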
+// +// Mnemonic : VPMOVMSKB +// Supported forms : (2 forms) +// +// * VPMOVMSKB xmm, r32 [AVX] +// * VPMOVMSKB ymm, r32 [AVX2] +// +func (self *Program) VPMOVMSKB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVMSKB", 2, Operands { v0, v1 }) + // VPMOVMSKB xmm, r32 + if isXMM(v0) && isReg32(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), v[0], 0) + m.emit(0xd7) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVMSKB ymm, r32 + if isYMM(v0) && isReg32(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), v[0], 0) + m.emit(0xd7) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVMSKB") + } + return p +} + +// VPMOVQ2M performs "Move Signs of Packed Quadword Integers to Mask Register". +// +// Mnemonic : VPMOVQ2M +// Supported forms : (3 forms) +// +// * VPMOVQ2M zmm, k [AVX512DQ] +// * VPMOVQ2M xmm, k [AVX512DQ,AVX512VL] +// * VPMOVQ2M ymm, k [AVX512DQ,AVX512VL] +// +func (self *Program) VPMOVQ2M(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVQ2M", 2, Operands { v0, v1 }) + // VPMOVQ2M zmm, k + if isZMM(v0) && isK(v1) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x48) + m.emit(0x39) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVQ2M xmm, k + if isEVEXXMM(v0) && isK(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x08) + m.emit(0x39) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVQ2M ymm, k + if isEVEXYMM(v0) && isK(v1) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x28) + m.emit(0x39) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVQ2M") + } + return p +} + +// VPMOVQB performs "Down Convert Packed Quadword Values to Byte Values with Truncation". 
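+//
+// Usage sketch (editor's addition, not mkasm output; p is an assumed *Program):
+//
+//	p.VPMOVQB(ZMM4, XMM0)	// truncates 8 quadwords to 8 bytes in the low half of XMM0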
+// +// Mnemonic : VPMOVQB +// Supported forms : (6 forms) +// +// * VPMOVQB zmm, xmm{k}{z} [AVX512F] +// * VPMOVQB zmm, m64{k}{z} [AVX512F] +// * VPMOVQB xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVQB xmm, m16{k}{z} [AVX512F,AVX512VL] +// * VPMOVQB ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVQB ymm, m32{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVQB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVQB", 2, Operands { v0, v1 }) + // VPMOVQB zmm, xmm{k}{z} + if isZMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x32) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVQB zmm, m64{k}{z} + if isZMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x32) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPMOVQB xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x32) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVQB xmm, m16{k}{z} + if isEVEXXMM(v0) && isM16kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x32) + m.mrsd(lcode(v[0]), addr(v[1]), 2) + }) + } + // VPMOVQB ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x32) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVQB ymm, m32{k}{z} + if isEVEXYMM(v0) && isM32kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x32) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVQB") + } + return p +} + +// VPMOVQD performs "Down Convert Packed Quadword Values to Doubleword Values with Truncation". 
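+//
+// Usage sketch (editor's addition, not mkasm output; p is an assumed *Program):
+//
+//	p.VPMOVQD(ZMM4, YMM0)	// truncates each of 8 quadwords to its low doubleword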
+// +// Mnemonic : VPMOVQD +// Supported forms : (6 forms) +// +// * VPMOVQD zmm, ymm{k}{z} [AVX512F] +// * VPMOVQD zmm, m256{k}{z} [AVX512F] +// * VPMOVQD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVQD xmm, m64{k}{z} [AVX512F,AVX512VL] +// * VPMOVQD ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVQD ymm, m128{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVQD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVQD", 2, Operands { v0, v1 }) + // VPMOVQD zmm, ymm{k}{z} + if isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x35) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVQD zmm, m256{k}{z} + if isZMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x35) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VPMOVQD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x35) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVQD xmm, m64{k}{z} + if isEVEXXMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x35) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPMOVQD ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x35) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVQD ymm, m128{k}{z} + if isEVEXYMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x35) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVQD") + } + return p +} + +// VPMOVQW performs "Down Convert Packed Quadword Values to Word Values with Truncation". 
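+//
+// Usage sketch (editor's addition, not mkasm output; p is an assumed *Program):
+//
+//	p.VPMOVQW(ZMM4, XMM0)	// truncates 8 quadwords to 8 words, filling all lanes of XMM0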
+// +// Mnemonic : VPMOVQW +// Supported forms : (6 forms) +// +// * VPMOVQW zmm, xmm{k}{z} [AVX512F] +// * VPMOVQW zmm, m128{k}{z} [AVX512F] +// * VPMOVQW xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVQW xmm, m32{k}{z} [AVX512F,AVX512VL] +// * VPMOVQW ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVQW ymm, m64{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVQW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVQW", 2, Operands { v0, v1 }) + // VPMOVQW zmm, xmm{k}{z} + if isZMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x34) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVQW zmm, m128{k}{z} + if isZMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x34) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VPMOVQW xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x34) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVQW xmm, m32{k}{z} + if isEVEXXMM(v0) && isM32kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x34) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VPMOVQW ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x34) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVQW ymm, m64{k}{z} + if isEVEXYMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x34) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVQW") + } + return p +} + +// VPMOVSDB performs "Down Convert Packed Doubleword Values to Byte Values with Signed Saturation". 
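+//
+// Usage sketch (editor's addition, not mkasm output; p is an assumed *Program):
+//
+//	p.VPMOVSDB(ZMM5, XMM1)	// narrows 16 dwords to 16 bytes, clamping each to [-128, 127]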
+// +// Mnemonic : VPMOVSDB +// Supported forms : (6 forms) +// +// * VPMOVSDB zmm, xmm{k}{z} [AVX512F] +// * VPMOVSDB zmm, m128{k}{z} [AVX512F] +// * VPMOVSDB xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSDB xmm, m32{k}{z} [AVX512F,AVX512VL] +// * VPMOVSDB ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSDB ymm, m64{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVSDB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVSDB", 2, Operands { v0, v1 }) + // VPMOVSDB zmm, xmm{k}{z} + if isZMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x21) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSDB zmm, m128{k}{z} + if isZMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x21) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VPMOVSDB xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x21) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSDB xmm, m32{k}{z} + if isEVEXXMM(v0) && isM32kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x21) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VPMOVSDB ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x21) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSDB ymm, m64{k}{z} + if isEVEXYMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x21) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVSDB") + } + return p +} + +// VPMOVSDW performs "Down Convert Packed Doubleword Values to Word Values with Signed Saturation". 
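+//
+// Usage sketch (editor's addition, not mkasm output; p is an assumed *Program):
+//
+//	p.VPMOVSDW(ZMM5, YMM1)	// narrows 16 dwords to 16 words, clamping each to [-32768, 32767]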
+// +// Mnemonic : VPMOVSDW +// Supported forms : (6 forms) +// +// * VPMOVSDW zmm, ymm{k}{z} [AVX512F] +// * VPMOVSDW zmm, m256{k}{z} [AVX512F] +// * VPMOVSDW xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSDW xmm, m64{k}{z} [AVX512F,AVX512VL] +// * VPMOVSDW ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSDW ymm, m128{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVSDW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVSDW", 2, Operands { v0, v1 }) + // VPMOVSDW zmm, ymm{k}{z} + if isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x23) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSDW zmm, m256{k}{z} + if isZMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x23) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VPMOVSDW xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x23) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSDW xmm, m64{k}{z} + if isEVEXXMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x23) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPMOVSDW ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x23) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSDW ymm, m128{k}{z} + if isEVEXYMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x23) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVSDW") + } + return p +} + +// VPMOVSQB performs "Down Convert Packed Quadword Values to Byte Values with Signed Saturation". 
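+//
+// Usage sketch (editor's addition, not mkasm output; p is an assumed *Program):
+//
+//	p.VPMOVSQB(ZMM6, XMM2)	// narrows 8 quadwords to 8 signed-saturated bytes in the low half of XMM2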
+// +// Mnemonic : VPMOVSQB +// Supported forms : (6 forms) +// +// * VPMOVSQB zmm, xmm{k}{z} [AVX512F] +// * VPMOVSQB zmm, m64{k}{z} [AVX512F] +// * VPMOVSQB xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSQB xmm, m16{k}{z} [AVX512F,AVX512VL] +// * VPMOVSQB ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSQB ymm, m32{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVSQB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVSQB", 2, Operands { v0, v1 }) + // VPMOVSQB zmm, xmm{k}{z} + if isZMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x22) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSQB zmm, m64{k}{z} + if isZMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x22) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPMOVSQB xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x22) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSQB xmm, m16{k}{z} + if isEVEXXMM(v0) && isM16kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x22) + m.mrsd(lcode(v[0]), addr(v[1]), 2) + }) + } + // VPMOVSQB ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x22) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSQB ymm, m32{k}{z} + if isEVEXYMM(v0) && isM32kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x22) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVSQB") + } + return p +} + +// VPMOVSQD performs "Down Convert Packed Quadword Values to Doubleword Values with Signed Saturation". 
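+//
+// Usage sketch (editor's addition, not mkasm output; p is an assumed *Program):
+//
+//	p.VPMOVSQD(ZMM6, YMM2)	// narrows 8 quadwords to 8 signed-saturated dwords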
+// +// Mnemonic : VPMOVSQD +// Supported forms : (6 forms) +// +// * VPMOVSQD zmm, ymm{k}{z} [AVX512F] +// * VPMOVSQD zmm, m256{k}{z} [AVX512F] +// * VPMOVSQD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSQD xmm, m64{k}{z} [AVX512F,AVX512VL] +// * VPMOVSQD ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSQD ymm, m128{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVSQD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVSQD", 2, Operands { v0, v1 }) + // VPMOVSQD zmm, ymm{k}{z} + if isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x25) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSQD zmm, m256{k}{z} + if isZMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x25) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VPMOVSQD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x25) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSQD xmm, m64{k}{z} + if isEVEXXMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x25) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPMOVSQD ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x25) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSQD ymm, m128{k}{z} + if isEVEXYMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x25) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVSQD") + } + return p +} + +// VPMOVSQW performs "Down Convert Packed Quadword Values to Word Values with Signed Saturation". 
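+//
+// Usage sketch (editor's addition, not mkasm output; p is an assumed *Program):
+//
+//	p.VPMOVSQW(ZMM6, XMM2)	// narrows 8 quadwords to 8 signed-saturated words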
+// +// Mnemonic : VPMOVSQW +// Supported forms : (6 forms) +// +// * VPMOVSQW zmm, xmm{k}{z} [AVX512F] +// * VPMOVSQW zmm, m128{k}{z} [AVX512F] +// * VPMOVSQW xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSQW xmm, m32{k}{z} [AVX512F,AVX512VL] +// * VPMOVSQW ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSQW ymm, m64{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVSQW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVSQW", 2, Operands { v0, v1 }) + // VPMOVSQW zmm, xmm{k}{z} + if isZMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x24) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSQW zmm, m128{k}{z} + if isZMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x24) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VPMOVSQW xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x24) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSQW xmm, m32{k}{z} + if isEVEXXMM(v0) && isM32kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x24) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VPMOVSQW ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x24) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSQW ymm, m64{k}{z} + if isEVEXYMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x24) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVSQW") + } + return p +} + +// VPMOVSWB performs "Down Convert Packed Word Values to Byte Values with Signed Saturation". 
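+//
+// Usage sketch (editor's addition, not mkasm output; p is an assumed *Program):
+//
+//	p.VPMOVSWB(ZMM7, YMM3)	// narrows 32 words to 32 signed-saturated bytes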
+// +// Mnemonic : VPMOVSWB +// Supported forms : (6 forms) +// +// * VPMOVSWB zmm, ymm{k}{z} [AVX512BW] +// * VPMOVSWB zmm, m256{k}{z} [AVX512BW] +// * VPMOVSWB xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMOVSWB xmm, m64{k}{z} [AVX512BW,AVX512VL] +// * VPMOVSWB ymm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMOVSWB ymm, m128{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMOVSWB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVSWB", 2, Operands { v0, v1 }) + // VPMOVSWB zmm, ymm{k}{z} + if isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x20) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSWB zmm, m256{k}{z} + if isZMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x20) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VPMOVSWB xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x20) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSWB xmm, m64{k}{z} + if isEVEXXMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x20) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPMOVSWB ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x20) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVSWB ymm, m128{k}{z} + if isEVEXYMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x20) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVSWB") + } + return p +} + +// VPMOVSXBD performs "Move Packed Byte Integers to Doubleword Integers with Sign Extension". 
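+//
+// Usage sketch (editor's addition, not mkasm output; p is an assumed *Program):
+//
+//	p.VPMOVSXBD(XMM0, YMM1)	// sign-extends the low 8 bytes of XMM0 to 8 dwords (AVX2 form)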
+// +// Mnemonic : VPMOVSXBD +// Supported forms : (10 forms) +// +// * VPMOVSXBD xmm, xmm [AVX] +// * VPMOVSXBD m32, xmm [AVX] +// * VPMOVSXBD xmm, ymm [AVX2] +// * VPMOVSXBD m64, ymm [AVX2] +// * VPMOVSXBD xmm, zmm{k}{z} [AVX512F] +// * VPMOVSXBD m128, zmm{k}{z} [AVX512F] +// * VPMOVSXBD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXBD xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXBD m32, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXBD m64, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVSXBD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVSXBD", 2, Operands { v0, v1 }) + // VPMOVSXBD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x21) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBD m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x21) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVSXBD xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x21) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBD m64, ymm + if isM64(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x21) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVSXBD xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x21) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBD m128, zmm{k}{z} + if isM128(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x21) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VPMOVSXBD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x21) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBD xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x21) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBD m32, xmm{k}{z} + if isM32(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 
zcode(v[1]), 0) + m.emit(0x21) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VPMOVSXBD m64, ymm{k}{z} + if isM64(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x21) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVSXBD") + } + return p +} + +// VPMOVSXBQ performs "Move Packed Byte Integers to Quadword Integers with Sign Extension". +// +// Mnemonic : VPMOVSXBQ +// Supported forms : (10 forms) +// +// * VPMOVSXBQ xmm, xmm [AVX] +// * VPMOVSXBQ m16, xmm [AVX] +// * VPMOVSXBQ xmm, ymm [AVX2] +// * VPMOVSXBQ m32, ymm [AVX2] +// * VPMOVSXBQ xmm, zmm{k}{z} [AVX512F] +// * VPMOVSXBQ m64, zmm{k}{z} [AVX512F] +// * VPMOVSXBQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXBQ xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXBQ m16, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXBQ m32, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVSXBQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVSXBQ", 2, Operands { v0, v1 }) + // VPMOVSXBQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x22) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBQ m16, xmm + if isM16(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x22) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVSXBQ xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x22) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBQ m32, ymm + if isM32(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x22) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVSXBQ xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x22) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBQ m64, zmm{k}{z} + if isM64(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x22) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPMOVSXBQ xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x22) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBQ xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | 
ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x22) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBQ m16, xmm{k}{z} + if isM16(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x22) + m.mrsd(lcode(v[1]), addr(v[0]), 2) + }) + } + // VPMOVSXBQ m32, ymm{k}{z} + if isM32(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x22) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVSXBQ") + } + return p +} + +// VPMOVSXBW performs "Move Packed Byte Integers to Word Integers with Sign Extension". +// +// Mnemonic : VPMOVSXBW +// Supported forms : (10 forms) +// +// * VPMOVSXBW xmm, xmm [AVX] +// * VPMOVSXBW m64, xmm [AVX] +// * VPMOVSXBW xmm, ymm [AVX2] +// * VPMOVSXBW m128, ymm [AVX2] +// * VPMOVSXBW ymm, zmm{k}{z} [AVX512BW] +// * VPMOVSXBW m256, zmm{k}{z} [AVX512BW] +// * VPMOVSXBW xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMOVSXBW xmm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMOVSXBW m64, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMOVSXBW m128, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMOVSXBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVSXBW", 2, Operands { v0, v1 }) + // VPMOVSXBW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x20) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBW m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x20) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVSXBW xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x20) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBW m128, ymm + if isM128(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x20) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVSXBW ymm, zmm{k}{z} + if isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x20) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBW m256, zmm{k}{z} + if isM256(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 
0) + m.emit(0x20) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VPMOVSXBW xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x20) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBW xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x20) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXBW m64, xmm{k}{z} + if isM64(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x20) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPMOVSXBW m128, ymm{k}{z} + if isM128(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x20) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVSXBW") + } + return p +} + +// VPMOVSXDQ performs "Move Packed Doubleword Integers to Quadword Integers with Sign Extension". +// +// Mnemonic : VPMOVSXDQ +// Supported forms : (10 forms) +// +// * VPMOVSXDQ xmm, xmm [AVX] +// * VPMOVSXDQ m64, xmm [AVX] +// * VPMOVSXDQ xmm, ymm [AVX2] +// * VPMOVSXDQ m128, ymm [AVX2] +// * VPMOVSXDQ ymm, zmm{k}{z} [AVX512F] +// * VPMOVSXDQ m256, zmm{k}{z} [AVX512F] +// * VPMOVSXDQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXDQ xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXDQ m64, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXDQ m128, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVSXDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVSXDQ", 2, Operands { v0, v1 }) + // VPMOVSXDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x25) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXDQ m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x25) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVSXDQ xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x25) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXDQ m128, ymm + if isM128(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x25) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVSXDQ ymm, zmm{k}{z} + if isEVEXYMM(v0) && 
isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x25) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXDQ m256, zmm{k}{z} + if isM256(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x25) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VPMOVSXDQ xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x25) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXDQ xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x25) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXDQ m64, xmm{k}{z} + if isM64(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x25) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPMOVSXDQ m128, ymm{k}{z} + if isM128(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x25) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVSXDQ") + } + return p +} + +// VPMOVSXWD performs "Move Packed Word Integers to Doubleword Integers with Sign Extension". 
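+//
+// A minimal usage sketch (not part of the generated source; XMM0 and YMM1 are
+// assumed to be the register constants this package defines):
+//
+//     p.VPMOVSXWD(XMM0, YMM1)   // sign-extend the 8 words in XMM0 into 8 dwords in YMM1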
+// +// Mnemonic : VPMOVSXWD +// Supported forms : (10 forms) +// +// * VPMOVSXWD xmm, xmm [AVX] +// * VPMOVSXWD m64, xmm [AVX] +// * VPMOVSXWD xmm, ymm [AVX2] +// * VPMOVSXWD m128, ymm [AVX2] +// * VPMOVSXWD ymm, zmm{k}{z} [AVX512F] +// * VPMOVSXWD m256, zmm{k}{z} [AVX512F] +// * VPMOVSXWD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXWD xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXWD m64, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXWD m128, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVSXWD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVSXWD", 2, Operands { v0, v1 }) + // VPMOVSXWD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x23) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXWD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x23) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVSXWD xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x23) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXWD m128, ymm + if isM128(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x23) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVSXWD ymm, zmm{k}{z} + if isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x23) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXWD m256, zmm{k}{z} + if isM256(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x23) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VPMOVSXWD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x23) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXWD xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x23) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXWD m64, xmm{k}{z} + if isM64(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 
zcode(v[1]), 0) + m.emit(0x23) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPMOVSXWD m128, ymm{k}{z} + if isM128(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x23) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVSXWD") + } + return p +} + +// VPMOVSXWQ performs "Move Packed Word Integers to Quadword Integers with Sign Extension". +// +// Mnemonic : VPMOVSXWQ +// Supported forms : (10 forms) +// +// * VPMOVSXWQ xmm, xmm [AVX] +// * VPMOVSXWQ m32, xmm [AVX] +// * VPMOVSXWQ xmm, ymm [AVX2] +// * VPMOVSXWQ m64, ymm [AVX2] +// * VPMOVSXWQ xmm, zmm{k}{z} [AVX512F] +// * VPMOVSXWQ m128, zmm{k}{z} [AVX512F] +// * VPMOVSXWQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXWQ xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXWQ m32, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVSXWQ m64, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVSXWQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVSXWQ", 2, Operands { v0, v1 }) + // VPMOVSXWQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x24) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXWQ m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x24) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVSXWQ xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x24) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXWQ m64, ymm + if isM64(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x24) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVSXWQ xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x24) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXWQ m128, zmm{k}{z} + if isM128(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x24) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VPMOVSXWQ xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x24) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXWQ xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + 
self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x24) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVSXWQ m32, xmm{k}{z} + if isM32(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x24) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VPMOVSXWQ m64, ymm{k}{z} + if isM64(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x24) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVSXWQ") + } + return p +} + +// VPMOVUSDB performs "Down Convert Packed Doubleword Values to Byte Values with Unsigned Saturation". +// +// Mnemonic : VPMOVUSDB +// Supported forms : (6 forms) +// +// * VPMOVUSDB zmm, xmm{k}{z} [AVX512F] +// * VPMOVUSDB zmm, m128{k}{z} [AVX512F] +// * VPMOVUSDB xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSDB xmm, m32{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSDB ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSDB ymm, m64{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVUSDB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVUSDB", 2, Operands { v0, v1 }) + // VPMOVUSDB zmm, xmm{k}{z} + if isZMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSDB zmm, m128{k}{z} + if isZMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VPMOVUSDB xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSDB xmm, m32{k}{z} + if isEVEXXMM(v0) && isM32kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VPMOVUSDB ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x11) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSDB ymm, m64{k}{z} + 
if isEVEXYMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x11) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVUSDB") + } + return p +} + +// VPMOVUSDW performs "Down Convert Packed Doubleword Values to Word Values with Unsigned Saturation". +// +// Mnemonic : VPMOVUSDW +// Supported forms : (6 forms) +// +// * VPMOVUSDW zmm, ymm{k}{z} [AVX512F] +// * VPMOVUSDW zmm, m256{k}{z} [AVX512F] +// * VPMOVUSDW xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSDW xmm, m64{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSDW ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSDW ymm, m128{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVUSDW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVUSDW", 2, Operands { v0, v1 }) + // VPMOVUSDW zmm, ymm{k}{z} + if isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x13) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSDW zmm, m256{k}{z} + if isZMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x13) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VPMOVUSDW xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x13) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSDW xmm, m64{k}{z} + if isEVEXXMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x13) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPMOVUSDW ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x13) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSDW ymm, m128{k}{z} + if isEVEXYMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x13) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVUSDW") + } + return p +} + +// VPMOVUSQB performs "Down Convert Packed Quadword Values to Byte Values with Unsigned Saturation". 
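+//
+// For example (a sketch only; the register names are assumed package constants),
+// narrowing the eight quadwords of a zmm register to eight saturated bytes:
+//
+//     p.VPMOVUSQB(ZMM0, XMM1)   // each unsigned qword is clamped to [0, 255]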
+// +// Mnemonic : VPMOVUSQB +// Supported forms : (6 forms) +// +// * VPMOVUSQB zmm, xmm{k}{z} [AVX512F] +// * VPMOVUSQB zmm, m64{k}{z} [AVX512F] +// * VPMOVUSQB xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSQB xmm, m16{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSQB ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSQB ymm, m32{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVUSQB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVUSQB", 2, Operands { v0, v1 }) + // VPMOVUSQB zmm, xmm{k}{z} + if isZMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x12) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSQB zmm, m64{k}{z} + if isZMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x12) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPMOVUSQB xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x12) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSQB xmm, m16{k}{z} + if isEVEXXMM(v0) && isM16kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x12) + m.mrsd(lcode(v[0]), addr(v[1]), 2) + }) + } + // VPMOVUSQB ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x12) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSQB ymm, m32{k}{z} + if isEVEXYMM(v0) && isM32kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x12) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVUSQB") + } + return p +} + +// VPMOVUSQD performs "Down Convert Packed Quadword Values to Doubleword Values with Unsigned Saturation". 
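+//
+// A hedged usage sketch (register constants assumed):
+//
+//     p.VPMOVUSQD(ZMM0, YMM1)   // 8 unsigned qwords -> 8 dwords, clamped to [0, 2^32-1]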
+// +// Mnemonic : VPMOVUSQD +// Supported forms : (6 forms) +// +// * VPMOVUSQD zmm, ymm{k}{z} [AVX512F] +// * VPMOVUSQD zmm, m256{k}{z} [AVX512F] +// * VPMOVUSQD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSQD xmm, m64{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSQD ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSQD ymm, m128{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVUSQD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVUSQD", 2, Operands { v0, v1 }) + // VPMOVUSQD zmm, ymm{k}{z} + if isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x15) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSQD zmm, m256{k}{z} + if isZMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x15) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VPMOVUSQD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x15) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSQD xmm, m64{k}{z} + if isEVEXXMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x15) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPMOVUSQD ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x15) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSQD ymm, m128{k}{z} + if isEVEXYMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x15) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVUSQD") + } + return p +} + +// VPMOVUSQW performs "Down Convert Packed Quadword Values to Word Values with Unsigned Saturation". 
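+//
+// Sketch, assuming this package's register constants:
+//
+//     p.VPMOVUSQW(ZMM0, XMM1)   // 8 unsigned qwords -> 8 words, clamped to [0, 65535]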
+// +// Mnemonic : VPMOVUSQW +// Supported forms : (6 forms) +// +// * VPMOVUSQW zmm, xmm{k}{z} [AVX512F] +// * VPMOVUSQW zmm, m128{k}{z} [AVX512F] +// * VPMOVUSQW xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSQW xmm, m32{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSQW ymm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVUSQW ymm, m64{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVUSQW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVUSQW", 2, Operands { v0, v1 }) + // VPMOVUSQW zmm, xmm{k}{z} + if isZMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x14) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSQW zmm, m128{k}{z} + if isZMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x14) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + // VPMOVUSQW xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x14) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSQW xmm, m32{k}{z} + if isEVEXXMM(v0) && isM32kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x14) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VPMOVUSQW ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x14) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSQW ymm, m64{k}{z} + if isEVEXYMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x14) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVUSQW") + } + return p +} + +// VPMOVUSWB performs "Down Convert Packed Word Values to Byte Values with Unsigned Saturation". 
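+//
+// Sketch (register names assumed):
+//
+//     p.VPMOVUSWB(ZMM0, YMM1)   // 32 unsigned words -> 32 bytes with saturation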
+// +// Mnemonic : VPMOVUSWB +// Supported forms : (6 forms) +// +// * VPMOVUSWB zmm, ymm{k}{z} [AVX512BW] +// * VPMOVUSWB zmm, m256{k}{z} [AVX512BW] +// * VPMOVUSWB xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMOVUSWB xmm, m64{k}{z} [AVX512BW,AVX512VL] +// * VPMOVUSWB ymm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMOVUSWB ymm, m128{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMOVUSWB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVUSWB", 2, Operands { v0, v1 }) + // VPMOVUSWB zmm, ymm{k}{z} + if isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x10) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSWB zmm, m256{k}{z} + if isZMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x10) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VPMOVUSWB xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x10) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSWB xmm, m64{k}{z} + if isEVEXXMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x10) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPMOVUSWB ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x10) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVUSWB ymm, m128{k}{z} + if isEVEXYMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x10) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVUSWB") + } + return p +} + +// VPMOVW2M performs "Move Signs of Packed Word Integers to Mask Register". 
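+//
+// Sketch; K1 is assumed to be one of this package's mask-register constants:
+//
+//     p.VPMOVW2M(ZMM0, K1)   // bit i of K1 = sign bit of word i of ZMM0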
+// +// Mnemonic : VPMOVW2M +// Supported forms : (3 forms) +// +// * VPMOVW2M zmm, k [AVX512BW] +// * VPMOVW2M xmm, k [AVX512BW,AVX512VL] +// * VPMOVW2M ymm, k [AVX512BW,AVX512VL] +// +func (self *Program) VPMOVW2M(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVW2M", 2, Operands { v0, v1 }) + // VPMOVW2M zmm, k + if isZMM(v0) && isK(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x48) + m.emit(0x29) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVW2M xmm, k + if isEVEXXMM(v0) && isK(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x08) + m.emit(0x29) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVW2M ymm, k + if isEVEXYMM(v0) && isK(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfe) + m.emit(0x28) + m.emit(0x29) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVW2M") + } + return p +} + +// VPMOVWB performs "Down Convert Packed Word Values to Byte Values with Truncation". +// +// Mnemonic : VPMOVWB +// Supported forms : (6 forms) +// +// * VPMOVWB zmm, ymm{k}{z} [AVX512BW] +// * VPMOVWB zmm, m256{k}{z} [AVX512BW] +// * VPMOVWB xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMOVWB xmm, m64{k}{z} [AVX512BW,AVX512VL] +// * VPMOVWB ymm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMOVWB ymm, m128{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMOVWB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVWB", 2, Operands { v0, v1 }) + // VPMOVWB zmm, ymm{k}{z} + if isZMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x30) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVWB zmm, m256{k}{z} + if isZMM(v0) && isM256kz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x30) + m.mrsd(lcode(v[0]), addr(v[1]), 32) + }) + } + // VPMOVWB xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x30) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVWB xmm, m64{k}{z} + if isEVEXXMM(v0) && isM64kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x30) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPMOVWB ymm, xmm{k}{z} + if isEVEXYMM(v0) && isXMMkz(v1) { + 
self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[0]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[0]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x30) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // VPMOVWB ymm, m128{k}{z} + if isEVEXYMM(v0) && isM128kz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x30) + m.mrsd(lcode(v[0]), addr(v[1]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVWB") + } + return p +} + +// VPMOVZXBD performs "Move Packed Byte Integers to Doubleword Integers with Zero Extension". +// +// Mnemonic : VPMOVZXBD +// Supported forms : (10 forms) +// +// * VPMOVZXBD xmm, xmm [AVX] +// * VPMOVZXBD m32, xmm [AVX] +// * VPMOVZXBD xmm, ymm [AVX2] +// * VPMOVZXBD m64, ymm [AVX2] +// * VPMOVZXBD xmm, zmm{k}{z} [AVX512F] +// * VPMOVZXBD m128, zmm{k}{z} [AVX512F] +// * VPMOVZXBD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXBD xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXBD m32, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXBD m64, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVZXBD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVZXBD", 2, Operands { v0, v1 }) + // VPMOVZXBD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x31) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBD m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x31) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVZXBD xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x31) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBD m64, ymm + if isM64(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x31) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVZXBD xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x31) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBD m128, zmm{k}{z} + if isM128(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x31) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VPMOVZXBD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 
7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x31) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBD xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x31) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBD m32, xmm{k}{z} + if isM32(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x31) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VPMOVZXBD m64, ymm{k}{z} + if isM64(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x31) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVZXBD") + } + return p +} + +// VPMOVZXBQ performs "Move Packed Byte Integers to Quadword Integers with Zero Extension". +// +// Mnemonic : VPMOVZXBQ +// Supported forms : (10 forms) +// +// * VPMOVZXBQ xmm, xmm [AVX] +// * VPMOVZXBQ m16, xmm [AVX] +// * VPMOVZXBQ xmm, ymm [AVX2] +// * VPMOVZXBQ m32, ymm [AVX2] +// * VPMOVZXBQ xmm, zmm{k}{z} [AVX512F] +// * VPMOVZXBQ m64, zmm{k}{z} [AVX512F] +// * VPMOVZXBQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXBQ xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXBQ m16, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXBQ m32, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVZXBQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVZXBQ", 2, Operands { v0, v1 }) + // VPMOVZXBQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x32) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBQ m16, xmm + if isM16(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x32) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVZXBQ xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x32) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBQ m32, ymm + if isM32(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x32) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVZXBQ xmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x32) + m.emit(0xc0 | lcode(v[1]) << 
3 | lcode(v[0])) + }) + } + // VPMOVZXBQ m64, zmm{k}{z} + if isM64(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x32) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPMOVZXBQ xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x32) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBQ xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x32) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBQ m16, xmm{k}{z} + if isM16(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x32) + m.mrsd(lcode(v[1]), addr(v[0]), 2) + }) + } + // VPMOVZXBQ m32, ymm{k}{z} + if isM32(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x32) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVZXBQ") + } + return p +} + +// VPMOVZXBW performs "Move Packed Byte Integers to Word Integers with Zero Extension". 
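+//
+// A usage sketch (XMM0 and YMM1 assumed to be this package's register constants):
+//
+//     p.VPMOVZXBW(XMM0, YMM1)   // zero-extend the 16 bytes in XMM0 to 16 words in YMM1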
+// +// Mnemonic : VPMOVZXBW +// Supported forms : (10 forms) +// +// * VPMOVZXBW xmm, xmm [AVX] +// * VPMOVZXBW m64, xmm [AVX] +// * VPMOVZXBW xmm, ymm [AVX2] +// * VPMOVZXBW m128, ymm [AVX2] +// * VPMOVZXBW ymm, zmm{k}{z} [AVX512BW] +// * VPMOVZXBW m256, zmm{k}{z} [AVX512BW] +// * VPMOVZXBW xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMOVZXBW xmm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMOVZXBW m64, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMOVZXBW m128, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMOVZXBW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVZXBW", 2, Operands { v0, v1 }) + // VPMOVZXBW xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x30) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBW m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x30) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVZXBW xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x30) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBW m128, ymm + if isM128(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x30) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVZXBW ymm, zmm{k}{z} + if isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x30) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBW m256, zmm{k}{z} + if isM256(v0) && isZMMkz(v1) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x30) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VPMOVZXBW xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x30) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBW xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x30) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXBW m64, xmm{k}{z} + if isM64(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, 
kcode(v[1]), zcode(v[1]), 0) + m.emit(0x30) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPMOVZXBW m128, ymm{k}{z} + if isM128(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x30) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVZXBW") + } + return p +} + +// VPMOVZXDQ performs "Move Packed Doubleword Integers to Quadword Integers with Zero Extension". +// +// Mnemonic : VPMOVZXDQ +// Supported forms : (10 forms) +// +// * VPMOVZXDQ xmm, xmm [AVX] +// * VPMOVZXDQ m64, xmm [AVX] +// * VPMOVZXDQ xmm, ymm [AVX2] +// * VPMOVZXDQ m128, ymm [AVX2] +// * VPMOVZXDQ ymm, zmm{k}{z} [AVX512F] +// * VPMOVZXDQ m256, zmm{k}{z} [AVX512F] +// * VPMOVZXDQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXDQ xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXDQ m64, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXDQ m128, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVZXDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVZXDQ", 2, Operands { v0, v1 }) + // VPMOVZXDQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x35) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXDQ m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x35) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVZXDQ xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x35) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXDQ m128, ymm + if isM128(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x35) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVZXDQ ymm, zmm{k}{z} + if isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x35) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXDQ m256, zmm{k}{z} + if isM256(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x35) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VPMOVZXDQ xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x35) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXDQ xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { 
+ self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x35) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXDQ m64, xmm{k}{z} + if isM64(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x35) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPMOVZXDQ m128, ymm{k}{z} + if isM128(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x35) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVZXDQ") + } + return p +} + +// VPMOVZXWD performs "Move Packed Word Integers to Doubleword Integers with Zero Extension". +// +// Mnemonic : VPMOVZXWD +// Supported forms : (10 forms) +// +// * VPMOVZXWD xmm, xmm [AVX] +// * VPMOVZXWD m64, xmm [AVX] +// * VPMOVZXWD xmm, ymm [AVX2] +// * VPMOVZXWD m128, ymm [AVX2] +// * VPMOVZXWD ymm, zmm{k}{z} [AVX512F] +// * VPMOVZXWD m256, zmm{k}{z} [AVX512F] +// * VPMOVZXWD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXWD xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXWD m64, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXWD m128, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVZXWD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVZXWD", 2, Operands { v0, v1 }) + // VPMOVZXWD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x33) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXWD m64, xmm + if isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x33) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVZXWD xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x33) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXWD m128, ymm + if isM128(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x33) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVZXWD ymm, zmm{k}{z} + if isEVEXYMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x33) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXWD m256, zmm{k}{z} + if isM256(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), 
addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x33) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VPMOVZXWD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x33) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXWD xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x33) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXWD m64, xmm{k}{z} + if isM64(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x33) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VPMOVZXWD m128, ymm{k}{z} + if isM128(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x33) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVZXWD") + } + return p +} + +// VPMOVZXWQ performs "Move Packed Word Integers to Quadword Integers with Zero Extension". +// +// Mnemonic : VPMOVZXWQ +// Supported forms : (10 forms) +// +// * VPMOVZXWQ xmm, xmm [AVX] +// * VPMOVZXWQ m32, xmm [AVX] +// * VPMOVZXWQ xmm, ymm [AVX2] +// * VPMOVZXWQ m64, ymm [AVX2] +// * VPMOVZXWQ xmm, zmm{k}{z} [AVX512F] +// * VPMOVZXWQ m128, zmm{k}{z} [AVX512F] +// * VPMOVZXWQ xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXWQ xmm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXWQ m32, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMOVZXWQ m64, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMOVZXWQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPMOVZXWQ", 2, Operands { v0, v1 }) + // VPMOVZXWQ xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x34) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXWQ m32, xmm + if isM32(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x34) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVZXWQ xmm, ymm + if isXMM(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x34) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXWQ m64, ymm + if isM64(v0) && isYMM(v1) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x34) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPMOVZXWQ xmm, zmm{k}{z} + 
if isEVEXXMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x34) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXWQ m128, zmm{k}{z} + if isM128(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x34) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VPMOVZXWQ xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x34) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXWQ xmm, ymm{k}{z} + if isEVEXXMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x34) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPMOVZXWQ m32, xmm{k}{z} + if isM32(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x34) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VPMOVZXWQ m64, ymm{k}{z} + if isM64(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), 0) + m.emit(0x34) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPMOVZXWQ") + } + return p +} + +// VPMULDQ performs "Multiply Packed Signed Doubleword Integers and Store Quadword Result". 
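+//
+// Operands follow the source, source, destination order used throughout this
+// file; a sketch with assumed register constants:
+//
+//     p.VPMULDQ(XMM0, XMM1, XMM2)   // per 64-bit lane: XMM2 = sext(low dword of XMM1) * sext(low dword of XMM0)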
+// +// Mnemonic : VPMULDQ +// Supported forms : (10 forms) +// +// * VPMULDQ xmm, xmm, xmm [AVX] +// * VPMULDQ m128, xmm, xmm [AVX] +// * VPMULDQ ymm, ymm, ymm [AVX2] +// * VPMULDQ m256, ymm, ymm [AVX2] +// * VPMULDQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPMULDQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPMULDQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMULDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMULDQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMULDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMULDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMULDQ", 3, Operands { v0, v1, v2 }) + // VPMULDQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x28) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULDQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x28) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULDQ ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x28) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULDQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x28) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULDQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x28) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMULDQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x28) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULDQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x28) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMULDQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ 
(ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x28) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULDQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x28) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMULDQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x28) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMULDQ") + } + return p +} + +// VPMULHRSW performs "Packed Multiply Signed Word Integers and Store High Result with Round and Scale". +// +// Mnemonic : VPMULHRSW +// Supported forms : (10 forms) +// +// * VPMULHRSW xmm, xmm, xmm [AVX] +// * VPMULHRSW m128, xmm, xmm [AVX] +// * VPMULHRSW ymm, ymm, ymm [AVX2] +// * VPMULHRSW m256, ymm, ymm [AVX2] +// * VPMULHRSW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMULHRSW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMULHRSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMULHRSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMULHRSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMULHRSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMULHRSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMULHRSW", 3, Operands { v0, v1, v2 }) + // VPMULHRSW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x0b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHRSW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x0b) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULHRSW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x0b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHRSW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x0b) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULHRSW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + 
m.emit(0x0b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHRSW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x0b) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMULHRSW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x0b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHRSW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x0b) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMULHRSW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x0b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHRSW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x0b) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMULHRSW") + } + return p +} + +// VPMULHUW performs "Multiply Packed Unsigned Word Integers and Store High Result". 
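+//
+// Each destination word receives the high 16 bits of the 32-bit unsigned
+// product of the corresponding source words, e.g. (illustrative):
+//
+//	p.VPMULHUW(YMM2, YMM1, YMM0)	// ymm0[i] := (ymm1[i] * ymm2[i]) >> 16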
+// +// Mnemonic : VPMULHUW +// Supported forms : (10 forms) +// +// * VPMULHUW xmm, xmm, xmm [AVX] +// * VPMULHUW m128, xmm, xmm [AVX] +// * VPMULHUW ymm, ymm, ymm [AVX2] +// * VPMULHUW m256, ymm, ymm [AVX2] +// * VPMULHUW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMULHUW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMULHUW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMULHUW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMULHUW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMULHUW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMULHUW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMULHUW", 3, Operands { v0, v1, v2 }) + // VPMULHUW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHUW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe4) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULHUW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHUW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe4) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULHUW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xe4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHUW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe4) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMULHUW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xe4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHUW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe4) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMULHUW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + 
self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xe4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHUW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe4) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMULHUW") + } + return p +} + +// VPMULHW performs "Multiply Packed Signed Word Integers and Store High Result". +// +// Mnemonic : VPMULHW +// Supported forms : (10 forms) +// +// * VPMULHW xmm, xmm, xmm [AVX] +// * VPMULHW m128, xmm, xmm [AVX] +// * VPMULHW ymm, ymm, ymm [AVX2] +// * VPMULHW m256, ymm, ymm [AVX2] +// * VPMULHW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMULHW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMULHW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMULHW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMULHW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMULHW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMULHW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMULHW", 3, Operands { v0, v1, v2 }) + // VPMULHW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe5) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULHW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe5) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULHW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xe5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe5) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMULHW xmm, xmm, xmm{k}{z} + if 
isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xe5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe5) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMULHW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xe5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULHW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe5) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMULHW") + } + return p +} + +// VPMULLD performs "Multiply Packed Signed Doubleword Integers and Store Low Result". 
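+//
+// Only the low 32 bits of each 64-bit product are kept. Passing ZMM
+// operands selects the EVEX-encoded AVX-512F form, e.g. (illustrative):
+//
+//	p.VPMULLD(ZMM2, ZMM1, ZMM0)	// zmm0[i] := low32(zmm1[i] * zmm2[i])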
+// +// Mnemonic : VPMULLD +// Supported forms : (10 forms) +// +// * VPMULLD xmm, xmm, xmm [AVX] +// * VPMULLD m128, xmm, xmm [AVX] +// * VPMULLD ymm, ymm, ymm [AVX2] +// * VPMULLD m256, ymm, ymm [AVX2] +// * VPMULLD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPMULLD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPMULLD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMULLD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMULLD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMULLD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMULLD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMULLD", 3, Operands { v0, v1, v2 }) + // VPMULLD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x40) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULLD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x40) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULLD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x40) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULLD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x40) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULLD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x40) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMULLD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x40) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULLD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x40) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMULLD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ 
(ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x40) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULLD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x40) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMULLD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x40) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMULLD") + } + return p +} + +// VPMULLQ performs "Multiply Packed Signed Quadword Integers and Store Low Result". +// +// Mnemonic : VPMULLQ +// Supported forms : (6 forms) +// +// * VPMULLQ m512/m64bcst, zmm, zmm{k}{z} [AVX512DQ] +// * VPMULLQ zmm, zmm, zmm{k}{z} [AVX512DQ] +// * VPMULLQ m128/m64bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VPMULLQ xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VPMULLQ m256/m64bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VPMULLQ ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VPMULLQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMULLQ", 3, Operands { v0, v1, v2 }) + // VPMULLQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x40) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMULLQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x40) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULLQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x40) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMULLQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x40) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULLQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = 
DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x40) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMULLQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x40) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMULLQ") + } + return p +} + +// VPMULLW performs "Multiply Packed Signed Word Integers and Store Low Result". +// +// Mnemonic : VPMULLW +// Supported forms : (10 forms) +// +// * VPMULLW xmm, xmm, xmm [AVX] +// * VPMULLW m128, xmm, xmm [AVX] +// * VPMULLW ymm, ymm, ymm [AVX2] +// * VPMULLW m256, ymm, ymm [AVX2] +// * VPMULLW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPMULLW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPMULLW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMULLW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPMULLW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPMULLW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPMULLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMULLW", 3, Operands { v0, v1, v2 }) + // VPMULLW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULLW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd5) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULLW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULLW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd5) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULLW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xd5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULLW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd5) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMULLW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) 
{ + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xd5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULLW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd5) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMULLW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xd5) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULLW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd5) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPMULLW") + } + return p +} + +// VPMULTISHIFTQB performs "Select Packed Unaligned Bytes from Quadword Sources". +// +// Mnemonic : VPMULTISHIFTQB +// Supported forms : (6 forms) +// +// * VPMULTISHIFTQB m128/m64bcst, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL] +// * VPMULTISHIFTQB xmm, xmm, xmm{k}{z} [AVX512VBMI,AVX512VL] +// * VPMULTISHIFTQB m256/m64bcst, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL] +// * VPMULTISHIFTQB ymm, ymm, ymm{k}{z} [AVX512VBMI,AVX512VL] +// * VPMULTISHIFTQB m512/m64bcst, zmm, zmm{k}{z} [AVX512VBMI] +// * VPMULTISHIFTQB zmm, zmm, zmm{k}{z} [AVX512VBMI] +// +func (self *Program) VPMULTISHIFTQB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMULTISHIFTQB", 3, Operands { v0, v1, v2 }) + // VPMULTISHIFTQB m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VBMI | ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x83) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMULTISHIFTQB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VBMI | ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x83) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULTISHIFTQB m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VBMI | ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), 
vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x83) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMULTISHIFTQB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VBMI | ISA_AVX512VL) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x83) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULTISHIFTQB m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x83) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMULTISHIFTQB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512VBMI) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x83) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMULTISHIFTQB") + } + return p +} + +// VPMULUDQ performs "Multiply Packed Unsigned Doubleword Integers". +// +// Mnemonic : VPMULUDQ +// Supported forms : (10 forms) +// +// * VPMULUDQ xmm, xmm, xmm [AVX] +// * VPMULUDQ m128, xmm, xmm [AVX] +// * VPMULUDQ ymm, ymm, ymm [AVX2] +// * VPMULUDQ m256, ymm, ymm [AVX2] +// * VPMULUDQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPMULUDQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPMULUDQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMULUDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPMULUDQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPMULUDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPMULUDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPMULUDQ", 3, Operands { v0, v1, v2 }) + // VPMULUDQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULUDQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf4) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULUDQ ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULUDQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf4) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPMULUDQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + 
self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xf4) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPMULUDQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xf4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULUDQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xf4) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPMULUDQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xf4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPMULUDQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xf4) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPMULUDQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xf4) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPMULUDQ") + } + return p +} + +// VPOPCNTD performs "Packed Population Count for Doubleword Integers". 
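+//
+// This is a two-operand instruction: the per-doubleword population count of
+// the source is written to the destination, e.g. (illustrative):
+//
+//	p.VPOPCNTD(ZMM1, ZMM0)	// zmm0[i] := number of set bits in zmm1[i]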
+// +// Mnemonic : VPOPCNTD +// Supported forms : (2 forms) +// +// * VPOPCNTD m512/m32bcst, zmm{k}{z} [AVX512VPOPCNTDQ] +// * VPOPCNTD zmm, zmm{k}{z} [AVX512VPOPCNTDQ] +// +func (self *Program) VPOPCNTD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPOPCNTD", 2, Operands { v0, v1 }) + // VPOPCNTD m512/m32bcst, zmm{k}{z} + if isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512VPOPCNTDQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x55) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VPOPCNTD zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512VPOPCNTDQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x55) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPOPCNTD") + } + return p +} + +// VPOPCNTQ performs "Packed Population Count for Quadword Integers". +// +// Mnemonic : VPOPCNTQ +// Supported forms : (2 forms) +// +// * VPOPCNTQ m512/m64bcst, zmm{k}{z} [AVX512VPOPCNTDQ] +// * VPOPCNTQ zmm, zmm{k}{z} [AVX512VPOPCNTDQ] +// +func (self *Program) VPOPCNTQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPOPCNTQ", 2, Operands { v0, v1 }) + // VPOPCNTQ m512/m64bcst, zmm{k}{z} + if isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512VPOPCNTDQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x55) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VPOPCNTQ zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512VPOPCNTDQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x55) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPOPCNTQ") + } + return p +} + +// VPOR performs "Packed Bitwise Logical OR". 
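+//
+// Only the VEX-encoded 128- and 256-bit forms exist; there is no EVEX
+// encoding of VPOR, so AVX-512 code should use VPORD or VPORQ instead.
+// An illustrative call:
+//
+//	p.VPOR(XMM2, XMM1, XMM0)	// xmm0 := xmm1 | xmm2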
+// +// Mnemonic : VPOR +// Supported forms : (4 forms) +// +// * VPOR xmm, xmm, xmm [AVX] +// * VPOR m128, xmm, xmm [AVX] +// * VPOR ymm, ymm, ymm [AVX2] +// * VPOR m256, ymm, ymm [AVX2] +// +func (self *Program) VPOR(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPOR", 3, Operands { v0, v1, v2 }) + // VPOR xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xeb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPOR m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xeb) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPOR ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xeb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPOR m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xeb) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPOR") + } + return p +} + +// VPORD performs "Bitwise Logical OR of Packed Doubleword Integers". +// +// Mnemonic : VPORD +// Supported forms : (6 forms) +// +// * VPORD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPORD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPORD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPORD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPORD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPORD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPORD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPORD", 3, Operands { v0, v1, v2 }) + // VPORD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xeb) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPORD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xeb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPORD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xeb) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPORD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ 
((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xeb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPORD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xeb) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPORD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xeb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPORD") + } + return p +} + +// VPORQ performs "Bitwise Logical OR of Packed Quadword Integers". +// +// Mnemonic : VPORQ +// Supported forms : (6 forms) +// +// * VPORQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPORQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPORQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPORQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPORQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPORQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPORQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPORQ", 3, Operands { v0, v1, v2 }) + // VPORQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xeb) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPORQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xeb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPORQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xeb) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPORQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xeb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPORQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && 
isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xeb) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPORQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xeb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPORQ") + } + return p +} + +// VPPERM performs "Packed Permute Bytes". +// +// Mnemonic : VPPERM +// Supported forms : (3 forms) +// +// * VPPERM xmm, xmm, xmm, xmm [XOP] +// * VPPERM m128, xmm, xmm, xmm [XOP] +// * VPPERM xmm, m128, xmm, xmm [XOP] +// +func (self *Program) VPPERM(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPPERM", 4, Operands { v0, v1, v2, v3 }) + // VPPERM xmm, xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[2]) << 3)) + m.emit(0xa3) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.emit(hlcode(v[0]) << 4) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[3]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[2]) << 3)) + m.emit(0xa3) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[0])) + m.emit(hlcode(v[1]) << 4) + }) + } + // VPPERM m128, xmm, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x80, hcode(v[3]), addr(v[0]), hlcode(v[2])) + m.emit(0xa3) + m.mrsd(lcode(v[3]), addr(v[0]), 1) + m.emit(hlcode(v[1]) << 4) + }) + } + // VPPERM xmm, m128, xmm, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xa3) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.emit(hlcode(v[0]) << 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPPERM") + } + return p +} + +// VPROLD performs "Rotate Packed Doubleword Left". 
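+//
+// The rotate count is an 8-bit immediate, passed here as an ordinary Go
+// integer (the imm8 forms are selected via isImm8), e.g. (illustrative):
+//
+//	p.VPROLD(7, ZMM1, ZMM0)	// zmm0[i] := zmm1[i] rotated left by 7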
+// +// Mnemonic : VPROLD +// Supported forms : (6 forms) +// +// * VPROLD imm8, m512/m32bcst, zmm{k}{z} [AVX512F] +// * VPROLD imm8, zmm, zmm{k}{z} [AVX512F] +// * VPROLD imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPROLD imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPROLD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPROLD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPROLD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPROLD", 3, Operands { v0, v1, v2 }) + // VPROLD imm8, m512/m32bcst, zmm{k}{z} + if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(1, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPROLD imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x72) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPROLD imm8, m128/m32bcst, xmm{k}{z} + if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(1, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPROLD imm8, m256/m32bcst, ymm{k}{z} + if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(1, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPROLD imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x72) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPROLD imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x72) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPROLD") + } + return p +} + +// VPROLQ performs "Rotate Packed Quadword Left". 
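+//
+// The quadword counterpart of VPROLD: the same imm8 count rotates 64-bit
+// lanes instead, e.g. (illustrative):
+//
+//	p.VPROLQ(63, YMM1, YMM0)	// ymm0[i] := ymm1[i] rotated left by 63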
+// +// Mnemonic : VPROLQ +// Supported forms : (6 forms) +// +// * VPROLQ imm8, m512/m64bcst, zmm{k}{z} [AVX512F] +// * VPROLQ imm8, zmm, zmm{k}{z} [AVX512F] +// * VPROLQ imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPROLQ imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPROLQ imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPROLQ imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPROLQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPROLQ", 3, Operands { v0, v1, v2 }) + // VPROLQ imm8, m512/m64bcst, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(1, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPROLQ imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x72) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPROLQ imm8, m128/m64bcst, xmm{k}{z} + if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(1, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPROLQ imm8, m256/m64bcst, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(1, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPROLQ imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x72) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPROLQ imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x72) + m.emit(0xc8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPROLQ") + } + return p +} + +// VPROLVD performs "Variable Rotate Packed Doubleword Left". 
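+//
+// Unlike VPROLD, the rotate counts come from a vector operand, one count per
+// doubleword lane, e.g. (illustrative):
+//
+//	p.VPROLVD(ZMM2, ZMM1, ZMM0)	// zmm0[i] := zmm1[i] rotated left by zmm2[i]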
+// +// Mnemonic : VPROLVD +// Supported forms : (6 forms) +// +// * VPROLVD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPROLVD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPROLVD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPROLVD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPROLVD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPROLVD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPROLVD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPROLVD", 3, Operands { v0, v1, v2 }) + // VPROLVD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPROLVD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPROLVD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPROLVD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPROLVD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPROLVD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPROLVD") + } + return p +} + +// VPROLVQ performs "Variable Rotate Packed Quadword Left". 
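+//
+// Illustrative sketch (assumes a *Program p, this package's ZMM register
+// constants, and that the first operand carries the per-lane rotate
+// counts); rotates each quadword of ZMM1 left by the count in the
+// corresponding lane of ZMM0, writing ZMM2:
+//
+//     p.VPROLVQ(ZMM0, ZMM1, ZMM2)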
+// +// Mnemonic : VPROLVQ +// Supported forms : (6 forms) +// +// * VPROLVQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPROLVQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPROLVQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPROLVQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPROLVQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPROLVQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPROLVQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPROLVQ", 3, Operands { v0, v1, v2 }) + // VPROLVQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPROLVQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPROLVQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPROLVQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPROLVQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPROLVQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPROLVQ") + } + return p +} + +// VPRORD performs "Rotate Packed Doubleword Right". 
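+//
+// Illustrative sketch (assumes a *Program p and this package's ZMM
+// register constants); rotates every doubleword lane of ZMM1 right by 7
+// bits into ZMM2:
+//
+//     p.VPRORD(7, ZMM1, ZMM2)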
+// +// Mnemonic : VPRORD +// Supported forms : (6 forms) +// +// * VPRORD imm8, m512/m32bcst, zmm{k}{z} [AVX512F] +// * VPRORD imm8, zmm, zmm{k}{z} [AVX512F] +// * VPRORD imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPRORD imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPRORD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPRORD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPRORD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPRORD", 3, Operands { v0, v1, v2 }) + // VPRORD imm8, m512/m32bcst, zmm{k}{z} + if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(0, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPRORD imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x72) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPRORD imm8, m128/m32bcst, xmm{k}{z} + if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(0, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPRORD imm8, m256/m32bcst, ymm{k}{z} + if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(0, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPRORD imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x72) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPRORD imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x72) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPRORD") + } + return p +} + +// VPRORQ performs "Rotate Packed Quadword Right". 
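+//
+// Illustrative sketch (assumes a *Program p and this package's ZMM
+// register constants); rotates every quadword lane of ZMM1 right by 32
+// bits (swapping each lane's halves) into ZMM2:
+//
+//     p.VPRORQ(32, ZMM1, ZMM2)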
+// +// Mnemonic : VPRORQ +// Supported forms : (6 forms) +// +// * VPRORQ imm8, m512/m64bcst, zmm{k}{z} [AVX512F] +// * VPRORQ imm8, zmm, zmm{k}{z} [AVX512F] +// * VPRORQ imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPRORQ imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPRORQ imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPRORQ imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPRORQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPRORQ", 3, Operands { v0, v1, v2 }) + // VPRORQ imm8, m512/m64bcst, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(0, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPRORQ imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x72) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPRORQ imm8, m128/m64bcst, xmm{k}{z} + if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(0, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPRORQ imm8, m256/m64bcst, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(0, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPRORQ imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x72) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPRORQ imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x72) + m.emit(0xc0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPRORQ") + } + return p +} + +// VPRORVD performs "Variable Rotate Packed Doubleword Right". 
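+//
+// Illustrative sketch (assumes a *Program p, this package's ZMM register
+// constants, and that the first operand carries the per-lane rotate
+// counts); rotates each doubleword of ZMM1 right by the count in the
+// corresponding lane of ZMM0, writing ZMM2:
+//
+//     p.VPRORVD(ZMM0, ZMM1, ZMM2)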
+// +// Mnemonic : VPRORVD +// Supported forms : (6 forms) +// +// * VPRORVD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPRORVD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPRORVD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPRORVD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPRORVD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPRORVD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPRORVD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPRORVD", 3, Operands { v0, v1, v2 }) + // VPRORVD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPRORVD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPRORVD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPRORVD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPRORVD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPRORVD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPRORVD") + } + return p +} + +// VPRORVQ performs "Variable Rotate Packed Quadword Right". 
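+//
+// Illustrative sketch (assumes a *Program p, this package's ZMM register
+// constants, and that the first operand carries the per-lane rotate
+// counts); rotates each quadword of ZMM1 right by the count in the
+// corresponding lane of ZMM0, writing ZMM2:
+//
+//     p.VPRORVQ(ZMM0, ZMM1, ZMM2)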
+// +// Mnemonic : VPRORVQ +// Supported forms : (6 forms) +// +// * VPRORVQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPRORVQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPRORVQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPRORVQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPRORVQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPRORVQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPRORVQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPRORVQ", 3, Operands { v0, v1, v2 }) + // VPRORVQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPRORVQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPRORVQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPRORVQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPRORVQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPRORVQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPRORVQ") + } + return p +} + +// VPROTB performs "Packed Rotate Bytes". 
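+//
+// Illustrative sketch (XOP is an AMD-only extension; assumes a *Program
+// p and this package's XMM register constants); rotates every byte of
+// XMM0 left by 3 bits into XMM1 (a negative count would rotate right):
+//
+//     p.VPROTB(3, XMM0, XMM1)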
+// +// Mnemonic : VPROTB +// Supported forms : (5 forms) +// +// * VPROTB imm8, xmm, xmm [XOP] +// * VPROTB xmm, xmm, xmm [XOP] +// * VPROTB m128, xmm, xmm [XOP] +// * VPROTB imm8, m128, xmm [XOP] +// * VPROTB xmm, m128, xmm [XOP] +// +func (self *Program) VPROTB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPROTB", 3, Operands { v0, v1, v2 }) + // VPROTB imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78) + m.emit(0xc0) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPROTB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0x90) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x90) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPROTB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x90) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPROTB imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[2]), addr(v[1]), 0) + m.emit(0xc0) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPROTB xmm, m128, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x90) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPROTB") + } + return p +} + +// VPROTD performs "Packed Rotate Doublewords". 
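+//
+// Illustrative sketch (XOP is AMD-only; assumes a *Program p and this
+// package's XMM register constants); rotates every doubleword of XMM0
+// left by 8 bits into XMM1:
+//
+//     p.VPROTD(8, XMM0, XMM1)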
+// +// Mnemonic : VPROTD +// Supported forms : (5 forms) +// +// * VPROTD imm8, xmm, xmm [XOP] +// * VPROTD xmm, xmm, xmm [XOP] +// * VPROTD m128, xmm, xmm [XOP] +// * VPROTD imm8, m128, xmm [XOP] +// * VPROTD xmm, m128, xmm [XOP] +// +func (self *Program) VPROTD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPROTD", 3, Operands { v0, v1, v2 }) + // VPROTD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78) + m.emit(0xc2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPROTD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0x92) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x92) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPROTD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x92) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPROTD imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[2]), addr(v[1]), 0) + m.emit(0xc2) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPROTD xmm, m128, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x92) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPROTD") + } + return p +} + +// VPROTQ performs "Packed Rotate Quadwords". 
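+//
+// Illustrative sketch (XOP is AMD-only; assumes a *Program p and this
+// package's XMM register constants); rotates every quadword of XMM0
+// left by 16 bits into XMM1:
+//
+//     p.VPROTQ(16, XMM0, XMM1)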
+// +// Mnemonic : VPROTQ +// Supported forms : (5 forms) +// +// * VPROTQ imm8, xmm, xmm [XOP] +// * VPROTQ xmm, xmm, xmm [XOP] +// * VPROTQ m128, xmm, xmm [XOP] +// * VPROTQ imm8, m128, xmm [XOP] +// * VPROTQ xmm, m128, xmm [XOP] +// +func (self *Program) VPROTQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPROTQ", 3, Operands { v0, v1, v2 }) + // VPROTQ imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78) + m.emit(0xc3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPROTQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0x93) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x93) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPROTQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x93) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPROTQ imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[2]), addr(v[1]), 0) + m.emit(0xc3) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPROTQ xmm, m128, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x93) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPROTQ") + } + return p +} + +// VPROTW performs "Packed Rotate Words". 
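+//
+// Illustrative sketch (XOP is AMD-only; assumes a *Program p and this
+// package's XMM register constants); rotates every word of XMM0 left by
+// 4 bits into XMM1 (a negative count would rotate right):
+//
+//     p.VPROTW(4, XMM0, XMM1)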
+// +// Mnemonic : VPROTW +// Supported forms : (5 forms) +// +// * VPROTW imm8, xmm, xmm [XOP] +// * VPROTW xmm, xmm, xmm [XOP] +// * VPROTW m128, xmm, xmm [XOP] +// * VPROTW imm8, m128, xmm [XOP] +// * VPROTW xmm, m128, xmm [XOP] +// +func (self *Program) VPROTW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPROTW", 3, Operands { v0, v1, v2 }) + // VPROTW imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe8 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78) + m.emit(0xc1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPROTW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0x91) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x91) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPROTW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x91) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPROTW imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1000, 0x00, hcode(v[2]), addr(v[1]), 0) + m.emit(0xc1) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPROTW xmm, m128, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x91) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPROTW") + } + return p +} + +// VPSADBW performs "Compute Sum of Absolute Differences". 
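+//
+// Illustrative sketch (assumes a *Program p and this package's XMM
+// register constants); accumulates the absolute differences of the
+// corresponding bytes of XMM0 and XMM1 into one sum per 64-bit half of
+// XMM2:
+//
+//     p.VPSADBW(XMM0, XMM1, XMM2)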
+// +// Mnemonic : VPSADBW +// Supported forms : (10 forms) +// +// * VPSADBW xmm, xmm, xmm [AVX] +// * VPSADBW m128, xmm, xmm [AVX] +// * VPSADBW ymm, ymm, ymm [AVX2] +// * VPSADBW m256, ymm, ymm [AVX2] +// * VPSADBW zmm, zmm, zmm [AVX512BW] +// * VPSADBW m512, zmm, zmm [AVX512BW] +// * VPSADBW xmm, xmm, xmm [AVX512BW,AVX512VL] +// * VPSADBW m128, xmm, xmm [AVX512BW,AVX512VL] +// * VPSADBW ymm, ymm, ymm [AVX512BW,AVX512VL] +// * VPSADBW m256, ymm, ymm [AVX512BW,AVX512VL] +// +func (self *Program) VPSADBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSADBW", 3, Operands { v0, v1, v2 }) + // VPSADBW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSADBW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf6) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSADBW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSADBW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf6) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSADBW zmm, zmm, zmm + if isZMM(v0) && isZMM(v1) && isZMM(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x40) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSADBW m512, zmm, zmm + if isM512(v0) && isZMM(v1) && isZMM(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0xf6) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSADBW xmm, xmm, xmm + if isEVEXXMM(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x00) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSADBW m128, xmm, xmm + if isM128(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0xf6) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSADBW ymm, ymm, ymm + if isEVEXYMM(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | 0x20) + m.emit(0xf6) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSADBW m256, ymm, ymm + if isM256(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), 0, 0, 0) + m.emit(0xf6) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPSADBW") + } + return p +} + +// VPSCATTERDD performs "Scatter Packed Doubleword Values with Signed Doubleword Indices". +// +// Mnemonic : VPSCATTERDD +// Supported forms : (3 forms) +// +// * VPSCATTERDD zmm, vm32z{k} [AVX512F] +// * VPSCATTERDD xmm, vm32x{k} [AVX512F,AVX512VL] +// * VPSCATTERDD ymm, vm32y{k} [AVX512F,AVX512VL] +// +func (self *Program) VPSCATTERDD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPSCATTERDD", 2, Operands { v0, v1 }) + // VPSCATTERDD zmm, vm32z{k} + if isZMM(v0) && isVMZk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa0) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VPSCATTERDD xmm, vm32x{k} + if isEVEXXMM(v0) && isVMXk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa0) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VPSCATTERDD ymm, vm32y{k} + if isEVEXYMM(v0) && isVMYk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa0) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPSCATTERDD") + } + return p +} + +// VPSCATTERDQ performs "Scatter Packed Quadword Values with Signed Doubleword Indices". 
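+//
+// Illustrative sketch; vm below is a placeholder, not a real identifier:
+// it stands for a vm32y vector-index memory operand with a write mask,
+// built with this package's memory-operand API. The call scatters the
+// eight quadwords of ZMM1 to the masked, index-addressed locations:
+//
+//     p.VPSCATTERDQ(ZMM1, vm)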
+// +// Mnemonic : VPSCATTERDQ +// Supported forms : (3 forms) +// +// * VPSCATTERDQ zmm, vm32y{k} [AVX512F] +// * VPSCATTERDQ xmm, vm32x{k} [AVX512F,AVX512VL] +// * VPSCATTERDQ ymm, vm32x{k} [AVX512F,AVX512VL] +// +func (self *Program) VPSCATTERDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPSCATTERDQ", 2, Operands { v0, v1 }) + // VPSCATTERDQ zmm, vm32y{k} + if isZMM(v0) && isVMYk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa0) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPSCATTERDQ xmm, vm32x{k} + if isEVEXXMM(v0) && isVMXk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa0) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPSCATTERDQ ymm, vm32x{k} + if isEVEXYMM(v0) && isVMXk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa0) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPSCATTERDQ") + } + return p +} + +// VPSCATTERQD performs "Scatter Packed Doubleword Values with Signed Quadword Indices". +// +// Mnemonic : VPSCATTERQD +// Supported forms : (3 forms) +// +// * VPSCATTERQD ymm, vm64z{k} [AVX512F] +// * VPSCATTERQD xmm, vm64x{k} [AVX512F,AVX512VL] +// * VPSCATTERQD xmm, vm64y{k} [AVX512F,AVX512VL] +// +func (self *Program) VPSCATTERQD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPSCATTERQD", 2, Operands { v0, v1 }) + // VPSCATTERQD ymm, vm64z{k} + if isEVEXYMM(v0) && isVMZk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa1) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VPSCATTERQD xmm, vm64x{k} + if isEVEXXMM(v0) && isVMXk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa1) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VPSCATTERQD xmm, vm64y{k} + if isEVEXXMM(v0) && isVMYk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa1) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VPSCATTERQD") + } + return p +} + +// VPSCATTERQQ performs "Scatter Packed Quadword Values with Signed Quadword Indices". 
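+//
+// Illustrative sketch; vm below is a placeholder, not a real identifier:
+// it stands for a vm64z vector-index memory operand with a write mask,
+// built with this package's memory-operand API. The call scatters the
+// eight quadwords of ZMM1 to the masked, index-addressed locations:
+//
+//     p.VPSCATTERQQ(ZMM1, vm)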
+// +// Mnemonic : VPSCATTERQQ +// Supported forms : (3 forms) +// +// * VPSCATTERQQ zmm, vm64z{k} [AVX512F] +// * VPSCATTERQQ xmm, vm64x{k} [AVX512F,AVX512VL] +// * VPSCATTERQQ ymm, vm64y{k} [AVX512F,AVX512VL] +// +func (self *Program) VPSCATTERQQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPSCATTERQQ", 2, Operands { v0, v1 }) + // VPSCATTERQQ zmm, vm64z{k} + if isZMM(v0) && isVMZk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa1) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPSCATTERQQ xmm, vm64x{k} + if isEVEXXMM(v0) && isVMXk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa1) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VPSCATTERQQ ymm, vm64y{k} + if isEVEXYMM(v0) && isVMYk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa1) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VPSCATTERQQ") + } + return p +} + +// VPSHAB performs "Packed Shift Arithmetic Bytes". +// +// Mnemonic : VPSHAB +// Supported forms : (3 forms) +// +// * VPSHAB xmm, xmm, xmm [XOP] +// * VPSHAB m128, xmm, xmm [XOP] +// * VPSHAB xmm, m128, xmm [XOP] +// +func (self *Program) VPSHAB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSHAB", 3, Operands { v0, v1, v2 }) + // VPSHAB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0x98) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x98) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHAB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x98) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSHAB xmm, m128, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x98) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPSHAB") + } + return p +} + +// VPSHAD performs "Packed Shift Arithmetic Doublewords". 
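+//
+// Illustrative sketch (XOP is AMD-only; assumes a *Program p, this
+// package's XMM register constants, and that the first operand carries
+// the signed per-lane shift counts, negative meaning a right shift);
+// shifts each doubleword of XMM1 arithmetically by the count in the
+// corresponding lane of XMM0, writing XMM2:
+//
+//     p.VPSHAD(XMM0, XMM1, XMM2)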
+// +// Mnemonic : VPSHAD +// Supported forms : (3 forms) +// +// * VPSHAD xmm, xmm, xmm [XOP] +// * VPSHAD m128, xmm, xmm [XOP] +// * VPSHAD xmm, m128, xmm [XOP] +// +func (self *Program) VPSHAD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSHAD", 3, Operands { v0, v1, v2 }) + // VPSHAD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x9a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHAD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSHAD xmm, m128, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x9a) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPSHAD") + } + return p +} + +// VPSHAQ performs "Packed Shift Arithmetic Quadwords". +// +// Mnemonic : VPSHAQ +// Supported forms : (3 forms) +// +// * VPSHAQ xmm, xmm, xmm [XOP] +// * VPSHAQ m128, xmm, xmm [XOP] +// * VPSHAQ xmm, m128, xmm [XOP] +// +func (self *Program) VPSHAQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSHAQ", 3, Operands { v0, v1, v2 }) + // VPSHAQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0x9b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x9b) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHAQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x9b) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSHAQ xmm, m128, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x9b) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPSHAQ") + } + return p +} + +// VPSHAW performs "Packed Shift Arithmetic Words". 
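+//
+// Illustrative sketch (XOP is AMD-only; assumes a *Program p, this
+// package's XMM register constants, and that the first operand carries
+// the signed per-lane shift counts, negative meaning a right shift);
+// shifts each word of XMM1 arithmetically by the count in the
+// corresponding lane of XMM0, writing XMM2:
+//
+//     p.VPSHAW(XMM0, XMM1, XMM2)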
+// +// Mnemonic : VPSHAW +// Supported forms : (3 forms) +// +// * VPSHAW xmm, xmm, xmm [XOP] +// * VPSHAW m128, xmm, xmm [XOP] +// * VPSHAW xmm, m128, xmm [XOP] +// +func (self *Program) VPSHAW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSHAW", 3, Operands { v0, v1, v2 }) + // VPSHAW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0x99) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x99) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHAW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x99) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSHAW xmm, m128, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x99) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPSHAW") + } + return p +} + +// VPSHLB performs "Packed Shift Logical Bytes". +// +// Mnemonic : VPSHLB +// Supported forms : (3 forms) +// +// * VPSHLB xmm, xmm, xmm [XOP] +// * VPSHLB m128, xmm, xmm [XOP] +// * VPSHLB xmm, m128, xmm [XOP] +// +func (self *Program) VPSHLB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSHLB", 3, Operands { v0, v1, v2 }) + // VPSHLB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0x94) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x94) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHLB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x94) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSHLB xmm, m128, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x94) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPSHLB") + } + return p +} + +// VPSHLD performs "Packed Shift Logical Doublewords". 
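+//
+// Illustrative sketch (XOP is AMD-only; assumes a *Program p, this
+// package's XMM register constants, and that the first operand carries
+// the signed per-lane shift counts, negative meaning a right shift);
+// shifts each doubleword of XMM1 logically by the count in the
+// corresponding lane of XMM0, writing XMM2:
+//
+//     p.VPSHLD(XMM0, XMM1, XMM2)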
+// +// Mnemonic : VPSHLD +// Supported forms : (3 forms) +// +// * VPSHLD xmm, xmm, xmm [XOP] +// * VPSHLD m128, xmm, xmm [XOP] +// * VPSHLD xmm, m128, xmm [XOP] +// +func (self *Program) VPSHLD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSHLD", 3, Operands { v0, v1, v2 }) + // VPSHLD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0x96) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x96) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHLD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x96) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSHLD xmm, m128, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x96) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPSHLD") + } + return p +} + +// VPSHLQ performs "Packed Shift Logical Quadwords". +// +// Mnemonic : VPSHLQ +// Supported forms : (3 forms) +// +// * VPSHLQ xmm, xmm, xmm [XOP] +// * VPSHLQ m128, xmm, xmm [XOP] +// * VPSHLQ xmm, m128, xmm [XOP] +// +func (self *Program) VPSHLQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSHLQ", 3, Operands { v0, v1, v2 }) + // VPSHLQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0x97) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x97) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHLQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x97) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSHLQ xmm, m128, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x97) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPSHLQ") + } + return p +} + +// VPSHLW performs "Packed Shift Logical Words". 
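+//
+// Illustrative sketch (XOP is AMD-only; assumes a *Program p, this
+// package's XMM register constants, and that the first operand carries
+// the signed per-lane shift counts, negative meaning a right shift);
+// shifts each word of XMM1 logically by the count in the corresponding
+// lane of XMM0, writing XMM2:
+//
+//     p.VPSHLW(XMM0, XMM1, XMM2)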
+// +// Mnemonic : VPSHLW +// Supported forms : (3 forms) +// +// * VPSHLW xmm, xmm, xmm [XOP] +// * VPSHLW m128, xmm, xmm [XOP] +// * VPSHLW xmm, m128, xmm [XOP] +// +func (self *Program) VPSHLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSHLW", 3, Operands { v0, v1, v2 }) + // VPSHLW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x78 ^ (hlcode(v[0]) << 3)) + m.emit(0x95) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x8f) + m.emit(0xe9 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf8 ^ (hlcode(v[1]) << 3)) + m.emit(0x95) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHLW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x80, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x95) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSHLW xmm, m128, xmm + if isXMM(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_XOP) + p.domain = DomainAMDSpecific + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0x8f, 0b1001, 0x00, hcode(v[2]), addr(v[1]), hlcode(v[0])) + m.emit(0x95) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPSHLW") + } + return p +} + +// VPSHUFB performs "Packed Shuffle Bytes". +// +// Mnemonic : VPSHUFB +// Supported forms : (10 forms) +// +// * VPSHUFB xmm, xmm, xmm [AVX] +// * VPSHUFB m128, xmm, xmm [AVX] +// * VPSHUFB ymm, ymm, ymm [AVX2] +// * VPSHUFB m256, ymm, ymm [AVX2] +// * VPSHUFB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPSHUFB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPSHUFB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSHUFB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSHUFB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSHUFB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSHUFB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSHUFB", 3, Operands { v0, v1, v2 }) + // VPSHUFB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x00) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHUFB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x00) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSHUFB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x00) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHUFB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x00) + m.mrsd(lcode(v[2]), 
addr(v[0]), 1) + }) + } + // VPSHUFB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x00) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHUFB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x00) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSHUFB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x00) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHUFB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x00) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSHUFB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x00) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSHUFB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x00) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPSHUFB") + } + return p +} + +// VPSHUFD performs "Shuffle Packed Doublewords". 
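+//
+// Illustrative sketch (assumes a *Program p and this package's XMM
+// register constants); imm8 0x1b selects source doublewords 3, 2, 1, 0
+// in that order, i.e. it reverses the four doublewords of XMM1 into
+// XMM2:
+//
+//     p.VPSHUFD(0x1b, XMM1, XMM2)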
+// +// Mnemonic : VPSHUFD +// Supported forms : (10 forms) +// +// * VPSHUFD imm8, xmm, xmm [AVX] +// * VPSHUFD imm8, m128, xmm [AVX] +// * VPSHUFD imm8, ymm, ymm [AVX2] +// * VPSHUFD imm8, m256, ymm [AVX2] +// * VPSHUFD imm8, m512/m32bcst, zmm{k}{z} [AVX512F] +// * VPSHUFD imm8, zmm, zmm{k}{z} [AVX512F] +// * VPSHUFD imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSHUFD imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSHUFD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSHUFD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSHUFD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSHUFD", 3, Operands { v0, v1, v2 }) + // VPSHUFD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[1], 0) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFD imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[1]), 0) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFD imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[1], 0) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFD imm8, m256, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[1]), 0) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFD imm8, m512/m32bcst, zmm{k}{z} + if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFD imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFD imm8, m128/m32bcst, xmm{k}{z} + if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFD imm8, m256/m32bcst, ymm{k}{z} + if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFD imm8, xmm, xmm{k}{z} + if isImm8(v0) && 
isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFD imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSHUFD") + } + return p +} + +// VPSHUFHW performs "Shuffle Packed High Words". +// +// Mnemonic : VPSHUFHW +// Supported forms : (10 forms) +// +// * VPSHUFHW imm8, xmm, xmm [AVX] +// * VPSHUFHW imm8, m128, xmm [AVX] +// * VPSHUFHW imm8, ymm, ymm [AVX2] +// * VPSHUFHW imm8, m256, ymm [AVX2] +// * VPSHUFHW imm8, zmm, zmm{k}{z} [AVX512BW] +// * VPSHUFHW imm8, m512, zmm{k}{z} [AVX512BW] +// * VPSHUFHW imm8, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSHUFHW imm8, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSHUFHW imm8, m128, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSHUFHW imm8, m256, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSHUFHW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSHUFHW", 3, Operands { v0, v1, v2 }) + // VPSHUFHW imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), v[1], 0) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFHW imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), addr(v[1]), 0) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFHW imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[2]), v[1], 0) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFHW imm8, m256, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(6, hcode(v[2]), addr(v[1]), 0) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFHW imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFHW imm8, m512, zmm{k}{z} + if isImm8(v0) && isM512(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b10, ehcode(v[2]), addr(v[1]), 
0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFHW imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFHW imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFHW imm8, m128, xmm{k}{z} + if isImm8(v0) && isM128(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFHW imm8, m256, ymm{k}{z} + if isImm8(v0) && isM256(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSHUFHW") + } + return p +} + +// VPSHUFLW performs "Shuffle Packed Low Words". 
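// (Editorial sketch, not part of the generated file: unlike VPSHUFD, the word
// shuffles only touch half of each 128-bit lane; VPSHUFLW reorders words 0-3
// and copies words 4-7 unchanged, while VPSHUFHW does the opposite. Assuming
// a *Program value p, reversing all eight words of an XMM register therefore
// takes the classic three-step idiom:
//
//	p.VPSHUFLW(0x1b, XMM1, XMM0)  // reverse words 0-3
//	p.VPSHUFHW(0x1b, XMM0, XMM0)  // reverse words 4-7
//	p.VPSHUFD(0x4e, XMM0, XMM0)   // swap the two 64-bit halves
//
// leaving xmm0 = xmm1 with its word order fully reversed.)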
+// +// Mnemonic : VPSHUFLW +// Supported forms : (10 forms) +// +// * VPSHUFLW imm8, xmm, xmm [AVX] +// * VPSHUFLW imm8, m128, xmm [AVX] +// * VPSHUFLW imm8, ymm, ymm [AVX2] +// * VPSHUFLW imm8, m256, ymm [AVX2] +// * VPSHUFLW imm8, zmm, zmm{k}{z} [AVX512BW] +// * VPSHUFLW imm8, m512, zmm{k}{z} [AVX512BW] +// * VPSHUFLW imm8, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSHUFLW imm8, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSHUFLW imm8, m128, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSHUFLW imm8, m256, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSHUFLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSHUFLW", 3, Operands { v0, v1, v2 }) + // VPSHUFLW imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[1], 0) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFLW imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[1]), 0) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFLW imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[2]), v[1], 0) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFLW imm8, m256, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(7, hcode(v[2]), addr(v[1]), 0) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFLW imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFLW imm8, m512, zmm{k}{z} + if isImm8(v0) && isM512(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFLW imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFLW imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7f) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28) + m.emit(0x70) + m.emit(0xc0 | lcode(v[2]) << 3 | 
lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFLW imm8, m128, xmm{k}{z} + if isImm8(v0) && isM128(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSHUFLW imm8, m256, ymm{k}{z} + if isImm8(v0) && isM256(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x07, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), 0) + m.emit(0x70) + m.mrsd(lcode(v[2]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSHUFLW") + } + return p +} + +// VPSIGNB performs "Packed Sign of Byte Integers". +// +// Mnemonic : VPSIGNB +// Supported forms : (4 forms) +// +// * VPSIGNB xmm, xmm, xmm [AVX] +// * VPSIGNB m128, xmm, xmm [AVX] +// * VPSIGNB ymm, ymm, ymm [AVX2] +// * VPSIGNB m256, ymm, ymm [AVX2] +// +func (self *Program) VPSIGNB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSIGNB", 3, Operands { v0, v1, v2 }) + // VPSIGNB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x08) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSIGNB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x08) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSIGNB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x08) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSIGNB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x08) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPSIGNB") + } + return p +} + +// VPSIGND performs "Packed Sign of Doubleword Integers". 
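// (Editorial sketch, not part of the generated file: with the AT&T-style
// operand order used throughout this file, the first operand supplies the
// signs and the second the data, so the destination lane becomes the data
// lane, its negation, or zero as the sign lane is positive, negative, or
// zero. Assuming a *Program value p:
//
//	p.VPSIGND(XMM2, XMM1, XMM0)   // xmm0[i] = xmm1[i], -xmm1[i], or 0 per sign(xmm2[i])
//
// Like the other SSSE3-derived sign ops, VPSIGND stops at AVX2 and has no
// EVEX encoding, which matches the four forms listed below.)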
+// +// Mnemonic : VPSIGND +// Supported forms : (4 forms) +// +// * VPSIGND xmm, xmm, xmm [AVX] +// * VPSIGND m128, xmm, xmm [AVX] +// * VPSIGND ymm, ymm, ymm [AVX2] +// * VPSIGND m256, ymm, ymm [AVX2] +// +func (self *Program) VPSIGND(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSIGND", 3, Operands { v0, v1, v2 }) + // VPSIGND xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x0a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSIGND m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x0a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSIGND ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x0a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSIGND m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x0a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPSIGND") + } + return p +} + +// VPSIGNW performs "Packed Sign of Word Integers". +// +// Mnemonic : VPSIGNW +// Supported forms : (4 forms) +// +// * VPSIGNW xmm, xmm, xmm [AVX] +// * VPSIGNW m128, xmm, xmm [AVX] +// * VPSIGNW ymm, ymm, ymm [AVX2] +// * VPSIGNW m256, ymm, ymm [AVX2] +// +func (self *Program) VPSIGNW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSIGNW", 3, Operands { v0, v1, v2 }) + // VPSIGNW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x09) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSIGNW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x09) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSIGNW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x09) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSIGNW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x09) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPSIGNW") + } + return p +} + +// VPSLLD performs "Shift Packed Doubleword Data Left 
Logical". +// +// Mnemonic : VPSLLD +// Supported forms : (18 forms) +// +// * VPSLLD imm8, xmm, xmm [AVX] +// * VPSLLD xmm, xmm, xmm [AVX] +// * VPSLLD m128, xmm, xmm [AVX] +// * VPSLLD imm8, ymm, ymm [AVX2] +// * VPSLLD xmm, ymm, ymm [AVX2] +// * VPSLLD m128, ymm, ymm [AVX2] +// * VPSLLD imm8, m512/m32bcst, zmm{k}{z} [AVX512F] +// * VPSLLD imm8, zmm, zmm{k}{z} [AVX512F] +// * VPSLLD xmm, zmm, zmm{k}{z} [AVX512F] +// * VPSLLD m128, zmm, zmm{k}{z} [AVX512F] +// * VPSLLD imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSLLD imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSLLD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSLLD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSLLD m128, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSLLD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSLLD xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSLLD m128, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSLLD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSLLD", 3, Operands { v0, v1, v2 }) + // VPSLLD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, v[1], hlcode(v[2])) + m.emit(0x72) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf2) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSLLD imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, v[1], hlcode(v[2])) + m.emit(0x72) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLD xmm, ymm, ymm + if isXMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLD m128, ymm, ymm + if isM128(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf2) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSLLD imm8, m512/m32bcst, zmm{k}{z} + if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(6, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLD imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x72) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLD xmm, zmm, 
zmm{k}{z} + if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xf2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLD m128, zmm, zmm{k}{z} + if isM128(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf2) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSLLD imm8, m128/m32bcst, xmm{k}{z} + if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(6, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLD imm8, m256/m32bcst, ymm{k}{z} + if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(6, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLD imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x72) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xf2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLD m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf2) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSLLD imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x72) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLD xmm, ymm, ymm{k}{z} + if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | 
(ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xf2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLD m128, ymm, ymm{k}{z} + if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf2) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPSLLD") + } + return p +} + +// VPSLLDQ performs "Shift Packed Double Quadword Left Logical". +// +// Mnemonic : VPSLLDQ +// Supported forms : (8 forms) +// +// * VPSLLDQ imm8, xmm, xmm [AVX] +// * VPSLLDQ imm8, ymm, ymm [AVX2] +// * VPSLLDQ imm8, zmm, zmm [AVX512BW] +// * VPSLLDQ imm8, m512, zmm [AVX512BW] +// * VPSLLDQ imm8, xmm, xmm [AVX512BW,AVX512VL] +// * VPSLLDQ imm8, m128, xmm [AVX512BW,AVX512VL] +// * VPSLLDQ imm8, ymm, ymm [AVX512BW,AVX512VL] +// * VPSLLDQ imm8, m256, ymm [AVX512BW,AVX512VL] +// +func (self *Program) VPSLLDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSLLDQ", 3, Operands { v0, v1, v2 }) + // VPSLLDQ imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, v[1], hlcode(v[2])) + m.emit(0x73) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLDQ imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, v[1], hlcode(v[2])) + m.emit(0x73) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLDQ imm8, zmm, zmm + if isImm8(v0) && isZMM(v1) && isZMM(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x40) + m.emit(0x73) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLDQ imm8, m512, zmm + if isImm8(v0) && isM512(v1) && isZMM(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), 0, 0, 0) + m.emit(0x73) + m.mrsd(7, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLDQ imm8, xmm, xmm + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00) + m.emit(0x73) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLDQ imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), 0, 0, 0) + m.emit(0x73) + m.mrsd(7, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLDQ imm8, ymm, ymm + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ 
(hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x20) + m.emit(0x73) + m.emit(0xf8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLDQ imm8, m256, ymm + if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), 0, 0, 0) + m.emit(0x73) + m.mrsd(7, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSLLDQ") + } + return p +} + +// VPSLLQ performs "Shift Packed Quadword Data Left Logical". +// +// Mnemonic : VPSLLQ +// Supported forms : (18 forms) +// +// * VPSLLQ imm8, xmm, xmm [AVX] +// * VPSLLQ xmm, xmm, xmm [AVX] +// * VPSLLQ m128, xmm, xmm [AVX] +// * VPSLLQ imm8, ymm, ymm [AVX2] +// * VPSLLQ xmm, ymm, ymm [AVX2] +// * VPSLLQ m128, ymm, ymm [AVX2] +// * VPSLLQ imm8, m512/m64bcst, zmm{k}{z} [AVX512F] +// * VPSLLQ imm8, zmm, zmm{k}{z} [AVX512F] +// * VPSLLQ xmm, zmm, zmm{k}{z} [AVX512F] +// * VPSLLQ m128, zmm, zmm{k}{z} [AVX512F] +// * VPSLLQ imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSLLQ imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSLLQ imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSLLQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSLLQ m128, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSLLQ imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSLLQ xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSLLQ m128, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSLLQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSLLQ", 3, Operands { v0, v1, v2 }) + // VPSLLQ imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, v[1], hlcode(v[2])) + m.emit(0x73) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf3) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSLLQ imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, v[1], hlcode(v[2])) + m.emit(0x73) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLQ xmm, ymm, ymm + if isXMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLQ m128, ymm, ymm + if isM128(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf3) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSLLQ imm8, m512/m64bcst, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + 
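// (Editorial note, not part of the generated file: the 0x85 in the evex()
// call below, versus the 0x05 used by the doubleword VPSLLD above, appears
// to carry EVEX.W=1 in its high bit, selecting 64-bit elements; the low bits
// encode the mandatory prefix, 0x05 for 66-prefixed ops here, with 0x06/0x07
// showing up above for the F3/F2-prefixed VPSHUFHW and VPSHUFLW.)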
m.evex(0b01, 0x85, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x73) + m.mrsd(6, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLQ imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x73) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLQ xmm, zmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xf3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLQ m128, zmm, zmm{k}{z} + if isM128(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf3) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSLLQ imm8, m128/m64bcst, xmm{k}{z} + if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x73) + m.mrsd(6, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLQ imm8, m256/m64bcst, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x73) + m.mrsd(6, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLQ imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x73) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xf3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLQ m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf3) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSLLQ imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + 
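// (Editorial note, not part of the generated file: the shift-by-immediate
// forms have no separate destination field in ModRM; the reg field carries
// the opcode extension /6 instead -- hence the literal 6 passed to mrsd for
// memory operands and the 0xf0|lcode(v[1]) == 0xc0|6<<3|rm byte for register
// operands, just as the arithmetic right shifts further down use /4 and 0xe0
// -- while the destination travels in the VEX/EVEX vvvv field.)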
self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x73) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLQ xmm, ymm, ymm{k}{z} + if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xf3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLQ m128, ymm, ymm{k}{z} + if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf3) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPSLLQ") + } + return p +} + +// VPSLLVD performs "Variable Shift Packed Doubleword Data Left Logical". +// +// Mnemonic : VPSLLVD +// Supported forms : (10 forms) +// +// * VPSLLVD xmm, xmm, xmm [AVX2] +// * VPSLLVD m128, xmm, xmm [AVX2] +// * VPSLLVD ymm, ymm, ymm [AVX2] +// * VPSLLVD m256, ymm, ymm [AVX2] +// * VPSLLVD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPSLLVD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPSLLVD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSLLVD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSLLVD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSLLVD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSLLVD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSLLVD", 3, Operands { v0, v1, v2 }) + // VPSLLVD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLVD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x47) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSLLVD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLVD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x47) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSLLVD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, 
ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x47) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSLLVD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLVD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x47) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSLLVD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLVD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x47) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPSLLVD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSLLVD") + } + return p +} + +// VPSLLVQ performs "Variable Shift Packed Quadword Data Left Logical". 
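// (Editorial sketch, not part of the generated file: VPSLLQ above shifts
// every lane by one shared count, whereas the VPSLLV* family reads a separate
// count per lane from its first operand, zeroing any lane whose count exceeds
// the element width. Assuming a *Program value p:
//
//	p.VPSLLVQ(YMM2, YMM1, YMM0)   // ymm0[i] = ymm1[i] << ymm2[i], per 64-bit lane
//
// Note also the last argument to mrsd in the memory forms: 1 under VEX, but
// the full operand size (16/32/64) under EVEX, reflecting EVEX's disp8*N
// compressed-displacement scaling.)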
+// +// Mnemonic : VPSLLVQ +// Supported forms : (10 forms) +// +// * VPSLLVQ xmm, xmm, xmm [AVX2] +// * VPSLLVQ m128, xmm, xmm [AVX2] +// * VPSLLVQ ymm, ymm, ymm [AVX2] +// * VPSLLVQ m256, ymm, ymm [AVX2] +// * VPSLLVQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPSLLVQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPSLLVQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSLLVQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSLLVQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSLLVQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSLLVQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSLLVQ", 3, Operands { v0, v1, v2 }) + // VPSLLVQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLVQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x47) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSLLVQ ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLVQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x47) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSLLVQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x47) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSLLVQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLVQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x47) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSLLVQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ 
(ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLVQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x47) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPSLLVQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x47) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSLLVQ") + } + return p +} + +// VPSLLVW performs "Variable Shift Packed Word Data Left Logical". +// +// Mnemonic : VPSLLVW +// Supported forms : (6 forms) +// +// * VPSLLVW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPSLLVW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPSLLVW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSLLVW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSLLVW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSLLVW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSLLVW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSLLVW", 3, Operands { v0, v1, v2 }) + // VPSLLVW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x12) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLVW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x12) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSLLVW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x12) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLVW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x12) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSLLVW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | 
(ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x12) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLVW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x12) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPSLLVW") + } + return p +} + +// VPSLLW performs "Shift Packed Word Data Left Logical". +// +// Mnemonic : VPSLLW +// Supported forms : (18 forms) +// +// * VPSLLW imm8, xmm, xmm [AVX] +// * VPSLLW xmm, xmm, xmm [AVX] +// * VPSLLW m128, xmm, xmm [AVX] +// * VPSLLW imm8, ymm, ymm [AVX2] +// * VPSLLW xmm, ymm, ymm [AVX2] +// * VPSLLW m128, ymm, ymm [AVX2] +// * VPSLLW imm8, zmm, zmm{k}{z} [AVX512BW] +// * VPSLLW xmm, zmm, zmm{k}{z} [AVX512BW] +// * VPSLLW m128, zmm, zmm{k}{z} [AVX512BW] +// * VPSLLW imm8, m512, zmm{k}{z} [AVX512BW] +// * VPSLLW imm8, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSLLW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSLLW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSLLW imm8, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSLLW xmm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSLLW m128, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSLLW imm8, m128, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSLLW imm8, m256, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSLLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSLLW", 3, Operands { v0, v1, v2 }) + // VPSLLW imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, v[1], hlcode(v[2])) + m.emit(0x71) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf1) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSLLW imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, v[1], hlcode(v[2])) + m.emit(0x71) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLW xmm, ymm, ymm + if isXMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLW m128, ymm, ymm + if isM128(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf1) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSLLW imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + 
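// (Editorial note, not part of the generated file: require() takes a bitmask,
// so forms gated on two extensions OR their flags together -- the 128/256-bit
// EVEX forms below need ISA_AVX512VL | ISA_AVX512BW, i.e. AVX512BW for the
// word-granularity operation plus AVX512VL for sub-512-bit vector lengths.)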
p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x71) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLW xmm, zmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xf1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLW m128, zmm, zmm{k}{z} + if isM128(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf1) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSLLW imm8, m512, zmm{k}{z} + if isImm8(v0) && isM512(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x71) + m.mrsd(6, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLW imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x71) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xf1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf1) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSLLW imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x71) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLW xmm, ymm, ymm{k}{z} + if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) 
<< 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xf1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSLLW m128, ymm, ymm{k}{z} + if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf1) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSLLW imm8, m128, xmm{k}{z} + if isImm8(v0) && isM128(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x71) + m.mrsd(6, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSLLW imm8, m256, ymm{k}{z} + if isImm8(v0) && isM256(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x71) + m.mrsd(6, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSLLW") + } + return p +} + +// VPSRAD performs "Shift Packed Doubleword Data Right Arithmetic". +// +// Mnemonic : VPSRAD +// Supported forms : (18 forms) +// +// * VPSRAD imm8, xmm, xmm [AVX] +// * VPSRAD xmm, xmm, xmm [AVX] +// * VPSRAD m128, xmm, xmm [AVX] +// * VPSRAD imm8, ymm, ymm [AVX2] +// * VPSRAD xmm, ymm, ymm [AVX2] +// * VPSRAD m128, ymm, ymm [AVX2] +// * VPSRAD imm8, m512/m32bcst, zmm{k}{z} [AVX512F] +// * VPSRAD imm8, zmm, zmm{k}{z} [AVX512F] +// * VPSRAD xmm, zmm, zmm{k}{z} [AVX512F] +// * VPSRAD m128, zmm, zmm{k}{z} [AVX512F] +// * VPSRAD imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRAD imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRAD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRAD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRAD m128, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRAD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRAD xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRAD m128, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSRAD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSRAD", 3, Operands { v0, v1, v2 }) + // VPSRAD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, v[1], hlcode(v[2])) + m.emit(0x72) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe2) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRAD imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, v[1], hlcode(v[2])) + m.emit(0x72) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAD xmm, ymm, ymm + if isXMM(v0) 
&& isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAD m128, ymm, ymm + if isM128(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe2) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRAD imm8, m512/m32bcst, zmm{k}{z} + if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(4, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAD imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x72) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAD xmm, zmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xe2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAD m128, zmm, zmm{k}{z} + if isM128(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe2) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRAD imm8, m128/m32bcst, xmm{k}{z} + if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(4, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAD imm8, m256/m32bcst, ymm{k}{z} + if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(4, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAD imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x72) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 
^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xe2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAD m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe2) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRAD imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x72) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAD xmm, ymm, ymm{k}{z} + if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xe2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAD m128, ymm, ymm{k}{z} + if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe2) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPSRAD") + } + return p +} + +// VPSRAQ performs "Shift Packed Quadword Data Right Arithmetic". 
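+//
+// A minimal usage sketch (illustrative only, not produced by the generator),
+// assuming the package's exported register constants (e.g. ZMM1, ZMM2) and
+// that a plain Go int is accepted wherever an imm8 operand is listed:
+//
+//     p.VPSRAQ(4, ZMM1, ZMM2)    // VPSRAQ imm8, zmm, zmm{k}{z} [AVX512F]
+//
+// As with every encoder in this file, operands matching none of the forms
+// below make the method panic ("invalid operands for VPSRAQ").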
+//
+// Mnemonic        : VPSRAQ
+// Supported forms : (12 forms)
+//
+//    * VPSRAQ imm8, m512/m64bcst, zmm{k}{z}    [AVX512F]
+//    * VPSRAQ imm8, zmm, zmm{k}{z}             [AVX512F]
+//    * VPSRAQ xmm, zmm, zmm{k}{z}              [AVX512F]
+//    * VPSRAQ m128, zmm, zmm{k}{z}             [AVX512F]
+//    * VPSRAQ imm8, m128/m64bcst, xmm{k}{z}    [AVX512F,AVX512VL]
+//    * VPSRAQ imm8, m256/m64bcst, ymm{k}{z}    [AVX512F,AVX512VL]
+//    * VPSRAQ imm8, xmm, xmm{k}{z}             [AVX512F,AVX512VL]
+//    * VPSRAQ xmm, xmm, xmm{k}{z}              [AVX512F,AVX512VL]
+//    * VPSRAQ m128, xmm, xmm{k}{z}             [AVX512F,AVX512VL]
+//    * VPSRAQ imm8, ymm, ymm{k}{z}             [AVX512F,AVX512VL]
+//    * VPSRAQ xmm, ymm, ymm{k}{z}              [AVX512F,AVX512VL]
+//    * VPSRAQ m128, ymm, ymm{k}{z}             [AVX512F,AVX512VL]
+//
+func (self *Program) VPSRAQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+    p := self.alloc("VPSRAQ", 3, Operands { v0, v1, v2 })
+    // VPSRAQ imm8, m512/m64bcst, zmm{k}{z}
+    if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b01, 0x85, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+            m.emit(0x72)
+            m.mrsd(4, addr(v[1]), 64)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VPSRAQ imm8, zmm, zmm{k}{z}
+    if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+            m.emit(0xfd ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40)
+            m.emit(0x72)
+            m.emit(0xe0 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VPSRAQ xmm, zmm, zmm{k}{z}
+    if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0xfd ^ (hlcode(v[1]) << 3))
+            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+            m.emit(0xe2)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+        })
+    }
+    // VPSRAQ m128, zmm, zmm{k}{z}
+    if isM128(v0) && isZMM(v1) && isZMMkz(v2) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+            m.emit(0xe2)
+            m.mrsd(lcode(v[2]), addr(v[0]), 16)
+        })
+    }
+    // VPSRAQ imm8, m128/m64bcst, xmm{k}{z}
+    if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b01, 0x85, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+            m.emit(0x72)
+            m.mrsd(4, addr(v[1]), 16)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VPSRAQ imm8, m256/m64bcst, ymm{k}{z}
+    if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b01, 0x85, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1]))
+            m.emit(0x72)
+            m.mrsd(4, addr(v[1]), 32)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VPSRAQ imm8, xmm, xmm{k}{z}
+    if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+            m.emit(0xfd ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00)
+            m.emit(0x72)
+            m.emit(0xe0 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VPSRAQ xmm, xmm, xmm{k}{z}
+    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0xfd ^ (hlcode(v[1]) << 3))
+            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+            m.emit(0xe2)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+        })
+    }
+    // VPSRAQ m128, xmm, xmm{k}{z}
+    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+            m.emit(0xe2)
+            m.mrsd(lcode(v[2]), addr(v[0]), 16)
+        })
+    }
+    // VPSRAQ imm8, ymm, ymm{k}{z}
+    if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf1 ^ (ehcode(v[1]) << 5))
+            m.emit(0xfd ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20)
+            m.emit(0x72)
+            m.emit(0xe0 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VPSRAQ xmm, ymm, ymm{k}{z}
+    if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0xfd ^ (hlcode(v[1]) << 3))
+            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+            m.emit(0xe2)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+        })
+    }
+    // VPSRAQ m128, ymm, ymm{k}{z}
+    if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+            m.emit(0xe2)
+            m.mrsd(lcode(v[2]), addr(v[0]), 16)
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VPSRAQ")
+    }
+    return p
+}
+
+// VPSRAVD performs "Variable Shift Packed Doubleword Data Right Arithmetic".
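+//
+// A minimal usage sketch (illustrative only, not generator output). As in the
+// form lists throughout this file, the count operand comes first and the
+// destination last; the XMM register constants are assumed to be this
+// package's exported operand values:
+//
+//     p.VPSRAVD(XMM0, XMM1, XMM2)    // VPSRAVD xmm, xmm, xmm [AVX2]
+//
+// Selecting this form calls self.require(ISA_AVX2), so assembling it also
+// records the AVX2 requirement on the Program.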
+// +// Mnemonic : VPSRAVD +// Supported forms : (10 forms) +// +// * VPSRAVD xmm, xmm, xmm [AVX2] +// * VPSRAVD m128, xmm, xmm [AVX2] +// * VPSRAVD ymm, ymm, ymm [AVX2] +// * VPSRAVD m256, ymm, ymm [AVX2] +// * VPSRAVD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPSRAVD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPSRAVD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRAVD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRAVD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRAVD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSRAVD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSRAVD", 3, Operands { v0, v1, v2 }) + // VPSRAVD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x46) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAVD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x46) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRAVD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x46) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAVD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x46) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRAVD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x46) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSRAVD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x46) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAVD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x46) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRAVD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ 
(ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x46) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAVD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x46) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPSRAVD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x46) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSRAVD") + } + return p +} + +// VPSRAVQ performs "Variable Shift Packed Quadword Data Right Arithmetic". +// +// Mnemonic : VPSRAVQ +// Supported forms : (6 forms) +// +// * VPSRAVQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPSRAVQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPSRAVQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRAVQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRAVQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRAVQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSRAVQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSRAVQ", 3, Operands { v0, v1, v2 }) + // VPSRAVQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x46) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSRAVQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x46) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAVQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x46) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRAVQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x46) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAVQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m 
*_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x46) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPSRAVQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x46) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSRAVQ") + } + return p +} + +// VPSRAVW performs "Variable Shift Packed Word Data Right Arithmetic". +// +// Mnemonic : VPSRAVW +// Supported forms : (6 forms) +// +// * VPSRAVW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPSRAVW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPSRAVW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSRAVW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSRAVW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSRAVW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSRAVW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSRAVW", 3, Operands { v0, v1, v2 }) + // VPSRAVW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x11) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAVW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x11) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSRAVW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x11) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAVW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x11) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRAVW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x11) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAVW m256, ymm, ymm{k}{z} + if isM256(v0) && 
isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x11) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPSRAVW") + } + return p +} + +// VPSRAW performs "Shift Packed Word Data Right Arithmetic". +// +// Mnemonic : VPSRAW +// Supported forms : (18 forms) +// +// * VPSRAW imm8, xmm, xmm [AVX] +// * VPSRAW xmm, xmm, xmm [AVX] +// * VPSRAW m128, xmm, xmm [AVX] +// * VPSRAW imm8, ymm, ymm [AVX2] +// * VPSRAW xmm, ymm, ymm [AVX2] +// * VPSRAW m128, ymm, ymm [AVX2] +// * VPSRAW imm8, zmm, zmm{k}{z} [AVX512BW] +// * VPSRAW xmm, zmm, zmm{k}{z} [AVX512BW] +// * VPSRAW m128, zmm, zmm{k}{z} [AVX512BW] +// * VPSRAW imm8, m512, zmm{k}{z} [AVX512BW] +// * VPSRAW imm8, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSRAW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSRAW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSRAW imm8, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSRAW xmm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSRAW m128, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSRAW imm8, m128, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSRAW imm8, m256, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSRAW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSRAW", 3, Operands { v0, v1, v2 }) + // VPSRAW imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, v[1], hlcode(v[2])) + m.emit(0x71) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe1) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRAW imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, v[1], hlcode(v[2])) + m.emit(0x71) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAW xmm, ymm, ymm + if isXMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAW m128, ymm, ymm + if isM128(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe1) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRAW imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x71) + m.emit(0xe0 | lcode(v[1])) 
+ m.imm1(toImmAny(v[0])) + }) + } + // VPSRAW xmm, zmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xe1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAW m128, zmm, zmm{k}{z} + if isM128(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe1) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRAW imm8, m512, zmm{k}{z} + if isImm8(v0) && isM512(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x71) + m.mrsd(4, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAW imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x71) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xe1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe1) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRAW imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x71) + m.emit(0xe0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAW xmm, ymm, ymm{k}{z} + if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xe1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRAW m128, ymm, ymm{k}{z} + if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + 
p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe1) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRAW imm8, m128, xmm{k}{z} + if isImm8(v0) && isM128(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x71) + m.mrsd(4, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRAW imm8, m256, ymm{k}{z} + if isImm8(v0) && isM256(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x71) + m.mrsd(4, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSRAW") + } + return p +} + +// VPSRLD performs "Shift Packed Doubleword Data Right Logical". +// +// Mnemonic : VPSRLD +// Supported forms : (18 forms) +// +// * VPSRLD imm8, xmm, xmm [AVX] +// * VPSRLD xmm, xmm, xmm [AVX] +// * VPSRLD m128, xmm, xmm [AVX] +// * VPSRLD imm8, ymm, ymm [AVX2] +// * VPSRLD xmm, ymm, ymm [AVX2] +// * VPSRLD m128, ymm, ymm [AVX2] +// * VPSRLD imm8, m512/m32bcst, zmm{k}{z} [AVX512F] +// * VPSRLD imm8, zmm, zmm{k}{z} [AVX512F] +// * VPSRLD xmm, zmm, zmm{k}{z} [AVX512F] +// * VPSRLD m128, zmm, zmm{k}{z} [AVX512F] +// * VPSRLD imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRLD imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRLD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRLD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRLD m128, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRLD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRLD xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRLD m128, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSRLD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSRLD", 3, Operands { v0, v1, v2 }) + // VPSRLD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, v[1], hlcode(v[2])) + m.emit(0x72) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd2) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRLD imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, v[1], hlcode(v[2])) + m.emit(0x72) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLD xmm, ymm, ymm + if isXMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLD m128, ymm, ymm + if isM128(v0) 
&& isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd2) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRLD imm8, m512/m32bcst, zmm{k}{z} + if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(2, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLD imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x72) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLD xmm, zmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xd2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLD m128, zmm, zmm{k}{z} + if isM128(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd2) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRLD imm8, m128/m32bcst, xmm{k}{z} + if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(2, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLD imm8, m256/m32bcst, ymm{k}{z} + if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x72) + m.mrsd(2, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLD imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x72) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xd2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLD m128, xmm, 
xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd2) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRLD imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x72) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLD xmm, ymm, ymm{k}{z} + if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xd2) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLD m128, ymm, ymm{k}{z} + if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd2) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPSRLD") + } + return p +} + +// VPSRLDQ performs "Shift Packed Double Quadword Right Logical". +// +// Mnemonic : VPSRLDQ +// Supported forms : (8 forms) +// +// * VPSRLDQ imm8, xmm, xmm [AVX] +// * VPSRLDQ imm8, ymm, ymm [AVX2] +// * VPSRLDQ imm8, zmm, zmm [AVX512BW] +// * VPSRLDQ imm8, m512, zmm [AVX512BW] +// * VPSRLDQ imm8, xmm, xmm [AVX512BW,AVX512VL] +// * VPSRLDQ imm8, m128, xmm [AVX512BW,AVX512VL] +// * VPSRLDQ imm8, ymm, ymm [AVX512BW,AVX512VL] +// * VPSRLDQ imm8, m256, ymm [AVX512BW,AVX512VL] +// +func (self *Program) VPSRLDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSRLDQ", 3, Operands { v0, v1, v2 }) + // VPSRLDQ imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, v[1], hlcode(v[2])) + m.emit(0x73) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLDQ imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, v[1], hlcode(v[2])) + m.emit(0x73) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLDQ imm8, zmm, zmm + if isImm8(v0) && isZMM(v1) && isZMM(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x40) + m.emit(0x73) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLDQ imm8, m512, zmm + if isImm8(v0) && isM512(v1) && isZMM(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), 0, 0, 0) + m.emit(0x73) + m.mrsd(3, addr(v[1]), 
64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLDQ imm8, xmm, xmm + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x00) + m.emit(0x73) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLDQ imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isEVEXXMM(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), vcode(v[2]), 0, 0, 0) + m.emit(0x73) + m.mrsd(3, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLDQ imm8, ymm, ymm + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((0x08 ^ (ecode(v[2]) << 3)) | 0x20) + m.emit(0x73) + m.emit(0xd8 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLDQ imm8, m256, ymm + if isImm8(v0) && isM256(v1) && isEVEXYMM(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), 0, 0, 0) + m.emit(0x73) + m.mrsd(3, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSRLDQ") + } + return p +} + +// VPSRLQ performs "Shift Packed Quadword Data Right Logical". +// +// Mnemonic : VPSRLQ +// Supported forms : (18 forms) +// +// * VPSRLQ imm8, xmm, xmm [AVX] +// * VPSRLQ xmm, xmm, xmm [AVX] +// * VPSRLQ m128, xmm, xmm [AVX] +// * VPSRLQ imm8, ymm, ymm [AVX2] +// * VPSRLQ xmm, ymm, ymm [AVX2] +// * VPSRLQ m128, ymm, ymm [AVX2] +// * VPSRLQ imm8, m512/m64bcst, zmm{k}{z} [AVX512F] +// * VPSRLQ imm8, zmm, zmm{k}{z} [AVX512F] +// * VPSRLQ xmm, zmm, zmm{k}{z} [AVX512F] +// * VPSRLQ m128, zmm, zmm{k}{z} [AVX512F] +// * VPSRLQ imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRLQ imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRLQ imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRLQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRLQ m128, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRLQ imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRLQ xmm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRLQ m128, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSRLQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSRLQ", 3, Operands { v0, v1, v2 }) + // VPSRLQ imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, v[1], hlcode(v[2])) + m.emit(0x73) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd3) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + 
// VPSRLQ imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, v[1], hlcode(v[2])) + m.emit(0x73) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLQ xmm, ymm, ymm + if isXMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLQ m128, ymm, ymm + if isM128(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd3) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRLQ imm8, m512/m64bcst, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x73) + m.mrsd(2, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLQ imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x73) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLQ xmm, zmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xd3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLQ m128, zmm, zmm{k}{z} + if isM128(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd3) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRLQ imm8, m128/m64bcst, xmm{k}{z} + if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x73) + m.mrsd(2, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLQ imm8, m256/m64bcst, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), bcode(v[1])) + m.emit(0x73) + m.mrsd(2, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLQ imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | 
kcode(v[2]) | 0x00) + m.emit(0x73) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xd3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLQ m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd3) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRLQ imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x73) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLQ xmm, ymm, ymm{k}{z} + if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xd3) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLQ m128, ymm, ymm{k}{z} + if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd3) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + if p.len == 0 { + panic("invalid operands for VPSRLQ") + } + return p +} + +// VPSRLVD performs "Variable Shift Packed Doubleword Data Right Logical". 
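+//
+// A minimal usage sketch (illustrative only, not generator output), assuming
+// the package's exported YMM register constants; the per-element count vector
+// comes first and the destination last, as in the form list below:
+//
+//     p.VPSRLVD(YMM0, YMM1, YMM2)    // VPSRLVD ymm, ymm, ymm [AVX2]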
+// +// Mnemonic : VPSRLVD +// Supported forms : (10 forms) +// +// * VPSRLVD xmm, xmm, xmm [AVX2] +// * VPSRLVD m128, xmm, xmm [AVX2] +// * VPSRLVD ymm, ymm, ymm [AVX2] +// * VPSRLVD m256, ymm, ymm [AVX2] +// * VPSRLVD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPSRLVD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPSRLVD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRLVD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRLVD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRLVD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSRLVD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSRLVD", 3, Operands { v0, v1, v2 }) + // VPSRLVD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79 ^ (hlcode(v[1]) << 3)) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLVD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x45) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRLVD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLVD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x45) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRLVD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x45) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSRLVD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLVD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x45) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRLVD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ 
(ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLVD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x45) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPSRLVD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSRLVD") + } + return p +} + +// VPSRLVQ performs "Variable Shift Packed Quadword Data Right Logical". +// +// Mnemonic : VPSRLVQ +// Supported forms : (10 forms) +// +// * VPSRLVQ xmm, xmm, xmm [AVX2] +// * VPSRLVQ m128, xmm, xmm [AVX2] +// * VPSRLVQ ymm, ymm, ymm [AVX2] +// * VPSRLVQ m256, ymm, ymm [AVX2] +// * VPSRLVQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPSRLVQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPSRLVQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRLVQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSRLVQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSRLVQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSRLVQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSRLVQ", 3, Operands { v0, v1, v2 }) + // VPSRLVQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xf9 ^ (hlcode(v[1]) << 3)) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLVQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x81, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x45) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRLVQ ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[2]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLVQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x85, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x45) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRLVQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x45) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSRLVQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + 
self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLVQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x45) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRLVQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLVQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x45) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPSRLVQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x45) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSRLVQ") + } + return p +} + +// VPSRLVW performs "Variable Shift Packed Word Data Right Logical". 
+//
+// Mnemonic        : VPSRLVW
+// Supported forms : (6 forms)
+//
+//    * VPSRLVW zmm, zmm, zmm{k}{z}     [AVX512BW]
+//    * VPSRLVW m512, zmm, zmm{k}{z}    [AVX512BW]
+//    * VPSRLVW xmm, xmm, xmm{k}{z}     [AVX512BW,AVX512VL]
+//    * VPSRLVW m128, xmm, xmm{k}{z}    [AVX512BW,AVX512VL]
+//    * VPSRLVW ymm, ymm, ymm{k}{z}     [AVX512BW,AVX512VL]
+//    * VPSRLVW m256, ymm, ymm{k}{z}    [AVX512BW,AVX512VL]
+//
+func (self *Program) VPSRLVW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+    p := self.alloc("VPSRLVW", 3, Operands { v0, v1, v2 })
+    // VPSRLVW zmm, zmm, zmm{k}{z}
+    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+        self.require(ISA_AVX512BW)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0xfd ^ (hlcode(v[1]) << 3))
+            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+            m.emit(0x10)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+        })
+    }
+    // VPSRLVW m512, zmm, zmm{k}{z}
+    if isM512(v0) && isZMM(v1) && isZMMkz(v2) {
+        self.require(ISA_AVX512BW)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+            m.emit(0x10)
+            m.mrsd(lcode(v[2]), addr(v[0]), 64)
+        })
+    }
+    // VPSRLVW xmm, xmm, xmm{k}{z}
+    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512BW)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0xfd ^ (hlcode(v[1]) << 3))
+            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+            m.emit(0x10)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+        })
+    }
+    // VPSRLVW m128, xmm, xmm{k}{z}
+    if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512BW)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+            m.emit(0x10)
+            m.mrsd(lcode(v[2]), addr(v[0]), 16)
+        })
+    }
+    // VPSRLVW ymm, ymm, ymm{k}{z}
+    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512BW)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0xfd ^ (hlcode(v[1]) << 3))
+            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+            m.emit(0x10)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+        })
+    }
+    // VPSRLVW m256, ymm, ymm{k}{z}
+    if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512BW)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+            m.emit(0x10)
+            m.mrsd(lcode(v[2]), addr(v[0]), 32)
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VPSRLVW")
+    }
+    return p
+}
+
+// VPSRLW performs "Shift Packed Word Data Right Logical".
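+//
+// A minimal usage sketch (illustrative only, not generator output), assuming
+// the package's exported XMM register constants and that a plain Go int is
+// accepted as the imm8 shift count:
+//
+//     p.VPSRLW(2, XMM1, XMM2)    // VPSRLW imm8, xmm, xmm [AVX]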
+// +// Mnemonic : VPSRLW +// Supported forms : (18 forms) +// +// * VPSRLW imm8, xmm, xmm [AVX] +// * VPSRLW xmm, xmm, xmm [AVX] +// * VPSRLW m128, xmm, xmm [AVX] +// * VPSRLW imm8, ymm, ymm [AVX2] +// * VPSRLW xmm, ymm, ymm [AVX2] +// * VPSRLW m128, ymm, ymm [AVX2] +// * VPSRLW imm8, zmm, zmm{k}{z} [AVX512BW] +// * VPSRLW xmm, zmm, zmm{k}{z} [AVX512BW] +// * VPSRLW m128, zmm, zmm{k}{z} [AVX512BW] +// * VPSRLW imm8, m512, zmm{k}{z} [AVX512BW] +// * VPSRLW imm8, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSRLW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSRLW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSRLW imm8, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSRLW xmm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSRLW m128, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSRLW imm8, m128, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSRLW imm8, m256, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSRLW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSRLW", 3, Operands { v0, v1, v2 }) + // VPSRLW imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, 0, v[1], hlcode(v[2])) + m.emit(0x71) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd1) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRLW imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, 0, v[1], hlcode(v[2])) + m.emit(0x71) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLW xmm, ymm, ymm + if isXMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLW m128, ymm, ymm + if isM128(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd1) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSRLW imm8, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x71) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLW xmm, zmm, zmm{k}{z} + if isEVEXXMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xd1) 
+ m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLW m128, zmm, zmm{k}{z} + if isM128(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd1) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRLW imm8, m512, zmm{k}{z} + if isImm8(v0) && isM512(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x71) + m.mrsd(2, addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLW imm8, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x71) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xd1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd1) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRLW imm8, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ (ehcode(v[1]) << 5)) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x71) + m.emit(0xd0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLW xmm, ymm, ymm{k}{z} + if isEVEXXMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xd1) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSRLW m128, ymm, ymm{k}{z} + if isM128(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd1) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSRLW imm8, m128, xmm{k}{z} + if isImm8(v0) && isM128(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, 0, addr(v[1]), 
vcode(v[2]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x71) + m.mrsd(2, addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPSRLW imm8, m256, ymm{k}{z} + if isImm8(v0) && isM256(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, 0, addr(v[1]), vcode(v[2]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x71) + m.mrsd(2, addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSRLW") + } + return p +} + +// VPSUBB performs "Subtract Packed Byte Integers". +// +// Mnemonic : VPSUBB +// Supported forms : (10 forms) +// +// * VPSUBB xmm, xmm, xmm [AVX] +// * VPSUBB m128, xmm, xmm [AVX] +// * VPSUBB ymm, ymm, ymm [AVX2] +// * VPSUBB m256, ymm, ymm [AVX2] +// * VPSUBB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPSUBB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPSUBB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSUBB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSUBB", 3, Operands { v0, v1, v2 }) + // VPSUBB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xf8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf8) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSUBB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 
^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xf8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf8) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSUBB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xf8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf8) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPSUBB") + } + return p +} + +// VPSUBD performs "Subtract Packed Doubleword Integers". +// +// Mnemonic : VPSUBD +// Supported forms : (10 forms) +// +// * VPSUBD xmm, xmm, xmm [AVX] +// * VPSUBD m128, xmm, xmm [AVX] +// * VPSUBD ymm, ymm, ymm [AVX2] +// * VPSUBD m256, ymm, ymm [AVX2] +// * VPSUBD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPSUBD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPSUBD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSUBD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSUBD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSUBD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSUBD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSUBD", 3, Operands { v0, v1, v2 }) + // VPSUBD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xfa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xfa) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xfa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xfa) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + 
m.emit(0xfa) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSUBD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xfa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xfa) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSUBD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xfa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBD m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xfa) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPSUBD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xfa) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSUBD") + } + return p +} + +// VPSUBQ performs "Subtract Packed Quadword Integers". 
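+//
+// A minimal usage sketch (assuming an initialized *Program value p and this
+// package's XMM register constants; as the encodings below show, the last
+// operand lands in ModRM.reg and is the destination):
+//
+//     p.VPSUBQ(XMM0, XMM1, XMM2)    // per quadword: xmm2[i] = xmm1[i] - xmm0[i]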
+// +// Mnemonic : VPSUBQ +// Supported forms : (10 forms) +// +// * VPSUBQ xmm, xmm, xmm [AVX] +// * VPSUBQ m128, xmm, xmm [AVX] +// * VPSUBQ ymm, ymm, ymm [AVX2] +// * VPSUBQ m256, ymm, ymm [AVX2] +// * VPSUBQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPSUBQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPSUBQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSUBQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPSUBQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPSUBQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPSUBQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSUBQ", 3, Operands { v0, v1, v2 }) + // VPSUBQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xfb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xfb) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBQ ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xfb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xfb) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xfb) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSUBQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xfb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xfb) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSUBQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xfb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && 
isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xfb) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPSUBQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xfb) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPSUBQ") + } + return p +} + +// VPSUBSB performs "Subtract Packed Signed Byte Integers with Signed Saturation". +// +// Mnemonic : VPSUBSB +// Supported forms : (10 forms) +// +// * VPSUBSB xmm, xmm, xmm [AVX] +// * VPSUBSB m128, xmm, xmm [AVX] +// * VPSUBSB ymm, ymm, ymm [AVX2] +// * VPSUBSB m256, ymm, ymm [AVX2] +// * VPSUBSB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPSUBSB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPSUBSB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBSB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBSB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBSB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSUBSB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSUBSB", 3, Operands { v0, v1, v2 }) + // VPSUBSB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBSB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBSB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBSB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBSB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xe8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBSB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe8) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + 
} + // VPSUBSB xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xe8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBSB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe8) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSUBSB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xe8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBSB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe8) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPSUBSB") + } + return p +} + +// VPSUBSW performs "Subtract Packed Signed Word Integers with Signed Saturation". 
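+//
+// A minimal usage sketch (assuming an initialized *Program value p and this
+// package's YMM register constants); results saturate to the int16 range
+// instead of wrapping:
+//
+//     p.VPSUBSW(YMM0, YMM1, YMM2)    // per word: ymm2[i] = sat16(ymm1[i] - ymm0[i])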
+// +// Mnemonic : VPSUBSW +// Supported forms : (10 forms) +// +// * VPSUBSW xmm, xmm, xmm [AVX] +// * VPSUBSW m128, xmm, xmm [AVX] +// * VPSUBSW ymm, ymm, ymm [AVX2] +// * VPSUBSW m256, ymm, ymm [AVX2] +// * VPSUBSW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPSUBSW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPSUBSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSUBSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSUBSW", 3, Operands { v0, v1, v2 }) + // VPSUBSW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBSW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe9) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBSW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xe9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBSW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xe9) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBSW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xe9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBSW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe9) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSUBSW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xe9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBSW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe9) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSUBSW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL 
| ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xe9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBSW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xe9) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPSUBSW") + } + return p +} + +// VPSUBUSB performs "Subtract Packed Unsigned Byte Integers with Unsigned Saturation". +// +// Mnemonic : VPSUBUSB +// Supported forms : (10 forms) +// +// * VPSUBUSB xmm, xmm, xmm [AVX] +// * VPSUBUSB m128, xmm, xmm [AVX] +// * VPSUBUSB ymm, ymm, ymm [AVX2] +// * VPSUBUSB m256, ymm, ymm [AVX2] +// * VPSUBUSB zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPSUBUSB m512, zmm, zmm{k}{z} [AVX512BW] +// * VPSUBUSB xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBUSB m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBUSB ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBUSB m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSUBUSB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSUBUSB", 3, Operands { v0, v1, v2 }) + // VPSUBUSB xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBUSB m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBUSB ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBUSB m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd8) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBUSB zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xd8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBUSB m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd8) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSUBUSB xmm, xmm, xmm{k}{z} + if 
isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xd8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBUSB m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd8) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSUBUSB ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xd8) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBUSB m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd8) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPSUBUSB") + } + return p +} + +// VPSUBUSW performs "Subtract Packed Unsigned Word Integers with Unsigned Saturation". 
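+//
+// A minimal usage sketch (assuming an initialized *Program value p and this
+// package's XMM register constants); unsigned saturation clamps at zero, so
+// differences never wrap around:
+//
+//     p.VPSUBUSW(XMM0, XMM1, XMM2)    // per word: xmm2[i] = max(xmm1[i] - xmm0[i], 0)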
+// +// Mnemonic : VPSUBUSW +// Supported forms : (10 forms) +// +// * VPSUBUSW xmm, xmm, xmm [AVX] +// * VPSUBUSW m128, xmm, xmm [AVX] +// * VPSUBUSW ymm, ymm, ymm [AVX2] +// * VPSUBUSW m256, ymm, ymm [AVX2] +// * VPSUBUSW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPSUBUSW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPSUBUSW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBUSW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBUSW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBUSW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSUBUSW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSUBUSW", 3, Operands { v0, v1, v2 }) + // VPSUBUSW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBUSW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd9) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBUSW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xd9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBUSW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xd9) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBUSW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xd9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBUSW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd9) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSUBUSW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xd9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBUSW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd9) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSUBUSW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + 
self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xd9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBUSW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xd9) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPSUBUSW") + } + return p +} + +// VPSUBW performs "Subtract Packed Word Integers". +// +// Mnemonic : VPSUBW +// Supported forms : (10 forms) +// +// * VPSUBW xmm, xmm, xmm [AVX] +// * VPSUBW m128, xmm, xmm [AVX] +// * VPSUBW ymm, ymm, ymm [AVX2] +// * VPSUBW m256, ymm, ymm [AVX2] +// * VPSUBW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPSUBW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPSUBW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPSUBW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPSUBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPSUBW", 3, Operands { v0, v1, v2 }) + // VPSUBW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf9) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xf9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xf9) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPSUBW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xf9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf9) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPSUBW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + 
self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0xf9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf9) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPSUBW ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0xf9) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPSUBW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xf9) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPSUBW") + } + return p +} + +// VPTERNLOGD performs "Bitwise Ternary Logical Operation on Doubleword Values". +// +// Mnemonic : VPTERNLOGD +// Supported forms : (6 forms) +// +// * VPTERNLOGD imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPTERNLOGD imm8, zmm, zmm, zmm{k}{z} [AVX512F] +// * VPTERNLOGD imm8, m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPTERNLOGD imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPTERNLOGD imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPTERNLOGD imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPTERNLOGD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPTERNLOGD", 4, Operands { v0, v1, v2, v3 }) + // VPTERNLOGD imm8, m512/m32bcst, zmm, zmm{k}{z} + if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x25) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPTERNLOGD imm8, zmm, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x25) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPTERNLOGD imm8, m128/m32bcst, xmm, xmm{k}{z} + if isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + 
+            m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+            m.emit(0x25)
+            m.mrsd(lcode(v[3]), addr(v[1]), 16)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VPTERNLOGD imm8, xmm, xmm, xmm{k}{z}
+    if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+            m.emit(0x7d ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+            m.emit(0x25)
+            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VPTERNLOGD imm8, m256/m32bcst, ymm, ymm{k}{z}
+    if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+            m.emit(0x25)
+            m.mrsd(lcode(v[3]), addr(v[1]), 32)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VPTERNLOGD imm8, ymm, ymm, ymm{k}{z}
+    if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+            m.emit(0x7d ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+            m.emit(0x25)
+            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VPTERNLOGD")
+    }
+    return p
+}
+
+// VPTERNLOGQ performs "Bitwise Ternary Logical Operation on Quadword Values".
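+//
+// A minimal usage sketch (assuming an initialized *Program value p, this
+// package's ZMM register constants, and that a plain Go integer is accepted
+// as the imm8; the imm8 is the 8-bit truth table of the three inputs, and
+// 0x96 selects a three-way XOR):
+//
+//     p.VPTERNLOGQ(0x96, ZMM0, ZMM1, ZMM2)    // zmm2 = zmm2 ^ zmm1 ^ zmm0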
+// +// Mnemonic : VPTERNLOGQ +// Supported forms : (6 forms) +// +// * VPTERNLOGQ imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPTERNLOGQ imm8, zmm, zmm, zmm{k}{z} [AVX512F] +// * VPTERNLOGQ imm8, m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPTERNLOGQ imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPTERNLOGQ imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPTERNLOGQ imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPTERNLOGQ(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VPTERNLOGQ", 4, Operands { v0, v1, v2, v3 }) + // VPTERNLOGQ imm8, m512/m64bcst, zmm, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x25) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VPTERNLOGQ imm8, zmm, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x25) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPTERNLOGQ imm8, m128/m64bcst, xmm, xmm{k}{z} + if isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x25) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VPTERNLOGQ imm8, xmm, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0x25) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VPTERNLOGQ imm8, m256/m64bcst, ymm, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x25) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VPTERNLOGQ imm8, ymm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x25) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPTERNLOGQ") + } + return p +} + 
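+// VPTEST, defined next, writes only EFLAGS: ZF is set when the bitwise AND of
+// its two vector operands is all zeroes, and CF is set from the AND-NOT of
+// the pair, so it typically feeds a conditional branch. A minimal sketch
+// (assuming an initialized *Program value p and this package's XMM register
+// constants):
+//
+//     p.VPTEST(XMM0, XMM1)    // ZF = ((xmm1 & xmm0) == 0)
+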
+// VPTEST performs "Packed Logical Compare". +// +// Mnemonic : VPTEST +// Supported forms : (4 forms) +// +// * VPTEST xmm, xmm [AVX] +// * VPTEST m128, xmm [AVX] +// * VPTEST ymm, ymm [AVX] +// * VPTEST m256, ymm [AVX] +// +func (self *Program) VPTEST(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VPTEST", 2, Operands { v0, v1 }) + // VPTEST xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x17) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPTEST m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x17) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VPTEST ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x17) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VPTEST m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x17) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPTEST") + } + return p +} + +// VPTESTMB performs "Logical AND of Packed Byte Integer Values and Set Mask". +// +// Mnemonic : VPTESTMB +// Supported forms : (6 forms) +// +// * VPTESTMB zmm, zmm, k{k} [AVX512BW] +// * VPTESTMB m512, zmm, k{k} [AVX512BW] +// * VPTESTMB xmm, xmm, k{k} [AVX512BW,AVX512VL] +// * VPTESTMB m128, xmm, k{k} [AVX512BW,AVX512VL] +// * VPTESTMB ymm, ymm, k{k} [AVX512BW,AVX512VL] +// * VPTESTMB m256, ymm, k{k} [AVX512BW,AVX512VL] +// +func (self *Program) VPTESTMB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPTESTMB", 3, Operands { v0, v1, v2 }) + // VPTESTMB zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTMB m512, zmm, k{k} + if isM512(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPTESTMB xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTMB m128, xmm, k{k} + if isM128(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v 
[]interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPTESTMB ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTMB m256, ymm, k{k} + if isM256(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPTESTMB") + } + return p +} + +// VPTESTMD performs "Logical AND of Packed Doubleword Integer Values and Set Mask". +// +// Mnemonic : VPTESTMD +// Supported forms : (6 forms) +// +// * VPTESTMD m512/m32bcst, zmm, k{k} [AVX512F] +// * VPTESTMD zmm, zmm, k{k} [AVX512F] +// * VPTESTMD m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VPTESTMD xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VPTESTMD m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VPTESTMD ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VPTESTMD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPTESTMD", 3, Operands { v0, v1, v2 }) + // VPTESTMD m512/m32bcst, zmm, k{k} + if isM512M32bcst(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x27) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPTESTMD zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x27) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTMD m128/m32bcst, xmm, k{k} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x27) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPTESTMD xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x27) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTMD m256/m32bcst, ymm, k{k} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) 
+ m.emit(0x27) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPTESTMD ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x27) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPTESTMD") + } + return p +} + +// VPTESTMQ performs "Logical AND of Packed Quadword Integer Values and Set Mask". +// +// Mnemonic : VPTESTMQ +// Supported forms : (6 forms) +// +// * VPTESTMQ m512/m64bcst, zmm, k{k} [AVX512F] +// * VPTESTMQ zmm, zmm, k{k} [AVX512F] +// * VPTESTMQ m128/m64bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VPTESTMQ xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VPTESTMQ m256/m64bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VPTESTMQ ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VPTESTMQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPTESTMQ", 3, Operands { v0, v1, v2 }) + // VPTESTMQ m512/m64bcst, zmm, k{k} + if isM512M64bcst(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x27) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPTESTMQ zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x27) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTMQ m128/m64bcst, xmm, k{k} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x27) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPTESTMQ xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x27) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTMQ m256/m64bcst, ymm, k{k} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x27) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPTESTMQ ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 
0x20) + m.emit(0x27) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPTESTMQ") + } + return p +} + +// VPTESTMW performs "Logical AND of Packed Word Integer Values and Set Mask". +// +// Mnemonic : VPTESTMW +// Supported forms : (6 forms) +// +// * VPTESTMW zmm, zmm, k{k} [AVX512BW] +// * VPTESTMW m512, zmm, k{k} [AVX512BW] +// * VPTESTMW xmm, xmm, k{k} [AVX512BW,AVX512VL] +// * VPTESTMW m128, xmm, k{k} [AVX512BW,AVX512VL] +// * VPTESTMW ymm, ymm, k{k} [AVX512BW,AVX512VL] +// * VPTESTMW m256, ymm, k{k} [AVX512BW,AVX512VL] +// +func (self *Program) VPTESTMW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPTESTMW", 3, Operands { v0, v1, v2 }) + // VPTESTMW zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTMW m512, zmm, k{k} + if isM512(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPTESTMW xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTMW m128, xmm, k{k} + if isM128(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPTESTMW ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTMW m256, ymm, k{k} + if isM256(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPTESTMW") + } + return p +} + +// VPTESTNMB performs "Logical NAND of Packed Byte Integer Values and Set Mask". 
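+//
+// A hedged usage sketch; ZMM0, ZMM1 and the mask register K1 are assumed to be
+// this package's exported operand values:
+//
+//     p.VPTESTNMB(ZMM0, ZMM1, K1)    // K1 bit i set when byte i of (ZMM0 AND ZMM1) is zero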
+// +// Mnemonic : VPTESTNMB +// Supported forms : (6 forms) +// +// * VPTESTNMB zmm, zmm, k{k} [AVX512BW,AVX512F] +// * VPTESTNMB m512, zmm, k{k} [AVX512BW,AVX512F] +// * VPTESTNMB xmm, xmm, k{k} [AVX512BW,AVX512VL] +// * VPTESTNMB m128, xmm, k{k} [AVX512BW,AVX512VL] +// * VPTESTNMB ymm, ymm, k{k} [AVX512BW,AVX512VL] +// * VPTESTNMB m256, ymm, k{k} [AVX512BW,AVX512VL] +// +func (self *Program) VPTESTNMB(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPTESTNMB", 3, Operands { v0, v1, v2 }) + // VPTESTNMB zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTNMB m512, zmm, k{k} + if isM512(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPTESTNMB xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTNMB m128, xmm, k{k} + if isM128(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPTESTNMB ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTNMB m256, ymm, k{k} + if isM256(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPTESTNMB") + } + return p +} + +// VPTESTNMD performs "Logical NAND of Packed Doubleword Integer Values and Set Mask". 
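+//
+// Unlike the byte and word variants, the memory forms here accept an m32bcst
+// broadcast operand (see the forms below). Register-only sketch, with the
+// operand names assumed from this package:
+//
+//     p.VPTESTNMD(ZMM4, ZMM5, K2)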
+// +// Mnemonic : VPTESTNMD +// Supported forms : (6 forms) +// +// * VPTESTNMD m512/m32bcst, zmm, k{k} [AVX512F] +// * VPTESTNMD zmm, zmm, k{k} [AVX512F] +// * VPTESTNMD m128/m32bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VPTESTNMD xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VPTESTNMD m256/m32bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VPTESTNMD ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VPTESTNMD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPTESTNMD", 3, Operands { v0, v1, v2 }) + // VPTESTNMD m512/m32bcst, zmm, k{k} + if isM512M32bcst(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x27) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPTESTNMD zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x27) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTNMD m128/m32bcst, xmm, k{k} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x27) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPTESTNMD xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x27) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTNMD m256/m32bcst, ymm, k{k} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x06, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x27) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPTESTNMD ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x27) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPTESTNMD") + } + return p +} + +// VPTESTNMQ performs "Logical NAND of Packed Quadword Integer Values and Set Mask". 
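+//
+// VPTESTNMQ is the complement of VPTESTMQ above: a mask bit is set when the
+// AND of the corresponding quadwords is zero rather than non-zero. Sketch
+// (register names assumed):
+//
+//     p.VPTESTNMQ(ZMM6, ZMM7, K3)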
+// +// Mnemonic : VPTESTNMQ +// Supported forms : (6 forms) +// +// * VPTESTNMQ m512/m64bcst, zmm, k{k} [AVX512F] +// * VPTESTNMQ zmm, zmm, k{k} [AVX512F] +// * VPTESTNMQ m128/m64bcst, xmm, k{k} [AVX512F,AVX512VL] +// * VPTESTNMQ xmm, xmm, k{k} [AVX512F,AVX512VL] +// * VPTESTNMQ m256/m64bcst, ymm, k{k} [AVX512F,AVX512VL] +// * VPTESTNMQ ymm, ymm, k{k} [AVX512F,AVX512VL] +// +func (self *Program) VPTESTNMQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPTESTNMQ", 3, Operands { v0, v1, v2 }) + // VPTESTNMQ m512/m64bcst, zmm, k{k} + if isM512M64bcst(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x86, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x27) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPTESTNMQ zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x27) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTNMQ m128/m64bcst, xmm, k{k} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x86, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x27) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPTESTNMQ xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x27) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTNMQ m256/m64bcst, ymm, k{k} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x86, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, bcode(v[0])) + m.emit(0x27) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPTESTNMQ ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x27) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPTESTNMQ") + } + return p +} + +// VPTESTNMW performs "Logical NAND of Packed Word Integer Values and Set Mask". 
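+//
+// As the forms below show, the 512-bit word forms require AVX512BW together
+// with AVX512F, while the 128/256-bit forms require AVX512VL instead. Sketch
+// (YMM0/YMM1/K1 assumed):
+//
+//     p.VPTESTNMW(YMM0, YMM1, K1)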
+// +// Mnemonic : VPTESTNMW +// Supported forms : (6 forms) +// +// * VPTESTNMW zmm, zmm, k{k} [AVX512BW,AVX512F] +// * VPTESTNMW m512, zmm, k{k} [AVX512BW,AVX512F] +// * VPTESTNMW xmm, xmm, k{k} [AVX512BW,AVX512VL] +// * VPTESTNMW m128, xmm, k{k} [AVX512BW,AVX512VL] +// * VPTESTNMW ymm, ymm, k{k} [AVX512BW,AVX512VL] +// * VPTESTNMW m256, ymm, k{k} [AVX512BW,AVX512VL] +// +func (self *Program) VPTESTNMW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPTESTNMW", 3, Operands { v0, v1, v2 }) + // VPTESTNMW zmm, zmm, k{k} + if isZMM(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTNMW m512, zmm, k{k} + if isM512(v0) && isZMM(v1) && isKk(v2) { + self.require(ISA_AVX512F | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x86, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPTESTNMW xmm, xmm, k{k} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTNMW m128, xmm, k{k} + if isM128(v0) && isEVEXXMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x86, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPTESTNMW ymm, ymm, k{k} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfe ^ (hlcode(v[1]) << 3)) + m.emit((0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x26) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPTESTNMW m256, ymm, k{k} + if isM256(v0) && isEVEXYMM(v1) && isKk(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x86, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), 0, 0) + m.emit(0x26) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPTESTNMW") + } + return p +} + +// VPUNPCKHBW performs "Unpack and Interleave High-Order Bytes into Words". 
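+//
+// As throughout this file, operands are listed source-first with the
+// destination last (the reverse of Intel syntax). A hedged sketch: within each
+// 128-bit lane the high eight bytes of the two sources are interleaved:
+//
+//     p.VPUNPCKHBW(XMM1, XMM0, XMM2)    // xmm2 interleaves the high bytes of xmm0 and xmm1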
+// +// Mnemonic : VPUNPCKHBW +// Supported forms : (10 forms) +// +// * VPUNPCKHBW xmm, xmm, xmm [AVX] +// * VPUNPCKHBW m128, xmm, xmm [AVX] +// * VPUNPCKHBW ymm, ymm, ymm [AVX2] +// * VPUNPCKHBW m256, ymm, ymm [AVX2] +// * VPUNPCKHBW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPUNPCKHBW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPUNPCKHBW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPUNPCKHBW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPUNPCKHBW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPUNPCKHBW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPUNPCKHBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPUNPCKHBW", 3, Operands { v0, v1, v2 }) + // VPUNPCKHBW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x68) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHBW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x68) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKHBW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x68) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHBW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x68) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKHBW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x68) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHBW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x68) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPUNPCKHBW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x68) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHBW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x68) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPUNPCKHBW ymm, ymm, ymm{k}{z} + if 
isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x68) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHBW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x68) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPUNPCKHBW") + } + return p +} + +// VPUNPCKHDQ performs "Unpack and Interleave High-Order Doublewords into Quadwords". +// +// Mnemonic : VPUNPCKHDQ +// Supported forms : (10 forms) +// +// * VPUNPCKHDQ xmm, xmm, xmm [AVX] +// * VPUNPCKHDQ m128, xmm, xmm [AVX] +// * VPUNPCKHDQ ymm, ymm, ymm [AVX2] +// * VPUNPCKHDQ m256, ymm, ymm [AVX2] +// * VPUNPCKHDQ m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPUNPCKHDQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPUNPCKHDQ m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPUNPCKHDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPUNPCKHDQ m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPUNPCKHDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPUNPCKHDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPUNPCKHDQ", 3, Operands { v0, v1, v2 }) + // VPUNPCKHDQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x6a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHDQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x6a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKHDQ ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x6a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHDQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x6a) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKHDQ m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x6a) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPUNPCKHDQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | 
(0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x6a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHDQ m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x6a) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPUNPCKHDQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x6a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHDQ m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x6a) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPUNPCKHDQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x6a) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPUNPCKHDQ") + } + return p +} + +// VPUNPCKHQDQ performs "Unpack and Interleave High-Order Quadwords into Double Quadwords". 
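+//
+// A worked sketch (register names assumed): with the destination last, each
+// 128-bit result lane takes its low quadword from the second operand's high
+// quadword and its high quadword from the first operand's:
+//
+//     p.VPUNPCKHQDQ(XMM1, XMM0, XMM2)    // xmm2[63:0] = xmm0[127:64]; xmm2[127:64] = xmm1[127:64]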
+// +// Mnemonic : VPUNPCKHQDQ +// Supported forms : (10 forms) +// +// * VPUNPCKHQDQ xmm, xmm, xmm [AVX] +// * VPUNPCKHQDQ m128, xmm, xmm [AVX] +// * VPUNPCKHQDQ ymm, ymm, ymm [AVX2] +// * VPUNPCKHQDQ m256, ymm, ymm [AVX2] +// * VPUNPCKHQDQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPUNPCKHQDQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPUNPCKHQDQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPUNPCKHQDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPUNPCKHQDQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPUNPCKHQDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPUNPCKHQDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPUNPCKHQDQ", 3, Operands { v0, v1, v2 }) + // VPUNPCKHQDQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x6d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHQDQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x6d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKHQDQ ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x6d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHQDQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x6d) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKHQDQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x6d) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPUNPCKHQDQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x6d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHQDQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x6d) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPUNPCKHQDQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x6d) + m.emit(0xc0 | 
lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHQDQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x6d) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPUNPCKHQDQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x6d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPUNPCKHQDQ") + } + return p +} + +// VPUNPCKHWD performs "Unpack and Interleave High-Order Words into Doublewords". +// +// Mnemonic : VPUNPCKHWD +// Supported forms : (10 forms) +// +// * VPUNPCKHWD xmm, xmm, xmm [AVX] +// * VPUNPCKHWD m128, xmm, xmm [AVX] +// * VPUNPCKHWD ymm, ymm, ymm [AVX2] +// * VPUNPCKHWD m256, ymm, ymm [AVX2] +// * VPUNPCKHWD zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPUNPCKHWD m512, zmm, zmm{k}{z} [AVX512BW] +// * VPUNPCKHWD xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPUNPCKHWD m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPUNPCKHWD ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPUNPCKHWD m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPUNPCKHWD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPUNPCKHWD", 3, Operands { v0, v1, v2 }) + // VPUNPCKHWD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x69) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHWD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x69) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKHWD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x69) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHWD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x69) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKHWD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x69) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHWD m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m 
*_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x69) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPUNPCKHWD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x69) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHWD m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x69) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPUNPCKHWD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x69) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKHWD m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x69) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPUNPCKHWD") + } + return p +} + +// VPUNPCKLBW performs "Unpack and Interleave Low-Order Bytes into Words". 
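+//
+// The low-order counterpart of VPUNPCKHBW above (opcode 0x60 rather than
+// 0x68): it interleaves the low eight bytes of each 128-bit lane instead.
+// Illustrative call, register names assumed:
+//
+//     p.VPUNPCKLBW(XMM3, XMM2, XMM4)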
+// +// Mnemonic : VPUNPCKLBW +// Supported forms : (10 forms) +// +// * VPUNPCKLBW xmm, xmm, xmm [AVX] +// * VPUNPCKLBW m128, xmm, xmm [AVX] +// * VPUNPCKLBW ymm, ymm, ymm [AVX2] +// * VPUNPCKLBW m256, ymm, ymm [AVX2] +// * VPUNPCKLBW zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPUNPCKLBW m512, zmm, zmm{k}{z} [AVX512BW] +// * VPUNPCKLBW xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPUNPCKLBW m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPUNPCKLBW ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPUNPCKLBW m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPUNPCKLBW(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPUNPCKLBW", 3, Operands { v0, v1, v2 }) + // VPUNPCKLBW xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x60) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLBW m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x60) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKLBW ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x60) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLBW m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x60) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKLBW zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x60) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLBW m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x60) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPUNPCKLBW xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x60) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLBW m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x60) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPUNPCKLBW ymm, ymm, ymm{k}{z} + if 
isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x60) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLBW m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x60) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPUNPCKLBW") + } + return p +} + +// VPUNPCKLDQ performs "Unpack and Interleave Low-Order Doublewords into Quadwords". +// +// Mnemonic : VPUNPCKLDQ +// Supported forms : (10 forms) +// +// * VPUNPCKLDQ xmm, xmm, xmm [AVX] +// * VPUNPCKLDQ m128, xmm, xmm [AVX] +// * VPUNPCKLDQ ymm, ymm, ymm [AVX2] +// * VPUNPCKLDQ m256, ymm, ymm [AVX2] +// * VPUNPCKLDQ m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPUNPCKLDQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPUNPCKLDQ m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPUNPCKLDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPUNPCKLDQ m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPUNPCKLDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPUNPCKLDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPUNPCKLDQ", 3, Operands { v0, v1, v2 }) + // VPUNPCKLDQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x62) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLDQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x62) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKLDQ ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x62) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLDQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x62) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKLDQ m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x62) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPUNPCKLDQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 
^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x62) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLDQ m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x62) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPUNPCKLDQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x62) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLDQ m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x62) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPUNPCKLDQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x62) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPUNPCKLDQ") + } + return p +} + +// VPUNPCKLQDQ performs "Unpack and Interleave Low-Order Quadwords into Double Quadwords". 
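+//
+// A common use is pairing two low quadwords into one vector; hedged sketch
+// (destination last, as elsewhere in this file):
+//
+//     p.VPUNPCKLQDQ(XMM1, XMM0, XMM2)    // xmm2[63:0] = xmm0[63:0]; xmm2[127:64] = xmm1[63:0]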
+// +// Mnemonic : VPUNPCKLQDQ +// Supported forms : (10 forms) +// +// * VPUNPCKLQDQ xmm, xmm, xmm [AVX] +// * VPUNPCKLQDQ m128, xmm, xmm [AVX] +// * VPUNPCKLQDQ ymm, ymm, ymm [AVX2] +// * VPUNPCKLQDQ m256, ymm, ymm [AVX2] +// * VPUNPCKLQDQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VPUNPCKLQDQ zmm, zmm, zmm{k}{z} [AVX512F] +// * VPUNPCKLQDQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPUNPCKLQDQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPUNPCKLQDQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPUNPCKLQDQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPUNPCKLQDQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPUNPCKLQDQ", 3, Operands { v0, v1, v2 }) + // VPUNPCKLQDQ xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x6c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLQDQ m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x6c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKLQDQ ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x6c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLQDQ m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x6c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKLQDQ m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x6c) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPUNPCKLQDQ zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x6c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLQDQ m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x6c) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPUNPCKLQDQ xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x6c) + m.emit(0xc0 | 
lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLQDQ m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x6c) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VPUNPCKLQDQ ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x6c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VPUNPCKLQDQ") + } + return p +} + +// VPUNPCKLWD performs "Unpack and Interleave Low-Order Words into Doublewords". +// +// Mnemonic : VPUNPCKLWD +// Supported forms : (10 forms) +// +// * VPUNPCKLWD xmm, xmm, xmm [AVX] +// * VPUNPCKLWD m128, xmm, xmm [AVX] +// * VPUNPCKLWD ymm, ymm, ymm [AVX2] +// * VPUNPCKLWD m256, ymm, ymm [AVX2] +// * VPUNPCKLWD zmm, zmm, zmm{k}{z} [AVX512BW] +// * VPUNPCKLWD m512, zmm, zmm{k}{z} [AVX512BW] +// * VPUNPCKLWD xmm, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPUNPCKLWD m128, xmm, xmm{k}{z} [AVX512BW,AVX512VL] +// * VPUNPCKLWD ymm, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// * VPUNPCKLWD m256, ymm, ymm{k}{z} [AVX512BW,AVX512VL] +// +func (self *Program) VPUNPCKLWD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPUNPCKLWD", 3, Operands { v0, v1, v2 }) + // VPUNPCKLWD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x61) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLWD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x61) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKLWD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x61) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLWD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x61) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPUNPCKLWD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x61) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLWD m512, zmm, zmm{k}{z} + if isM512(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m 
*_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x61) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPUNPCKLWD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x61) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLWD m128, xmm, xmm{k}{z} + if isM128(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x61) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPUNPCKLWD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x61) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPUNPCKLWD m256, ymm, ymm{k}{z} + if isM256(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512BW) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x61) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + if p.len == 0 { + panic("invalid operands for VPUNPCKLWD") + } + return p +} + +// VPXOR performs "Packed Bitwise Logical Exclusive OR". 
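+//
+// VPXOR has only VEX-encoded forms; the EVEX-encodable AVX-512 equivalents are
+// VPXORD and VPXORQ below. The usual self-XOR zeroing idiom applies:
+//
+//     p.VPXOR(YMM0, YMM0, YMM0)    // zero ymm0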
+// +// Mnemonic : VPXOR +// Supported forms : (4 forms) +// +// * VPXOR xmm, xmm, xmm [AVX] +// * VPXOR m128, xmm, xmm [AVX] +// * VPXOR ymm, ymm, ymm [AVX2] +// * VPXOR m256, ymm, ymm [AVX2] +// +func (self *Program) VPXOR(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPXOR", 3, Operands { v0, v1, v2 }) + // VPXOR xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xef) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPXOR m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xef) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VPXOR ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0xef) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPXOR m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX2) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0xef) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VPXOR") + } + return p +} + +// VPXORD performs "Bitwise Logical Exclusive OR of Packed Doubleword Integers". +// +// Mnemonic : VPXORD +// Supported forms : (6 forms) +// +// * VPXORD m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VPXORD zmm, zmm, zmm{k}{z} [AVX512F] +// * VPXORD m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPXORD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VPXORD m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VPXORD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VPXORD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VPXORD", 3, Operands { v0, v1, v2 }) + // VPXORD m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xef) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VPXORD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xef) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VPXORD m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0xef) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VPXORD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v 
+            m.emit(0x62)
+            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0x7d ^ (hlcode(v[1]) << 3))
+            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+            m.emit(0xef)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+        })
+    }
+    // VPXORD m256/m32bcst, ymm, ymm{k}{z}
+    if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b01, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+            m.emit(0xef)
+            m.mrsd(lcode(v[2]), addr(v[0]), 32)
+        })
+    }
+    // VPXORD ymm, ymm, ymm{k}{z}
+    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0x7d ^ (hlcode(v[1]) << 3))
+            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+            m.emit(0xef)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VPXORD")
+    }
+    return p
+}
+
+// VPXORQ performs "Bitwise Logical Exclusive OR of Packed Quadword Integers".
+//
+// Mnemonic : VPXORQ
+// Supported forms : (6 forms)
+//
+// * VPXORQ m512/m64bcst, zmm, zmm{k}{z} [AVX512F]
+// * VPXORQ zmm, zmm, zmm{k}{z} [AVX512F]
+// * VPXORQ m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPXORQ xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VPXORQ m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+// * VPXORQ ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VPXORQ(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+    p := self.alloc("VPXORQ", 3, Operands { v0, v1, v2 })
+    // VPXORQ m512/m64bcst, zmm, zmm{k}{z}
+    if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+            m.emit(0xef)
+            m.mrsd(lcode(v[2]), addr(v[0]), 64)
+        })
+    }
+    // VPXORQ zmm, zmm, zmm{k}{z}
+    if isZMM(v0) && isZMM(v1) && isZMMkz(v2) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0xfd ^ (hlcode(v[1]) << 3))
+            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+            m.emit(0xef)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+        })
+    }
+    // VPXORQ m128/m64bcst, xmm, xmm{k}{z}
+    if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+            m.emit(0xef)
+            m.mrsd(lcode(v[2]), addr(v[0]), 16)
+        })
+    }
+    // VPXORQ xmm, xmm, xmm{k}{z}
+    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0xfd ^ (hlcode(v[1]) << 3))
+            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+            m.emit(0xef)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+        })
+    }
+    // VPXORQ m256/m64bcst, ymm, ymm{k}{z}
+    if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0]))
+            m.emit(0xef)
+            m.mrsd(lcode(v[2]), addr(v[0]), 32)
+        })
+    }
+    // VPXORQ ymm, ymm, ymm{k}{z}
+    if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0xfd ^ (hlcode(v[1]) << 3))
+            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20)
+            m.emit(0xef)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VPXORQ")
+    }
+    return p
+}
+
+// VRANGEPD performs "Range Restriction Calculation For Packed Pairs of Double-Precision Floating-Point Values".
+//
+// Mnemonic : VRANGEPD
+// Supported forms : (7 forms)
+//
+// * VRANGEPD imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512DQ]
+// * VRANGEPD imm8, {sae}, zmm, zmm, zmm{k}{z} [AVX512DQ]
+// * VRANGEPD imm8, zmm, zmm, zmm{k}{z} [AVX512DQ]
+// * VRANGEPD imm8, m128/m64bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VRANGEPD imm8, xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VRANGEPD imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VRANGEPD imm8, ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VRANGEPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
+    var p *Instruction
+    switch len(vv) {
+        case 0 : p = self.alloc("VRANGEPD", 4, Operands { v0, v1, v2, v3 })
+        case 1 : p = self.alloc("VRANGEPD", 5, Operands { v0, v1, v2, v3, vv[0] })
+        default : panic("instruction VRANGEPD takes 4 or 5 operands")
+    }
+    // VRANGEPD imm8, m512/m64bcst, zmm, zmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) {
+        self.require(ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+            m.emit(0x50)
+            m.mrsd(lcode(v[3]), addr(v[1]), 64)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRANGEPD imm8, {sae}, zmm, zmm, zmm{k}{z}
+    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMM(v3) && isZMMkz(vv[0]) {
+        self.require(ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
+            m.emit(0xfd ^ (hlcode(v[3]) << 3))
+            m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
+            m.emit(0x50)
+            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRANGEPD imm8, zmm, zmm, zmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
+        self.require(ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+            m.emit(0xfd ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+            m.emit(0x50)
+            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRANGEPD imm8, m128/m64bcst, xmm, xmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+        self.require(ISA_AVX512VL | ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+            m.emit(0x50)
+            m.mrsd(lcode(v[3]), addr(v[1]), 16)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRANGEPD imm8, xmm, xmm, xmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+        self.require(ISA_AVX512VL | ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+            m.emit(0xfd ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+            m.emit(0x50)
+            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRANGEPD imm8, m256/m64bcst, ymm, ymm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+        self.require(ISA_AVX512VL | ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+            m.emit(0x50)
+            m.mrsd(lcode(v[3]), addr(v[1]), 32)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRANGEPD imm8, ymm, ymm, ymm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+        self.require(ISA_AVX512VL | ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+            m.emit(0xfd ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+            m.emit(0x50)
+            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VRANGEPD")
+    }
+    return p
+}
+
+// VRANGEPS performs "Range Restriction Calculation For Packed Pairs of Single-Precision Floating-Point Values".
+//
+// Mnemonic : VRANGEPS
+// Supported forms : (7 forms)
+//
+// * VRANGEPS imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512DQ]
+// * VRANGEPS imm8, {sae}, zmm, zmm, zmm{k}{z} [AVX512DQ]
+// * VRANGEPS imm8, zmm, zmm, zmm{k}{z} [AVX512DQ]
+// * VRANGEPS imm8, m128/m32bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VRANGEPS imm8, xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VRANGEPS imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VRANGEPS imm8, ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VRANGEPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
+    var p *Instruction
+    switch len(vv) {
+        case 0 : p = self.alloc("VRANGEPS", 4, Operands { v0, v1, v2, v3 })
+        case 1 : p = self.alloc("VRANGEPS", 5, Operands { v0, v1, v2, v3, vv[0] })
+        default : panic("instruction VRANGEPS takes 4 or 5 operands")
+    }
+    // VRANGEPS imm8, m512/m32bcst, zmm, zmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) {
+        self.require(ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+            m.emit(0x50)
+            m.mrsd(lcode(v[3]), addr(v[1]), 64)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRANGEPS imm8, {sae}, zmm, zmm, zmm{k}{z}
+    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMM(v3) && isZMMkz(vv[0]) {
+        self.require(ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
+            m.emit(0x7d ^ (hlcode(v[3]) << 3))
+            m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
+            m.emit(0x50)
+            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRANGEPS imm8, zmm, zmm, zmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) {
+        self.require(ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+            m.emit(0x7d ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+            m.emit(0x50)
+            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRANGEPS imm8, m128/m32bcst, xmm, xmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+        self.require(ISA_AVX512VL | ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+            m.emit(0x50)
+            m.mrsd(lcode(v[3]), addr(v[1]), 16)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRANGEPS imm8, xmm, xmm, xmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+        self.require(ISA_AVX512VL | ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+            m.emit(0x7d ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+            m.emit(0x50)
+            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRANGEPS imm8, m256/m32bcst, ymm, ymm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+        self.require(ISA_AVX512VL | ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1]))
+            m.emit(0x50)
+            m.mrsd(lcode(v[3]), addr(v[1]), 32)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRANGEPS imm8, ymm, ymm, ymm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) {
+        self.require(ISA_AVX512VL | ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+            m.emit(0x7d ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20)
+            m.emit(0x50)
+            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VRANGEPS")
+    }
+    return p
+}
+
+// VRANGESD performs "Range Restriction Calculation For a pair of Scalar Double-Precision Floating-Point Values".
+//
+// Mnemonic : VRANGESD
+// Supported forms : (3 forms)
+//
+// * VRANGESD imm8, m64, xmm, xmm{k}{z} [AVX512DQ]
+// * VRANGESD imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512DQ]
+// * VRANGESD imm8, xmm, xmm, xmm{k}{z} [AVX512DQ]
+//
+func (self *Program) VRANGESD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
+    var p *Instruction
+    switch len(vv) {
+        case 0 : p = self.alloc("VRANGESD", 4, Operands { v0, v1, v2, v3 })
+        case 1 : p = self.alloc("VRANGESD", 5, Operands { v0, v1, v2, v3, vv[0] })
+        default : panic("instruction VRANGESD takes 4 or 5 operands")
+    }
+    // VRANGESD imm8, m64, xmm, xmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+        self.require(ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+            m.emit(0x51)
+            m.mrsd(lcode(v[3]), addr(v[1]), 8)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRANGESD imm8, {sae}, xmm, xmm, xmm{k}{z}
+    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) {
+        self.require(ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
+            m.emit(0xfd ^ (hlcode(v[3]) << 3))
+            m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
+            m.emit(0x51)
+            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRANGESD imm8, xmm, xmm, xmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+        self.require(ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+            m.emit(0xfd ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+            m.emit(0x51)
+            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VRANGESD")
+    }
+    return p
+}
+
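+// Example (editorial sketch, not generated code): the VRANGE* methods take the
+// immediate first and the destination last, so Intel's
+// "VRANGESD xmm1, xmm2, xmm3, imm8" becomes VRANGESD(imm8, xmm3, xmm2, xmm1)
+// here. Per the Intel SDM, imm8 bits 1:0 select the operation (00b = minimum).
+// The XMM register constants and p are assumed from elsewhere in this package:
+//
+//     p.VRANGESD(0x00, XMM3, XMM2, XMM1)   // low lane of XMM1 = min(XMM2, XMM3)
+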
+// VRANGESS performs "Range Restriction Calculation For a pair of Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VRANGESS
+// Supported forms : (3 forms)
+//
+// * VRANGESS imm8, m32, xmm, xmm{k}{z} [AVX512DQ]
+// * VRANGESS imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512DQ]
+// * VRANGESS imm8, xmm, xmm, xmm{k}{z} [AVX512DQ]
+//
+func (self *Program) VRANGESS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
+    var p *Instruction
+    switch len(vv) {
+        case 0 : p = self.alloc("VRANGESS", 4, Operands { v0, v1, v2, v3 })
+        case 1 : p = self.alloc("VRANGESS", 5, Operands { v0, v1, v2, v3, vv[0] })
+        default : panic("instruction VRANGESS takes 4 or 5 operands")
+    }
+    // VRANGESS imm8, m32, xmm, xmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+        self.require(ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+            m.emit(0x51)
+            m.mrsd(lcode(v[3]), addr(v[1]), 4)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRANGESS imm8, {sae}, xmm, xmm, xmm{k}{z}
+    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) {
+        self.require(ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
+            m.emit(0x7d ^ (hlcode(v[3]) << 3))
+            m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
+            m.emit(0x51)
+            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRANGESS imm8, xmm, xmm, xmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+        self.require(ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+            m.emit(0x7d ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+            m.emit(0x51)
+            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VRANGESS")
+    }
+    return p
+}
+
+// VRCP14PD performs "Compute Approximate Reciprocals of Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VRCP14PD
+// Supported forms : (6 forms)
+//
+// * VRCP14PD m512/m64bcst, zmm{k}{z} [AVX512F]
+// * VRCP14PD zmm, zmm{k}{z} [AVX512F]
+// * VRCP14PD m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VRCP14PD m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VRCP14PD xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VRCP14PD ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VRCP14PD(v0 interface{}, v1 interface{}) *Instruction {
+    p := self.alloc("VRCP14PD", 2, Operands { v0, v1 })
+    // VRCP14PD m512/m64bcst, zmm{k}{z}
+    if isM512M64bcst(v0) && isZMMkz(v1) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+            m.emit(0x4c)
+            m.mrsd(lcode(v[1]), addr(v[0]), 64)
+        })
+    }
+    // VRCP14PD zmm, zmm{k}{z}
+    if isZMM(v0) && isZMMkz(v1) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+            m.emit(0xfd)
+            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+            m.emit(0x4c)
+            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+        })
+    }
+    // VRCP14PD m128/m64bcst, xmm{k}{z}
+    if isM128M64bcst(v0) && isXMMkz(v1) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+            m.emit(0x4c)
+            m.mrsd(lcode(v[1]), addr(v[0]), 16)
+        })
+    }
+    // VRCP14PD m256/m64bcst, ymm{k}{z}
+    if isM256M64bcst(v0) && isYMMkz(v1) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+            m.emit(0x4c)
+            m.mrsd(lcode(v[1]), addr(v[0]), 32)
+        })
+    }
+    // VRCP14PD xmm, xmm{k}{z}
+    if isEVEXXMM(v0) && isXMMkz(v1) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+            m.emit(0xfd)
+            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+            m.emit(0x4c)
+            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+        })
+    }
+    // VRCP14PD ymm, ymm{k}{z}
+    if isEVEXYMM(v0) && isYMMkz(v1) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+            m.emit(0xfd)
+            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+            m.emit(0x4c)
+            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VRCP14PD")
+    }
+    return p
+}
+
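+// Example (editorial sketch, not generated code): VRCP14PD fills each
+// destination lane with a reciprocal approximation whose maximum relative
+// error is 2^-14. The ZMM register constants and p are assumed from elsewhere
+// in this package:
+//
+//     p.VRCP14PD(ZMM1, ZMM2)   // ZMM2[i] ~= 1.0 / ZMM1[i]
+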
+// VRCP14PS performs "Compute Approximate Reciprocals of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VRCP14PS
+// Supported forms : (6 forms)
+//
+// * VRCP14PS m512/m32bcst, zmm{k}{z} [AVX512F]
+// * VRCP14PS zmm, zmm{k}{z} [AVX512F]
+// * VRCP14PS m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VRCP14PS m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VRCP14PS xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VRCP14PS ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VRCP14PS(v0 interface{}, v1 interface{}) *Instruction {
+    p := self.alloc("VRCP14PS", 2, Operands { v0, v1 })
+    // VRCP14PS m512/m32bcst, zmm{k}{z}
+    if isM512M32bcst(v0) && isZMMkz(v1) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+            m.emit(0x4c)
+            m.mrsd(lcode(v[1]), addr(v[0]), 64)
+        })
+    }
+    // VRCP14PS zmm, zmm{k}{z}
+    if isZMM(v0) && isZMMkz(v1) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+            m.emit(0x7d)
+            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+            m.emit(0x4c)
+            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+        })
+    }
+    // VRCP14PS m128/m32bcst, xmm{k}{z}
+    if isM128M32bcst(v0) && isXMMkz(v1) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+            m.emit(0x4c)
+            m.mrsd(lcode(v[1]), addr(v[0]), 16)
+        })
+    }
+    // VRCP14PS m256/m32bcst, ymm{k}{z}
+    if isM256M32bcst(v0) && isYMMkz(v1) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+            m.emit(0x4c)
+            m.mrsd(lcode(v[1]), addr(v[0]), 32)
+        })
+    }
+    // VRCP14PS xmm, xmm{k}{z}
+    if isEVEXXMM(v0) && isXMMkz(v1) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+            m.emit(0x7d)
+            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08)
+            m.emit(0x4c)
+            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+        })
+    }
+    // VRCP14PS ymm, ymm{k}{z}
+    if isEVEXYMM(v0) && isYMMkz(v1) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+            m.emit(0x7d)
+            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28)
+            m.emit(0x4c)
+            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VRCP14PS")
+    }
+    return p
+}
+
+// VRCP14SD performs "Compute Approximate Reciprocal of a Scalar Double-Precision Floating-Point Value".
+//
+// Mnemonic : VRCP14SD
+// Supported forms : (2 forms)
+//
+// * VRCP14SD xmm, xmm, xmm{k}{z} [AVX512F]
+// * VRCP14SD m64, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VRCP14SD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+    p := self.alloc("VRCP14SD", 3, Operands { v0, v1, v2 })
+    // VRCP14SD xmm, xmm, xmm{k}{z}
+    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0xfd ^ (hlcode(v[1]) << 3))
+            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+            m.emit(0x4d)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+        })
+    }
+    // VRCP14SD m64, xmm, xmm{k}{z}
+    if isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+            m.emit(0x4d)
+            m.mrsd(lcode(v[2]), addr(v[0]), 8)
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VRCP14SD")
+    }
+    return p
+}
+
+// VRCP14SS performs "Compute Approximate Reciprocal of a Scalar Single-Precision Floating-Point Value".
+//
+// Mnemonic : VRCP14SS
+// Supported forms : (2 forms)
+//
+// * VRCP14SS xmm, xmm, xmm{k}{z} [AVX512F]
+// * VRCP14SS m32, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VRCP14SS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+    p := self.alloc("VRCP14SS", 3, Operands { v0, v1, v2 })
+    // VRCP14SS xmm, xmm, xmm{k}{z}
+    if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0x7d ^ (hlcode(v[1]) << 3))
+            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00)
+            m.emit(0x4d)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+        })
+    }
+    // VRCP14SS m32, xmm, xmm{k}{z}
+    if isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+            m.emit(0x4d)
+            m.mrsd(lcode(v[2]), addr(v[0]), 4)
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VRCP14SS")
+    }
+    return p
+}
+
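+// Example (editorial sketch, not generated code): the scalar VRCP14S* forms
+// take three operands; only the low element is computed, and the upper lanes
+// of the destination are copied from the middle (first-source) register, as in
+// Intel's "VRCP14SS xmm1, xmm2, xmm3/m32". XMM constants and p assumed from
+// this package:
+//
+//     p.VRCP14SS(XMM3, XMM2, XMM1)   // XMM1.lo ~= 1/XMM3.lo, upper lanes from XMM2
+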
+// VRCP28PD performs "Approximation to the Reciprocal of Packed Double-Precision Floating-Point Values with Less Than 2^-28 Relative Error".
+//
+// Mnemonic : VRCP28PD
+// Supported forms : (3 forms)
+//
+// * VRCP28PD m512/m64bcst, zmm{k}{z} [AVX512ER]
+// * VRCP28PD {sae}, zmm, zmm{k}{z} [AVX512ER]
+// * VRCP28PD zmm, zmm{k}{z} [AVX512ER]
+//
+func (self *Program) VRCP28PD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+    var p *Instruction
+    switch len(vv) {
+        case 0 : p = self.alloc("VRCP28PD", 2, Operands { v0, v1 })
+        case 1 : p = self.alloc("VRCP28PD", 3, Operands { v0, v1, vv[0] })
+        default : panic("instruction VRCP28PD takes 2 or 3 operands")
+    }
+    // VRCP28PD m512/m64bcst, zmm{k}{z}
+    if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) {
+        self.require(ISA_AVX512ER)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+            m.emit(0xca)
+            m.mrsd(lcode(v[1]), addr(v[0]), 64)
+        })
+    }
+    // VRCP28PD {sae}, zmm, zmm{k}{z}
+    if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
+        self.require(ISA_AVX512ER)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0xfd)
+            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
+            m.emit(0xca)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+        })
+    }
+    // VRCP28PD zmm, zmm{k}{z}
+    if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
+        self.require(ISA_AVX512ER)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+            m.emit(0xfd)
+            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+            m.emit(0xca)
+            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VRCP28PD")
+    }
+    return p
+}
+
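+// Example (editorial sketch, not generated code): VRCP28PD is variadic because
+// the {sae} operand is optional: two operands for the plain form, three with
+// suppress-all-exceptions. The SAE operand name below is an assumption (the
+// package is expected to export a value accepted by isSAE); ZMM constants and
+// p are likewise assumed:
+//
+//     p.VRCP28PD(ZMM1, ZMM2)        // plain form
+//     p.VRCP28PD(SAE, ZMM1, ZMM2)   // {sae} form; SAE constant assumed
+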
+// VRCP28PS performs "Approximation to the Reciprocal of Packed Single-Precision Floating-Point Values with Less Than 2^-28 Relative Error".
+//
+// Mnemonic : VRCP28PS
+// Supported forms : (3 forms)
+//
+// * VRCP28PS m512/m32bcst, zmm{k}{z} [AVX512ER]
+// * VRCP28PS {sae}, zmm, zmm{k}{z} [AVX512ER]
+// * VRCP28PS zmm, zmm{k}{z} [AVX512ER]
+//
+func (self *Program) VRCP28PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction {
+    var p *Instruction
+    switch len(vv) {
+        case 0 : p = self.alloc("VRCP28PS", 2, Operands { v0, v1 })
+        case 1 : p = self.alloc("VRCP28PS", 3, Operands { v0, v1, vv[0] })
+        default : panic("instruction VRCP28PS takes 2 or 3 operands")
+    }
+    // VRCP28PS m512/m32bcst, zmm{k}{z}
+    if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) {
+        self.require(ISA_AVX512ER)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0]))
+            m.emit(0xca)
+            m.mrsd(lcode(v[1]), addr(v[0]), 64)
+        })
+    }
+    // VRCP28PS {sae}, zmm, zmm{k}{z}
+    if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) {
+        self.require(ISA_AVX512ER)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0x7d)
+            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18)
+            m.emit(0xca)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+        })
+    }
+    // VRCP28PS zmm, zmm{k}{z}
+    if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) {
+        self.require(ISA_AVX512ER)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4)))
+            m.emit(0x7d)
+            m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48)
+            m.emit(0xca)
+            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VRCP28PS")
+    }
+    return p
+}
+
+// VRCP28SD performs "Approximation to the Reciprocal of a Scalar Double-Precision Floating-Point Value with Less Than 2^-28 Relative Error".
+//
+// Mnemonic : VRCP28SD
+// Supported forms : (3 forms)
+//
+// * VRCP28SD m64, xmm, xmm{k}{z} [AVX512ER]
+// * VRCP28SD {sae}, xmm, xmm, xmm{k}{z} [AVX512ER]
+// * VRCP28SD xmm, xmm, xmm{k}{z} [AVX512ER]
+//
+func (self *Program) VRCP28SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+    var p *Instruction
+    switch len(vv) {
+        case 0 : p = self.alloc("VRCP28SD", 3, Operands { v0, v1, v2 })
+        case 1 : p = self.alloc("VRCP28SD", 4, Operands { v0, v1, v2, vv[0] })
+        default : panic("instruction VRCP28SD takes 3 or 4 operands")
+    }
+    // VRCP28SD m64, xmm, xmm{k}{z}
+    if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+        self.require(ISA_AVX512ER)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+            m.emit(0xcb)
+            m.mrsd(lcode(v[2]), addr(v[0]), 8)
+        })
+    }
+    // VRCP28SD {sae}, xmm, xmm, xmm{k}{z}
+    if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+        self.require(ISA_AVX512ER)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+            m.emit(0xfd ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+            m.emit(0xcb)
+            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+        })
+    }
+    // VRCP28SD xmm, xmm, xmm{k}{z}
+    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+        self.require(ISA_AVX512ER)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0xfd ^ (hlcode(v[1]) << 3))
+            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+            m.emit(0xcb)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VRCP28SD")
+    }
+    return p
+}
+
+// VRCP28SS performs "Approximation to the Reciprocal of a Scalar Single-Precision Floating-Point Value with Less Than 2^-28 Relative Error".
+//
+// Mnemonic : VRCP28SS
+// Supported forms : (3 forms)
+//
+// * VRCP28SS m32, xmm, xmm{k}{z} [AVX512ER]
+// * VRCP28SS {sae}, xmm, xmm, xmm{k}{z} [AVX512ER]
+// * VRCP28SS xmm, xmm, xmm{k}{z} [AVX512ER]
+//
+func (self *Program) VRCP28SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+    var p *Instruction
+    switch len(vv) {
+        case 0 : p = self.alloc("VRCP28SS", 3, Operands { v0, v1, v2 })
+        case 1 : p = self.alloc("VRCP28SS", 4, Operands { v0, v1, v2, vv[0] })
+        default : panic("instruction VRCP28SS takes 3 or 4 operands")
+    }
+    // VRCP28SS m32, xmm, xmm{k}{z}
+    if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+        self.require(ISA_AVX512ER)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0)
+            m.emit(0xcb)
+            m.mrsd(lcode(v[2]), addr(v[0]), 4)
+        })
+    }
+    // VRCP28SS {sae}, xmm, xmm, xmm{k}{z}
+    if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) {
+        self.require(ISA_AVX512ER)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+            m.emit(0x7d ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10)
+            m.emit(0xcb)
+            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+        })
+    }
+    // VRCP28SS xmm, xmm, xmm{k}{z}
+    if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+        self.require(ISA_AVX512ER)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0x7d ^ (hlcode(v[1]) << 3))
+            m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40)
+            m.emit(0xcb)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VRCP28SS")
+    }
+    return p
+}
+
+// VRCPPS performs "Compute Approximate Reciprocals of Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VRCPPS
+// Supported forms : (4 forms)
+//
+// * VRCPPS xmm, xmm [AVX]
+// * VRCPPS m128, xmm [AVX]
+// * VRCPPS ymm, ymm [AVX]
+// * VRCPPS m256, ymm [AVX]
+//
+func (self *Program) VRCPPS(v0 interface{}, v1 interface{}) *Instruction {
+    p := self.alloc("VRCPPS", 2, Operands { v0, v1 })
+    // VRCPPS xmm, xmm
+    if isXMM(v0) && isXMM(v1) {
+        self.require(ISA_AVX)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.vex2(0, hcode(v[1]), v[0], 0)
+            m.emit(0x53)
+            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+        })
+    }
+    // VRCPPS m128, xmm
+    if isM128(v0) && isXMM(v1) {
+        self.require(ISA_AVX)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.vex2(0, hcode(v[1]), addr(v[0]), 0)
+            m.emit(0x53)
+            m.mrsd(lcode(v[1]), addr(v[0]), 1)
+        })
+    }
+    // VRCPPS ymm, ymm
+    if isYMM(v0) && isYMM(v1) {
+        self.require(ISA_AVX)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.vex2(4, hcode(v[1]), v[0], 0)
+            m.emit(0x53)
+            m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+        })
+    }
+    // VRCPPS m256, ymm
+    if isM256(v0) && isYMM(v1) {
+        self.require(ISA_AVX)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.vex2(4, hcode(v[1]), addr(v[0]), 0)
+            m.emit(0x53)
+            m.mrsd(lcode(v[1]), addr(v[0]), 1)
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VRCPPS")
+    }
+    return p
+}
+
+// VRCPSS performs "Compute Approximate Reciprocal of Scalar Single-Precision Floating-Point Values".
+//
+// Mnemonic : VRCPSS
+// Supported forms : (2 forms)
+//
+// * VRCPSS xmm, xmm, xmm [AVX]
+// * VRCPSS m32, xmm, xmm [AVX]
+//
+func (self *Program) VRCPSS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+    p := self.alloc("VRCPSS", 3, Operands { v0, v1, v2 })
+    // VRCPSS xmm, xmm, xmm
+    if isXMM(v0) && isXMM(v1) && isXMM(v2) {
+        self.require(ISA_AVX)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.vex2(2, hcode(v[2]), v[0], hlcode(v[1]))
+            m.emit(0x53)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0]))
+        })
+    }
+    // VRCPSS m32, xmm, xmm
+    if isM32(v0) && isXMM(v1) && isXMM(v2) {
+        self.require(ISA_AVX)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1]))
+            m.emit(0x53)
+            m.mrsd(lcode(v[2]), addr(v[0]), 1)
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VRCPSS")
+    }
+    return p
+}
+
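+// Example (editorial sketch, not generated code): the legacy-AVX reciprocal
+// helpers need only plain XMM/YMM operands and the AVX ISA bit. Register
+// constants and p are assumed from elsewhere in this package:
+//
+//     p.VRCPPS(YMM0, YMM1)         // YMM1[i] ~= 1/YMM0[i], roughly 12-bit precision
+//     p.VRCPSS(XMM0, XMM1, XMM2)   // low lane from XMM0, upper lanes copied from XMM1
+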
+// VREDUCEPD performs "Perform Reduction Transformation on Packed Double-Precision Floating-Point Values".
+//
+// Mnemonic : VREDUCEPD
+// Supported forms : (6 forms)
+//
+// * VREDUCEPD imm8, m512/m64bcst, zmm{k}{z} [AVX512DQ]
+// * VREDUCEPD imm8, zmm, zmm{k}{z} [AVX512DQ]
+// * VREDUCEPD imm8, m128/m64bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VREDUCEPD imm8, m256/m64bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VREDUCEPD imm8, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VREDUCEPD imm8, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VREDUCEPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+    p := self.alloc("VREDUCEPD", 3, Operands { v0, v1, v2 })
+    // VREDUCEPD imm8, m512/m64bcst, zmm{k}{z}
+    if isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) {
+        self.require(ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+            m.emit(0x56)
+            m.mrsd(lcode(v[2]), addr(v[1]), 64)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VREDUCEPD imm8, zmm, zmm{k}{z}
+    if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+        self.require(ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0xfd)
+            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+            m.emit(0x56)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VREDUCEPD imm8, m128/m64bcst, xmm{k}{z}
+    if isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x85, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+            m.emit(0x56)
+            m.mrsd(lcode(v[2]), addr(v[1]), 16)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VREDUCEPD imm8, m256/m64bcst, ymm{k}{z}
+    if isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+            m.emit(0x56)
+            m.mrsd(lcode(v[2]), addr(v[1]), 32)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VREDUCEPD imm8, xmm, xmm{k}{z}
+    if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0xfd)
+            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
+            m.emit(0x56)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VREDUCEPD imm8, ymm, ymm{k}{z}
+    if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0xfd)
+            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
+            m.emit(0x56)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VREDUCEPD")
+    }
+    return p
+}
+
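+// Example (editorial sketch, not generated code): VREDUCE* returns the
+// "reduced" (fractional) part of each element; per the Intel SDM, imm8 bits
+// 7:4 give the fixed-point length M and bits 1:0 the rounding mode. ZMM
+// constants and p are assumed from elsewhere in this package:
+//
+//     p.VREDUCEPD(0x10, ZMM1, ZMM2)   // M=1: remainder relative to multiples of 0.5
+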
+// VREDUCEPS performs "Perform Reduction Transformation on Packed Single-Precision Floating-Point Values".
+//
+// Mnemonic : VREDUCEPS
+// Supported forms : (6 forms)
+//
+// * VREDUCEPS imm8, m512/m32bcst, zmm{k}{z} [AVX512DQ]
+// * VREDUCEPS imm8, zmm, zmm{k}{z} [AVX512DQ]
+// * VREDUCEPS imm8, m128/m32bcst, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VREDUCEPS imm8, m256/m32bcst, ymm{k}{z} [AVX512DQ,AVX512VL]
+// * VREDUCEPS imm8, xmm, xmm{k}{z} [AVX512DQ,AVX512VL]
+// * VREDUCEPS imm8, ymm, ymm{k}{z} [AVX512DQ,AVX512VL]
+//
+func (self *Program) VREDUCEPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction {
+    p := self.alloc("VREDUCEPS", 3, Operands { v0, v1, v2 })
+    // VREDUCEPS imm8, m512/m32bcst, zmm{k}{z}
+    if isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) {
+        self.require(ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x05, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+            m.emit(0x56)
+            m.mrsd(lcode(v[2]), addr(v[1]), 64)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VREDUCEPS imm8, zmm, zmm{k}{z}
+    if isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+        self.require(ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0x7d)
+            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+            m.emit(0x56)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VREDUCEPS imm8, m128/m32bcst, xmm{k}{z}
+    if isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+            m.emit(0x56)
+            m.mrsd(lcode(v[2]), addr(v[1]), 16)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VREDUCEPS imm8, m256/m32bcst, ymm{k}{z}
+    if isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x05, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+            m.emit(0x56)
+            m.mrsd(lcode(v[2]), addr(v[1]), 32)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VREDUCEPS imm8, xmm, xmm{k}{z}
+    if isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0x7d)
+            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
+            m.emit(0x56)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VREDUCEPS imm8, ymm, ymm{k}{z}
+    if isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0x7d)
+            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
+            m.emit(0x56)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VREDUCEPS")
+    }
+    return p
+}
+
+// VREDUCESD performs "Perform Reduction Transformation on a Scalar Double-Precision Floating-Point Value".
+//
+// Mnemonic : VREDUCESD
+// Supported forms : (2 forms)
+//
+// * VREDUCESD imm8, xmm, xmm, xmm{k}{z} [AVX512DQ]
+// * VREDUCESD imm8, m64, xmm, xmm{k}{z} [AVX512DQ]
+//
+func (self *Program) VREDUCESD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+    p := self.alloc("VREDUCESD", 4, Operands { v0, v1, v2, v3 })
+    // VREDUCESD imm8, xmm, xmm, xmm{k}{z}
+    if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+        self.require(ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+            m.emit(0xfd ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+            m.emit(0x57)
+            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VREDUCESD imm8, m64, xmm, xmm{k}{z}
+    if isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+        self.require(ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+            m.emit(0x57)
+            m.mrsd(lcode(v[3]), addr(v[1]), 8)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VREDUCESD")
+    }
+    return p
+}
+
+// VREDUCESS performs "Perform Reduction Transformation on a Scalar Single-Precision Floating-Point Value".
+//
+// Mnemonic : VREDUCESS
+// Supported forms : (2 forms)
+//
+// * VREDUCESS imm8, xmm, xmm, xmm{k}{z} [AVX512DQ]
+// * VREDUCESS imm8, m32, xmm, xmm{k}{z} [AVX512DQ]
+//
+func (self *Program) VREDUCESS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction {
+    p := self.alloc("VREDUCESS", 4, Operands { v0, v1, v2, v3 })
+    // VREDUCESS imm8, xmm, xmm, xmm{k}{z}
+    if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+        self.require(ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+            m.emit(0x7d ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00)
+            m.emit(0x57)
+            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VREDUCESS imm8, m32, xmm, xmm{k}{z}
+    if isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+        self.require(ISA_AVX512DQ)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+            m.emit(0x57)
+            m.mrsd(lcode(v[3]), addr(v[1]), 4)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VREDUCESS")
+    }
+    return p
+}
+
+// VRNDSCALEPD performs "Round Packed Double-Precision Floating-Point Values To Include A Given Number Of Fraction Bits".
+//
+// Mnemonic : VRNDSCALEPD
+// Supported forms : (7 forms)
+//
+// * VRNDSCALEPD imm8, m512/m64bcst, zmm{k}{z} [AVX512F]
+// * VRNDSCALEPD imm8, {sae}, zmm, zmm{k}{z} [AVX512F]
+// * VRNDSCALEPD imm8, zmm, zmm{k}{z} [AVX512F]
+// * VRNDSCALEPD imm8, m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VRNDSCALEPD imm8, m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VRNDSCALEPD imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VRNDSCALEPD imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VRNDSCALEPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+    var p *Instruction
+    switch len(vv) {
+        case 0 : p = self.alloc("VRNDSCALEPD", 3, Operands { v0, v1, v2 })
+        case 1 : p = self.alloc("VRNDSCALEPD", 4, Operands { v0, v1, v2, vv[0] })
+        default : panic("instruction VRNDSCALEPD takes 3 or 4 operands")
+    }
+    // VRNDSCALEPD imm8, m512/m64bcst, zmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isM512M64bcst(v1) && isZMMkz(v2) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x85, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+            m.emit(0x09)
+            m.mrsd(lcode(v[2]), addr(v[1]), 64)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRNDSCALEPD imm8, {sae}, zmm, zmm{k}{z}
+    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[3]) << 4)))
+            m.emit(0xfd)
+            m.emit((zcode(v[3]) << 7) | kcode(v[3]) | 0x18)
+            m.emit(0x09)
+            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[2]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRNDSCALEPD imm8, zmm, zmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0xfd)
+            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+            m.emit(0x09)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRNDSCALEPD imm8, m128/m64bcst, xmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isM128M64bcst(v1) && isXMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x85, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+            m.emit(0x09)
+            m.mrsd(lcode(v[2]), addr(v[1]), 16)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRNDSCALEPD imm8, m256/m64bcst, ymm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isM256M64bcst(v1) && isYMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x85, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+            m.emit(0x09)
+            m.mrsd(lcode(v[2]), addr(v[1]), 32)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRNDSCALEPD imm8, xmm, xmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0xfd)
+            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
+            m.emit(0x09)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRNDSCALEPD imm8, ymm, ymm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0xfd)
+            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
+            m.emit(0x09)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VRNDSCALEPD")
+    }
+    return p
+}
+
+// VRNDSCALEPS performs "Round Packed Single-Precision Floating-Point Values To Include A Given Number Of Fraction Bits".
+//
+// Mnemonic : VRNDSCALEPS
+// Supported forms : (7 forms)
+//
+// * VRNDSCALEPS imm8, m512/m32bcst, zmm{k}{z} [AVX512F]
+// * VRNDSCALEPS imm8, {sae}, zmm, zmm{k}{z} [AVX512F]
+// * VRNDSCALEPS imm8, zmm, zmm{k}{z} [AVX512F]
+// * VRNDSCALEPS imm8, m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL]
+// * VRNDSCALEPS imm8, m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL]
+// * VRNDSCALEPS imm8, xmm, xmm{k}{z} [AVX512F,AVX512VL]
+// * VRNDSCALEPS imm8, ymm, ymm{k}{z} [AVX512F,AVX512VL]
+//
+func (self *Program) VRNDSCALEPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction {
+    var p *Instruction
+    switch len(vv) {
+        case 0 : p = self.alloc("VRNDSCALEPS", 3, Operands { v0, v1, v2 })
+        case 1 : p = self.alloc("VRNDSCALEPS", 4, Operands { v0, v1, v2, vv[0] })
+        default : panic("instruction VRNDSCALEPS takes 3 or 4 operands")
+    }
+    // VRNDSCALEPS imm8, m512/m32bcst, zmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isM512M32bcst(v1) && isZMMkz(v2) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x05, 0b10, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+            m.emit(0x08)
+            m.mrsd(lcode(v[2]), addr(v[1]), 64)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRNDSCALEPS imm8, {sae}, zmm, zmm{k}{z}
+    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isZMM(v2) && isZMMkz(vv[0]) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[3]) << 4)))
+            m.emit(0x7d)
+            m.emit((zcode(v[3]) << 7) | kcode(v[3]) | 0x18)
+            m.emit(0x08)
+            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[2]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRNDSCALEPS imm8, zmm, zmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isZMM(v1) && isZMMkz(v2) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0x7d)
+            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x48)
+            m.emit(0x08)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRNDSCALEPS imm8, m128/m32bcst, xmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isM128M32bcst(v1) && isXMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x05, 0b00, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+            m.emit(0x08)
+            m.mrsd(lcode(v[2]), addr(v[1]), 16)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRNDSCALEPS imm8, m256/m32bcst, ymm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isM256M32bcst(v1) && isYMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x05, 0b01, ehcode(v[2]), addr(v[1]), 0, kcode(v[2]), zcode(v[2]), bcode(v[1]))
+            m.emit(0x08)
+            m.mrsd(lcode(v[2]), addr(v[1]), 32)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRNDSCALEPS imm8, xmm, xmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isXMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0x7d)
+            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x08)
+            m.emit(0x08)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRNDSCALEPS imm8, ymm, ymm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isEVEXYMM(v1) && isYMMkz(v2) {
+        self.require(ISA_AVX512VL | ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4)))
+            m.emit(0x7d)
+            m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x28)
+            m.emit(0x08)
+            m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VRNDSCALEPS")
+    }
+    return p
+}
+
+// VRNDSCALESD performs "Round Scalar Double-Precision Floating-Point Value To Include A Given Number Of Fraction Bits".
+//
+// Mnemonic : VRNDSCALESD
+// Supported forms : (3 forms)
+//
+// * VRNDSCALESD imm8, m64, xmm, xmm{k}{z} [AVX512F]
+// * VRNDSCALESD imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512F]
+// * VRNDSCALESD imm8, xmm, xmm, xmm{k}{z} [AVX512F]
+//
+func (self *Program) VRNDSCALESD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction {
+    var p *Instruction
+    switch len(vv) {
+        case 0 : p = self.alloc("VRNDSCALESD", 4, Operands { v0, v1, v2, v3 })
+        case 1 : p = self.alloc("VRNDSCALESD", 5, Operands { v0, v1, v2, v3, vv[0] })
+        default : panic("instruction VRNDSCALESD takes 4 or 5 operands")
+    }
+    // VRNDSCALESD imm8, m64, xmm, xmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isM64(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.evex(0b11, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0)
+            m.emit(0x0b)
+            m.mrsd(lcode(v[3]), addr(v[1]), 8)
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRNDSCALESD imm8, {sae}, xmm, xmm, xmm{k}{z}
+    if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4)))
+            m.emit(0xfd ^ (hlcode(v[3]) << 3))
+            m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10)
+            m.emit(0x0b)
+            m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    // VRNDSCALESD imm8, xmm, xmm, xmm{k}{z}
+    if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) {
+        self.require(ISA_AVX512F)
+        p.domain = DomainAVX
+        p.add(0, func(m *_Encoding, v []interface{}) {
+            m.emit(0x62)
+            m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4)))
+            m.emit(0xfd ^ (hlcode(v[2]) << 3))
+            m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40)
+            m.emit(0x0b)
+            m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1]))
+            m.imm1(toImmAny(v[0]))
+        })
+    }
+    if p.len == 0 {
+        panic("invalid operands for VRNDSCALESD")
+    }
+    return p
+}
+
+// VRNDSCALESS performs "Round Scalar Single-Precision Floating-Point Value To Include A Given Number Of Fraction Bits".
Include A Given Number Of Fraction Bits". +// +// Mnemonic : VRNDSCALESS +// Supported forms : (3 forms) +// +// * VRNDSCALESS imm8, m32, xmm, xmm{k}{z} [AVX512F] +// * VRNDSCALESS imm8, {sae}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VRNDSCALESS imm8, xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VRNDSCALESS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VRNDSCALESS", 4, Operands { v0, v1, v2, v3 }) + case 1 : p = self.alloc("VRNDSCALESS", 5, Operands { v0, v1, v2, v3, vv[0] }) + default : panic("instruction VRNDSCALESS takes 4 or 5 operands") + } + // VRNDSCALESS imm8, m32, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isM32(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), 0) + m.emit(0x0a) + m.mrsd(lcode(v[3]), addr(v[1]), 4) + m.imm1(toImmAny(v[0])) + }) + } + // VRNDSCALESS imm8, {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isImm8(v0) && isSAE(v1) && isEVEXXMM(v2) && isEVEXXMM(v3) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[4]) << 7) | (ehcode(v[2]) << 5) | (ecode(v[4]) << 4))) + m.emit(0x7d ^ (hlcode(v[3]) << 3)) + m.emit((zcode(v[4]) << 7) | (0x08 ^ (ecode(v[3]) << 3)) | kcode(v[4]) | 0x10) + m.emit(0x0a) + m.emit(0xc0 | lcode(v[4]) << 3 | lcode(v[2])) + m.imm1(toImmAny(v[0])) + }) + } + // VRNDSCALESS imm8, xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x0a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRNDSCALESS") + } + return p +} + +// VROUNDPD performs "Round Packed Double Precision Floating-Point Values". 
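+//
+// Illustrative sketch, not generated output: given a *Program p, and assuming
+// YMM0/YMM1 are this package's register operand constants,
+//
+//     p.VROUNDPD(0x01, YMM1, YMM0)    // YMM0[i] = floor(YMM1[i])
+//
+// Per Intel's ROUNDPD immediate encoding, imm8 bits 1:0 select the rounding
+// mode (0 nearest-even, 1 down, 2 up, 3 truncate), bit 2 defers to MXCSR.RC,
+// and bit 3 suppresses precision exceptions.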
+// +// Mnemonic : VROUNDPD +// Supported forms : (4 forms) +// +// * VROUNDPD imm8, xmm, xmm [AVX] +// * VROUNDPD imm8, m128, xmm [AVX] +// * VROUNDPD imm8, ymm, ymm [AVX] +// * VROUNDPD imm8, m256, ymm [AVX] +// +func (self *Program) VROUNDPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VROUNDPD", 3, Operands { v0, v1, v2 }) + // VROUNDPD imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79) + m.emit(0x09) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VROUNDPD imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0) + m.emit(0x09) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VROUNDPD imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d) + m.emit(0x09) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VROUNDPD imm8, m256, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[2]), addr(v[1]), 0) + m.emit(0x09) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VROUNDPD") + } + return p +} + +// VROUNDPS performs "Round Packed Single Precision Floating-Point Values". 
+// +// Mnemonic : VROUNDPS +// Supported forms : (4 forms) +// +// * VROUNDPS imm8, xmm, xmm [AVX] +// * VROUNDPS imm8, m128, xmm [AVX] +// * VROUNDPS imm8, ymm, ymm [AVX] +// * VROUNDPS imm8, m256, ymm [AVX] +// +func (self *Program) VROUNDPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VROUNDPS", 3, Operands { v0, v1, v2 }) + // VROUNDPS imm8, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79) + m.emit(0x08) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VROUNDPS imm8, m128, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[2]), addr(v[1]), 0) + m.emit(0x08) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VROUNDPS imm8, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[2]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x7d) + m.emit(0x08) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VROUNDPS imm8, m256, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x05, hcode(v[2]), addr(v[1]), 0) + m.emit(0x08) + m.mrsd(lcode(v[2]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VROUNDPS") + } + return p +} + +// VROUNDSD performs "Round Scalar Double Precision Floating-Point Values". +// +// Mnemonic : VROUNDSD +// Supported forms : (2 forms) +// +// * VROUNDSD imm8, xmm, xmm, xmm [AVX] +// * VROUNDSD imm8, m64, xmm, xmm [AVX] +// +func (self *Program) VROUNDSD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VROUNDSD", 4, Operands { v0, v1, v2, v3 }) + // VROUNDSD imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x0b) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VROUNDSD imm8, m64, xmm, xmm + if isImm8(v0) && isM64(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x0b) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VROUNDSD") + } + return p +} + +// VROUNDSS performs "Round Scalar Single Precision Floating-Point Values". 
+// +// Mnemonic : VROUNDSS +// Supported forms : (2 forms) +// +// * VROUNDSS imm8, xmm, xmm, xmm [AVX] +// * VROUNDSS imm8, m32, xmm, xmm [AVX] +// +func (self *Program) VROUNDSS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VROUNDSS", 4, Operands { v0, v1, v2, v3 }) + // VROUNDSS imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe3 ^ (hcode(v[3]) << 7) ^ (hcode(v[1]) << 5)) + m.emit(0x79 ^ (hlcode(v[2]) << 3)) + m.emit(0x0a) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VROUNDSS imm8, m32, xmm, xmm + if isImm8(v0) && isM32(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b11, 0x01, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0x0a) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VROUNDSS") + } + return p +} + +// VRSQRT14PD performs "Compute Approximate Reciprocals of Square Roots of Packed Double-Precision Floating-Point Values". +// +// Mnemonic : VRSQRT14PD +// Supported forms : (6 forms) +// +// * VRSQRT14PD m512/m64bcst, zmm{k}{z} [AVX512F] +// * VRSQRT14PD zmm, zmm{k}{z} [AVX512F] +// * VRSQRT14PD m128/m64bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VRSQRT14PD m256/m64bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VRSQRT14PD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VRSQRT14PD ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VRSQRT14PD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VRSQRT14PD", 2, Operands { v0, v1 }) + // VRSQRT14PD m512/m64bcst, zmm{k}{z} + if isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x4e) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VRSQRT14PD zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x4e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VRSQRT14PD m128/m64bcst, xmm{k}{z} + if isM128M64bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x4e) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VRSQRT14PD m256/m64bcst, ymm{k}{z} + if isM256M64bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x4e) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VRSQRT14PD xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 
0x08) + m.emit(0x4e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VRSQRT14PD ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x4e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRSQRT14PD") + } + return p +} + +// VRSQRT14PS performs "Compute Approximate Reciprocals of Square Roots of Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VRSQRT14PS +// Supported forms : (6 forms) +// +// * VRSQRT14PS m512/m32bcst, zmm{k}{z} [AVX512F] +// * VRSQRT14PS zmm, zmm{k}{z} [AVX512F] +// * VRSQRT14PS m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VRSQRT14PS m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VRSQRT14PS xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VRSQRT14PS ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VRSQRT14PS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VRSQRT14PS", 2, Operands { v0, v1 }) + // VRSQRT14PS m512/m32bcst, zmm{k}{z} + if isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x4e) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VRSQRT14PS zmm, zmm{k}{z} + if isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x4e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VRSQRT14PS m128/m32bcst, xmm{k}{z} + if isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x4e) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VRSQRT14PS m256/m32bcst, ymm{k}{z} + if isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x4e) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VRSQRT14PS xmm, xmm{k}{z} + if isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x4e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VRSQRT14PS ymm, ymm{k}{z} + if isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x4e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRSQRT14PS") + } + 
return p +} + +// VRSQRT14SD performs "Compute Approximate Reciprocal of a Square Root of a Scalar Double-Precision Floating-Point Value". +// +// Mnemonic : VRSQRT14SD +// Supported forms : (2 forms) +// +// * VRSQRT14SD xmm, xmm, xmm{k}{z} [AVX512F] +// * VRSQRT14SD m64, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VRSQRT14SD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VRSQRT14SD", 3, Operands { v0, v1, v2 }) + // VRSQRT14SD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x4f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VRSQRT14SD m64, xmm, xmm{k}{z} + if isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x4f) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VRSQRT14SD") + } + return p +} + +// VRSQRT14SS performs "Compute Approximate Reciprocal of a Square Root of a Scalar Single-Precision Floating-Point Value". +// +// Mnemonic : VRSQRT14SS +// Supported forms : (2 forms) +// +// * VRSQRT14SS xmm, xmm, xmm{k}{z} [AVX512F] +// * VRSQRT14SS m32, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VRSQRT14SS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VRSQRT14SS", 3, Operands { v0, v1, v2 }) + // VRSQRT14SS xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x4f) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VRSQRT14SS m32, xmm, xmm{k}{z} + if isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x4f) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VRSQRT14SS") + } + return p +} + +// VRSQRT28PD performs "Approximation to the Reciprocal Square Root of Packed Double-Precision Floating-Point Values with Less Than 2^-28 Relative Error". 
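+//
+// Illustrative sketch, not generated output (ZMM register constants assumed):
+//
+//     p.VRSQRT28PD(ZMM1, ZMM0)    // ZMM0[i] = approx 1/sqrt(ZMM1[i]), relative error < 2^-28
+//
+// The three-operand variant prepends an {sae} operand to suppress exceptions,
+// as listed in the supported forms below.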
+// +// Mnemonic : VRSQRT28PD +// Supported forms : (3 forms) +// +// * VRSQRT28PD m512/m64bcst, zmm{k}{z} [AVX512ER] +// * VRSQRT28PD {sae}, zmm, zmm{k}{z} [AVX512ER] +// * VRSQRT28PD zmm, zmm{k}{z} [AVX512ER] +// +func (self *Program) VRSQRT28PD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VRSQRT28PD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VRSQRT28PD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VRSQRT28PD takes 2 or 3 operands") + } + // VRSQRT28PD m512/m64bcst, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xcc) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VRSQRT28PD {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0xcc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VRSQRT28PD zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0xcc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRSQRT28PD") + } + return p +} + +// VRSQRT28PS performs "Approximation to the Reciprocal Square Root of Packed Single-Precision Floating-Point Values with Less Than 2^-28 Relative Error". 
+// +// Mnemonic : VRSQRT28PS +// Supported forms : (3 forms) +// +// * VRSQRT28PS m512/m32bcst, zmm{k}{z} [AVX512ER] +// * VRSQRT28PS {sae}, zmm, zmm{k}{z} [AVX512ER] +// * VRSQRT28PS zmm, zmm{k}{z} [AVX512ER] +// +func (self *Program) VRSQRT28PS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VRSQRT28PS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VRSQRT28PS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VRSQRT28PS takes 2 or 3 operands") + } + // VRSQRT28PS m512/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0xcc) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VRSQRT28PS {sae}, zmm, zmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[2]) << 7) | kcode(v[2]) | 0x18) + m.emit(0xcc) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VRSQRT28PS zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7d) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0xcc) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRSQRT28PS") + } + return p +} + +// VRSQRT28SD performs "Approximation to the Reciprocal Square Root of a Scalar Double-Precision Floating-Point Value with Less Than 2^-28 Relative Error". 
+// +// Mnemonic : VRSQRT28SD +// Supported forms : (3 forms) +// +// * VRSQRT28SD m64, xmm, xmm{k}{z} [AVX512ER] +// * VRSQRT28SD {sae}, xmm, xmm, xmm{k}{z} [AVX512ER] +// * VRSQRT28SD xmm, xmm, xmm{k}{z} [AVX512ER] +// +func (self *Program) VRSQRT28SD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VRSQRT28SD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VRSQRT28SD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VRSQRT28SD takes 3 or 4 operands") + } + // VRSQRT28SD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xcd) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VRSQRT28SD {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xcd) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VRSQRT28SD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xcd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRSQRT28SD") + } + return p +} + +// VRSQRT28SS performs "Approximation to the Reciprocal Square Root of a Scalar Single-Precision Floating-Point Value with Less Than 2^-28 Relative Error". 
+// +// Mnemonic : VRSQRT28SS +// Supported forms : (3 forms) +// +// * VRSQRT28SS m32, xmm, xmm{k}{z} [AVX512ER] +// * VRSQRT28SS {sae}, xmm, xmm, xmm{k}{z} [AVX512ER] +// * VRSQRT28SS xmm, xmm, xmm{k}{z} [AVX512ER] +// +func (self *Program) VRSQRT28SS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VRSQRT28SS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VRSQRT28SS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VRSQRT28SS takes 3 or 4 operands") + } + // VRSQRT28SS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0xcd) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VRSQRT28SS {sae}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0xcd) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VRSQRT28SS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512ER) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0xcd) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VRSQRT28SS") + } + return p +} + +// VRSQRTPS performs "Compute Reciprocals of Square Roots of Packed Single-Precision Floating-Point Values". 
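+//
+// Illustrative sketch, not generated output (XMM register constants assumed):
+//
+//     p.VRSQRTPS(XMM1, XMM0)    // XMM0[i] = approx 1/sqrt(XMM1[i])
+//
+// The legacy AVX estimate carries roughly 12 bits of precision; callers that
+// need full single precision usually follow it with one Newton-Raphson step.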
+// +// Mnemonic : VRSQRTPS +// Supported forms : (4 forms) +// +// * VRSQRTPS xmm, xmm [AVX] +// * VRSQRTPS m128, xmm [AVX] +// * VRSQRTPS ymm, ymm [AVX] +// * VRSQRTPS m256, ymm [AVX] +// +func (self *Program) VRSQRTPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VRSQRTPS", 2, Operands { v0, v1 }) + // VRSQRTPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), v[0], 0) + m.emit(0x52) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VRSQRTPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), addr(v[0]), 0) + m.emit(0x52) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VRSQRTPS ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), v[0], 0) + m.emit(0x52) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VRSQRTPS m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), addr(v[0]), 0) + m.emit(0x52) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VRSQRTPS") + } + return p +} + +// VRSQRTSS performs "Compute Reciprocal of Square Root of Scalar Single-Precision Floating-Point Value". +// +// Mnemonic : VRSQRTSS +// Supported forms : (2 forms) +// +// * VRSQRTSS xmm, xmm, xmm [AVX] +// * VRSQRTSS m32, xmm, xmm [AVX] +// +func (self *Program) VRSQRTSS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VRSQRTSS", 3, Operands { v0, v1, v2 }) + // VRSQRTSS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x52) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VRSQRTSS m32, xmm, xmm + if isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x52) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VRSQRTSS") + } + return p +} + +// VSCALEFPD performs "Scale Packed Double-Precision Floating-Point Values With Double-Precision Floating-Point Values". 
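+//
+// Illustrative sketch, not generated output (ZMM register constants assumed):
+//
+//     p.VSCALEFPD(ZMM2, ZMM1, ZMM0)    // ZMM0[i] = ZMM1[i] * 2^floor(ZMM2[i])
+//
+// This is the vectorized ldexp-style primitive, e.g. for recombining results
+// after exponent/mantissa extraction.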
+// +// Mnemonic : VSCALEFPD +// Supported forms : (7 forms) +// +// * VSCALEFPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VSCALEFPD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VSCALEFPD zmm, zmm, zmm{k}{z} [AVX512F] +// * VSCALEFPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSCALEFPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSCALEFPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VSCALEFPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VSCALEFPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VSCALEFPD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VSCALEFPD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VSCALEFPD takes 3 or 4 operands") + } + // VSCALEFPD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x2c) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VSCALEFPD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VSCALEFPD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSCALEFPD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x2c) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VSCALEFPD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSCALEFPD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x2c) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VSCALEFPD ymm, ymm, ymm{k}{z} + if 
len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSCALEFPD") + } + return p +} + +// VSCALEFPS performs "Scale Packed Single-Precision Floating-Point Values With Single-Precision Floating-Point Values". +// +// Mnemonic : VSCALEFPS +// Supported forms : (7 forms) +// +// * VSCALEFPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VSCALEFPS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VSCALEFPS zmm, zmm, zmm{k}{z} [AVX512F] +// * VSCALEFPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSCALEFPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSCALEFPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VSCALEFPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VSCALEFPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VSCALEFPS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VSCALEFPS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VSCALEFPS takes 3 or 4 operands") + } + // VSCALEFPS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x2c) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VSCALEFPS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VSCALEFPS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSCALEFPS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x2c) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VSCALEFPS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | 
(ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSCALEFPS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x2c) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VSCALEFPS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x2c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSCALEFPS") + } + return p +} + +// VSCALEFSD performs "Scale Scalar Double-Precision Floating-Point Value With a Double-Precision Floating-Point Value". +// +// Mnemonic : VSCALEFSD +// Supported forms : (3 forms) +// +// * VSCALEFSD m64, xmm, xmm{k}{z} [AVX512F] +// * VSCALEFSD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VSCALEFSD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VSCALEFSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VSCALEFSD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VSCALEFSD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VSCALEFSD takes 3 or 4 operands") + } + // VSCALEFSD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x2d) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VSCALEFSD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VSCALEFSD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSCALEFSD") + } + return p +} + +// VSCALEFSS performs "Scale Scalar Single-Precision Floating-Point Value With a Single-Precision Floating-Point Value". 
+// +// Mnemonic : VSCALEFSS +// Supported forms : (3 forms) +// +// * VSCALEFSS m32, xmm, xmm{k}{z} [AVX512F] +// * VSCALEFSS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VSCALEFSS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VSCALEFSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VSCALEFSS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VSCALEFSS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VSCALEFSS takes 3 or 4 operands") + } + // VSCALEFSS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x2d) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VSCALEFSS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VSCALEFSS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf2 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7d ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x2d) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSCALEFSS") + } + return p +} + +// VSCATTERDPD performs "Scatter Packed Double-Precision Floating-Point Values with Signed Doubleword Indices". 
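+//
+// Descriptive note, not generated output: for each element i whose {k} mask
+// bit is set, the instruction stores src[i] to
+// mem[base + disp + sign_extend(index[i])*scale] and then clears that mask
+// bit, so a faulted scatter can be restarted; on normal completion the mask
+// register is zero.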
+// +// Mnemonic : VSCATTERDPD +// Supported forms : (3 forms) +// +// * VSCATTERDPD zmm, vm32y{k} [AVX512F] +// * VSCATTERDPD xmm, vm32x{k} [AVX512F,AVX512VL] +// * VSCATTERDPD ymm, vm32x{k} [AVX512F,AVX512VL] +// +func (self *Program) VSCATTERDPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VSCATTERDPD", 2, Operands { v0, v1 }) + // VSCATTERDPD zmm, vm32y{k} + if isZMM(v0) && isVMYk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa2) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VSCATTERDPD xmm, vm32x{k} + if isEVEXXMM(v0) && isVMXk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa2) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VSCATTERDPD ymm, vm32x{k} + if isEVEXYMM(v0) && isVMXk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa2) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VSCATTERDPD") + } + return p +} + +// VSCATTERDPS performs "Scatter Packed Single-Precision Floating-Point Values with Signed Doubleword Indices". +// +// Mnemonic : VSCATTERDPS +// Supported forms : (3 forms) +// +// * VSCATTERDPS zmm, vm32z{k} [AVX512F] +// * VSCATTERDPS xmm, vm32x{k} [AVX512F,AVX512VL] +// * VSCATTERDPS ymm, vm32y{k} [AVX512F,AVX512VL] +// +func (self *Program) VSCATTERDPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VSCATTERDPS", 2, Operands { v0, v1 }) + // VSCATTERDPS zmm, vm32z{k} + if isZMM(v0) && isVMZk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa2) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VSCATTERDPS xmm, vm32x{k} + if isEVEXXMM(v0) && isVMXk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa2) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VSCATTERDPS ymm, vm32y{k} + if isEVEXYMM(v0) && isVMYk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa2) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VSCATTERDPS") + } + return p +} + +// VSCATTERPF0DPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Doubleword Indices Using T0 Hint with Intent to Write". 
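+//
+// Descriptive note, not generated output: the VSCATTERPF0*/VSCATTERPF1*
+// family only prefetches the addressed cache lines under the {k} predicate
+// (T0 vs. T1 locality hint, with intent to write); no data is transferred.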
+// +// Mnemonic : VSCATTERPF0DPD +// Supported forms : (1 form) +// +// * VSCATTERPF0DPD vm32y{k} [AVX512PF] +// +func (self *Program) VSCATTERPF0DPD(v0 interface{}) *Instruction { + p := self.alloc("VSCATTERPF0DPD", 1, Operands { v0 }) + // VSCATTERPF0DPD vm32y{k} + if isVMYk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc6) + m.mrsd(5, addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VSCATTERPF0DPD") + } + return p +} + +// VSCATTERPF0DPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Doubleword Indices Using T0 Hint with Intent to Write". +// +// Mnemonic : VSCATTERPF0DPS +// Supported forms : (1 form) +// +// * VSCATTERPF0DPS vm32z{k} [AVX512PF] +// +func (self *Program) VSCATTERPF0DPS(v0 interface{}) *Instruction { + p := self.alloc("VSCATTERPF0DPS", 1, Operands { v0 }) + // VSCATTERPF0DPS vm32z{k} + if isVMZk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc6) + m.mrsd(5, addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VSCATTERPF0DPS") + } + return p +} + +// VSCATTERPF0QPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Quadword Indices Using T0 Hint with Intent to Write". +// +// Mnemonic : VSCATTERPF0QPD +// Supported forms : (1 form) +// +// * VSCATTERPF0QPD vm64z{k} [AVX512PF] +// +func (self *Program) VSCATTERPF0QPD(v0 interface{}) *Instruction { + p := self.alloc("VSCATTERPF0QPD", 1, Operands { v0 }) + // VSCATTERPF0QPD vm64z{k} + if isVMZk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc7) + m.mrsd(5, addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VSCATTERPF0QPD") + } + return p +} + +// VSCATTERPF0QPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Quadword Indices Using T0 Hint with Intent to Write". +// +// Mnemonic : VSCATTERPF0QPS +// Supported forms : (1 form) +// +// * VSCATTERPF0QPS vm64z{k} [AVX512PF] +// +func (self *Program) VSCATTERPF0QPS(v0 interface{}) *Instruction { + p := self.alloc("VSCATTERPF0QPS", 1, Operands { v0 }) + // VSCATTERPF0QPS vm64z{k} + if isVMZk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc7) + m.mrsd(5, addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VSCATTERPF0QPS") + } + return p +} + +// VSCATTERPF1DPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Doubleword Indices Using T1 Hint with Intent to Write". 
+// +// Mnemonic : VSCATTERPF1DPD +// Supported forms : (1 form) +// +// * VSCATTERPF1DPD vm32y{k} [AVX512PF] +// +func (self *Program) VSCATTERPF1DPD(v0 interface{}) *Instruction { + p := self.alloc("VSCATTERPF1DPD", 1, Operands { v0 }) + // VSCATTERPF1DPD vm32y{k} + if isVMYk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc6) + m.mrsd(6, addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VSCATTERPF1DPD") + } + return p +} + +// VSCATTERPF1DPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Doubleword Indices Using T1 Hint with Intent to Write". +// +// Mnemonic : VSCATTERPF1DPS +// Supported forms : (1 form) +// +// * VSCATTERPF1DPS vm32z{k} [AVX512PF] +// +func (self *Program) VSCATTERPF1DPS(v0 interface{}) *Instruction { + p := self.alloc("VSCATTERPF1DPS", 1, Operands { v0 }) + // VSCATTERPF1DPS vm32z{k} + if isVMZk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc6) + m.mrsd(6, addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VSCATTERPF1DPS") + } + return p +} + +// VSCATTERPF1QPD performs "Sparse Prefetch Packed Double-Precision Floating-Point Data Values with Signed Quadword Indices Using T1 Hint with Intent to Write". +// +// Mnemonic : VSCATTERPF1QPD +// Supported forms : (1 form) +// +// * VSCATTERPF1QPD vm64z{k} [AVX512PF] +// +func (self *Program) VSCATTERPF1QPD(v0 interface{}) *Instruction { + p := self.alloc("VSCATTERPF1QPD", 1, Operands { v0 }) + // VSCATTERPF1QPD vm64z{k} + if isVMZk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc7) + m.mrsd(6, addr(v[0]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VSCATTERPF1QPD") + } + return p +} + +// VSCATTERPF1QPS performs "Sparse Prefetch Packed Single-Precision Floating-Point Data Values with Signed Quadword Indices Using T1 Hint with Intent to Write". +// +// Mnemonic : VSCATTERPF1QPS +// Supported forms : (1 form) +// +// * VSCATTERPF1QPS vm64z{k} [AVX512PF] +// +func (self *Program) VSCATTERPF1QPS(v0 interface{}) *Instruction { + p := self.alloc("VSCATTERPF1QPS", 1, Operands { v0 }) + // VSCATTERPF1QPS vm64z{k} + if isVMZk(v0) { + self.require(ISA_AVX512PF) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, 0, addr(v[0]), 0, kcode(v[0]), 0, 0) + m.emit(0xc7) + m.mrsd(6, addr(v[0]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VSCATTERPF1QPS") + } + return p +} + +// VSCATTERQPD performs "Scatter Packed Double-Precision Floating-Point Values with Signed Quadword Indices". 
+// +// Mnemonic : VSCATTERQPD +// Supported forms : (3 forms) +// +// * VSCATTERQPD zmm, vm64z{k} [AVX512F] +// * VSCATTERQPD xmm, vm64x{k} [AVX512F,AVX512VL] +// * VSCATTERQPD ymm, vm64y{k} [AVX512F,AVX512VL] +// +func (self *Program) VSCATTERQPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VSCATTERQPD", 2, Operands { v0, v1 }) + // VSCATTERQPD zmm, vm64z{k} + if isZMM(v0) && isVMZk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa3) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VSCATTERQPD xmm, vm64x{k} + if isEVEXXMM(v0) && isVMXk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa3) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + // VSCATTERQPD ymm, vm64y{k} + if isEVEXYMM(v0) && isVMYk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x85, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa3) + m.mrsd(lcode(v[0]), addr(v[1]), 8) + }) + } + if p.len == 0 { + panic("invalid operands for VSCATTERQPD") + } + return p +} + +// VSCATTERQPS performs "Scatter Packed Single-Precision Floating-Point Values with Signed Quadword Indices". +// +// Mnemonic : VSCATTERQPS +// Supported forms : (3 forms) +// +// * VSCATTERQPS ymm, vm64z{k} [AVX512F] +// * VSCATTERQPS xmm, vm64x{k} [AVX512F,AVX512VL] +// * VSCATTERQPS xmm, vm64y{k} [AVX512F,AVX512VL] +// +func (self *Program) VSCATTERQPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VSCATTERQPS", 2, Operands { v0, v1 }) + // VSCATTERQPS ymm, vm64z{k} + if isEVEXYMM(v0) && isVMZk(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b10, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa3) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VSCATTERQPS xmm, vm64x{k} + if isEVEXXMM(v0) && isVMXk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b00, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa3) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + // VSCATTERQPS xmm, vm64y{k} + if isEVEXXMM(v0) && isVMYk(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b10, 0x05, 0b01, ehcode(v[0]), addr(v[1]), 0, kcode(v[1]), 0, 0) + m.emit(0xa3) + m.mrsd(lcode(v[0]), addr(v[1]), 4) + }) + } + if p.len == 0 { + panic("invalid operands for VSCATTERQPS") + } + return p +} + +// VSHUFF32X4 performs "Shuffle 128-Bit Packed Single-Precision Floating-Point Values". 
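+//
+// Illustrative sketch, not generated output (ZMM register constants assumed):
+//
+//     p.VSHUFF32X4(0x4e, ZMM1, ZMM1, ZMM0)    // swap the two 256-bit halves of ZMM1
+//
+// Each two-bit imm8 field selects one 128-bit lane: the low half of the
+// result is drawn from the vvvv source (third argument here), the high half
+// from the rm source (second argument).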
+// +// Mnemonic : VSHUFF32X4 +// Supported forms : (4 forms) +// +// * VSHUFF32X4 imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VSHUFF32X4 imm8, zmm, zmm, zmm{k}{z} [AVX512F] +// * VSHUFF32X4 imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VSHUFF32X4 imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VSHUFF32X4(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VSHUFF32X4", 4, Operands { v0, v1, v2, v3 }) + // VSHUFF32X4 imm8, m512/m32bcst, zmm, zmm{k}{z} + if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x23) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFF32X4 imm8, zmm, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x23) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFF32X4 imm8, m256/m32bcst, ymm, ymm{k}{z} + if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x23) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFF32X4 imm8, ymm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x23) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSHUFF32X4") + } + return p +} + +// VSHUFF64X2 performs "Shuffle 128-Bit Packed Double-Precision Floating-Point Values". 
+// +// Mnemonic : VSHUFF64X2 +// Supported forms : (4 forms) +// +// * VSHUFF64X2 imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VSHUFF64X2 imm8, zmm, zmm, zmm{k}{z} [AVX512F] +// * VSHUFF64X2 imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VSHUFF64X2 imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VSHUFF64X2(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VSHUFF64X2", 4, Operands { v0, v1, v2, v3 }) + // VSHUFF64X2 imm8, m512/m64bcst, zmm, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x23) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFF64X2 imm8, zmm, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x23) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFF64X2 imm8, m256/m64bcst, ymm, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x23) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFF64X2 imm8, ymm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x23) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSHUFF64X2") + } + return p +} + +// VSHUFI32X4 performs "Shuffle 128-Bit Packed Doubleword Integer Values". 
+// +// Mnemonic : VSHUFI32X4 +// Supported forms : (4 forms) +// +// * VSHUFI32X4 imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VSHUFI32X4 imm8, zmm, zmm, zmm{k}{z} [AVX512F] +// * VSHUFI32X4 imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VSHUFI32X4 imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VSHUFI32X4(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VSHUFI32X4", 4, Operands { v0, v1, v2, v3 }) + // VSHUFI32X4 imm8, m512/m32bcst, zmm, zmm{k}{z} + if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x43) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFI32X4 imm8, zmm, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x43) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFI32X4 imm8, m256/m32bcst, ymm, ymm{k}{z} + if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x05, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x43) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFI32X4 imm8, ymm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7d ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x43) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSHUFI32X4") + } + return p +} + +// VSHUFI64X2 performs "Shuffle 128-Bit Packed Quadword Integer Values". 
+// +// Mnemonic : VSHUFI64X2 +// Supported forms : (4 forms) +// +// * VSHUFI64X2 imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VSHUFI64X2 imm8, zmm, zmm, zmm{k}{z} [AVX512F] +// * VSHUFI64X2 imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VSHUFI64X2 imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VSHUFI64X2(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VSHUFI64X2", 4, Operands { v0, v1, v2, v3 }) + // VSHUFI64X2 imm8, m512/m64bcst, zmm, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x43) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFI64X2 imm8, zmm, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0x43) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFI64X2 imm8, m256/m64bcst, ymm, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b11, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0x43) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFI64X2 imm8, ymm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf3 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0x43) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSHUFI64X2") + } + return p +} + +// VSHUFPD performs "Shuffle Packed Double-Precision Floating-Point Values". 
+// +// Mnemonic : VSHUFPD +// Supported forms : (10 forms) +// +// * VSHUFPD imm8, xmm, xmm, xmm [AVX] +// * VSHUFPD imm8, m128, xmm, xmm [AVX] +// * VSHUFPD imm8, ymm, ymm, ymm [AVX] +// * VSHUFPD imm8, m256, ymm, ymm [AVX] +// * VSHUFPD imm8, m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VSHUFPD imm8, zmm, zmm, zmm{k}{z} [AVX512F] +// * VSHUFPD imm8, m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSHUFPD imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSHUFPD imm8, m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VSHUFPD imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VSHUFPD(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VSHUFPD", 4, Operands { v0, v1, v2, v3 }) + // VSHUFPD imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[3]), v[1], hlcode(v[2])) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPD imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xc6) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPD imm8, ymm, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[3]), v[1], hlcode(v[2])) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPD imm8, m256, ymm, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xc6) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPD imm8, m512/m64bcst, zmm, zmm{k}{z} + if isImm8(v0) && isM512M64bcst(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0xc6) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPD imm8, zmm, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPD imm8, m128/m64bcst, xmm, xmm{k}{z} + if isImm8(v0) && isM128M64bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0xc6) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPD imm8, xmm, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | 
ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPD imm8, m256/m64bcst, ymm, ymm{k}{z} + if isImm8(v0) && isM256M64bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0xc6) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPD imm8, ymm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSHUFPD") + } + return p +} + +// VSHUFPS performs "Shuffle Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VSHUFPS +// Supported forms : (10 forms) +// +// * VSHUFPS imm8, xmm, xmm, xmm [AVX] +// * VSHUFPS imm8, m128, xmm, xmm [AVX] +// * VSHUFPS imm8, ymm, ymm, ymm [AVX] +// * VSHUFPS imm8, m256, ymm, ymm [AVX] +// * VSHUFPS imm8, m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VSHUFPS imm8, zmm, zmm, zmm{k}{z} [AVX512F] +// * VSHUFPS imm8, m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSHUFPS imm8, xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSHUFPS imm8, m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VSHUFPS imm8, ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VSHUFPS(v0 interface{}, v1 interface{}, v2 interface{}, v3 interface{}) *Instruction { + p := self.alloc("VSHUFPS", 4, Operands { v0, v1, v2, v3 }) + // VSHUFPS imm8, xmm, xmm, xmm + if isImm8(v0) && isXMM(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[3]), v[1], hlcode(v[2])) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPS imm8, m128, xmm, xmm + if isImm8(v0) && isM128(v1) && isXMM(v2) && isXMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xc6) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPS imm8, ymm, ymm, ymm + if isImm8(v0) && isYMM(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[3]), v[1], hlcode(v[2])) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPS imm8, m256, ymm, ymm + if isImm8(v0) && isM256(v1) && isYMM(v2) && isYMM(v3) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[3]), addr(v[1]), hlcode(v[2])) + m.emit(0xc6) + m.mrsd(lcode(v[3]), addr(v[1]), 1) + 
m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPS imm8, m512/m32bcst, zmm, zmm{k}{z} + if isImm8(v0) && isM512M32bcst(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0xc6) + m.mrsd(lcode(v[3]), addr(v[1]), 64) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPS imm8, zmm, zmm, zmm{k}{z} + if isImm8(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(v3) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x40) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPS imm8, m128/m32bcst, xmm, xmm{k}{z} + if isImm8(v0) && isM128M32bcst(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0xc6) + m.mrsd(lcode(v[3]), addr(v[1]), 16) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPS imm8, xmm, xmm, xmm{k}{z} + if isImm8(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x00) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPS imm8, m256/m32bcst, ymm, ymm{k}{z} + if isImm8(v0) && isM256M32bcst(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[3]), addr(v[1]), vcode(v[2]), kcode(v[3]), zcode(v[3]), bcode(v[1])) + m.emit(0xc6) + m.mrsd(lcode(v[3]), addr(v[1]), 32) + m.imm1(toImmAny(v[0])) + }) + } + // VSHUFPS imm8, ymm, ymm, ymm{k}{z} + if isImm8(v0) && isEVEXYMM(v1) && isEVEXYMM(v2) && isYMMkz(v3) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x20) + m.emit(0xc6) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSHUFPS") + } + return p +} + +// VSQRTPD performs "Compute Square Roots of Packed Double-Precision Floating-Point Values". 
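+//
+// A minimal sketch, not generator output (ZMM register constants assumed).
+// The plain forms take two operands, destination last; passing a third
+// operand selects the {er} variant, whose leading operand is an embedded
+// rounding-control value (matched by isER):
+//
+//     p.VSQRTPD(ZMM1, ZMM0)    // assumed constants; ZMM0[i] = sqrt(ZMM1[i]) for each double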
+// +// Mnemonic : VSQRTPD +// Supported forms : (11 forms) +// +// * VSQRTPD xmm, xmm [AVX] +// * VSQRTPD m128, xmm [AVX] +// * VSQRTPD ymm, ymm [AVX] +// * VSQRTPD m256, ymm [AVX] +// * VSQRTPD m512/m64bcst, zmm{k}{z} [AVX512F] +// * VSQRTPD {er}, zmm, zmm{k}{z} [AVX512F] +// * VSQRTPD zmm, zmm{k}{z} [AVX512F] +// * VSQRTPD m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VSQRTPD m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VSQRTPD xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSQRTPD ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VSQRTPD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VSQRTPD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VSQRTPD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VSQRTPD takes 2 or 3 operands") + } + // VSQRTPD xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), v[0], 0) + m.emit(0x51) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VSQRTPD m128, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), addr(v[0]), 0) + m.emit(0x51) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VSQRTPD ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), v[0], 0) + m.emit(0x51) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VSQRTPD m256, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[1]), addr(v[0]), 0) + m.emit(0x51) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VSQRTPD m512/m64bcst, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x51) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VSQRTPD {er}, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x51) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VSQRTPD zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x51) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VSQRTPD m128/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x51) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VSQRTPD m256/m32bcst, ymm{k}{z} + if 
len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x51) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VSQRTPD xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x51) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VSQRTPD ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x51) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSQRTPD") + } + return p +} + +// VSQRTPS performs "Compute Square Roots of Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VSQRTPS +// Supported forms : (11 forms) +// +// * VSQRTPS xmm, xmm [AVX] +// * VSQRTPS m128, xmm [AVX] +// * VSQRTPS ymm, ymm [AVX] +// * VSQRTPS m256, ymm [AVX] +// * VSQRTPS m512/m32bcst, zmm{k}{z} [AVX512F] +// * VSQRTPS {er}, zmm, zmm{k}{z} [AVX512F] +// * VSQRTPS zmm, zmm{k}{z} [AVX512F] +// * VSQRTPS m128/m32bcst, xmm{k}{z} [AVX512F,AVX512VL] +// * VSQRTPS m256/m32bcst, ymm{k}{z} [AVX512F,AVX512VL] +// * VSQRTPS xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSQRTPS ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VSQRTPS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VSQRTPS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VSQRTPS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VSQRTPS takes 2 or 3 operands") + } + // VSQRTPS xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), v[0], 0) + m.emit(0x51) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VSQRTPS m128, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), addr(v[0]), 0) + m.emit(0x51) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VSQRTPS ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), v[0], 0) + m.emit(0x51) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VSQRTPS m256, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[1]), addr(v[0]), 0) + m.emit(0x51) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VSQRTPS m512/m32bcst, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), 
zcode(v[1]), bcode(v[0])) + m.emit(0x51) + m.mrsd(lcode(v[1]), addr(v[0]), 64) + }) + } + // VSQRTPS {er}, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[2]) << 7) | (vcode(v[0]) << 5) | kcode(v[2]) | 0x18) + m.emit(0x51) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VSQRTPS zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMMkz(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x48) + m.emit(0x51) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VSQRTPS m128/m32bcst, xmm{k}{z} + if len(vv) == 0 && isM128M32bcst(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x51) + m.mrsd(lcode(v[1]), addr(v[0]), 16) + }) + } + // VSQRTPS m256/m32bcst, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[1]), addr(v[0]), 0, kcode(v[1]), zcode(v[1]), bcode(v[0])) + m.emit(0x51) + m.mrsd(lcode(v[1]), addr(v[0]), 32) + }) + } + // VSQRTPS xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isXMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x08) + m.emit(0x51) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VSQRTPS ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isYMMkz(v1) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit((zcode(v[1]) << 7) | kcode(v[1]) | 0x28) + m.emit(0x51) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSQRTPS") + } + return p +} + +// VSQRTSD performs "Compute Square Root of Scalar Double-Precision Floating-Point Value". 
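+//
+// A minimal sketch, not generator output (XMM register constants assumed):
+// the scalar forms compute only the low double, taking the upper bits of the
+// destination from the middle (vvvv) source:
+//
+//     p.VSQRTSD(XMM2, XMM1, XMM0)    // assumed constants; lo(XMM0) = sqrt(lo(XMM2)), upper bits from XMM1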
+// +// Mnemonic : VSQRTSD +// Supported forms : (5 forms) +// +// * VSQRTSD xmm, xmm, xmm [AVX] +// * VSQRTSD m64, xmm, xmm [AVX] +// * VSQRTSD m64, xmm, xmm{k}{z} [AVX512F] +// * VSQRTSD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VSQRTSD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VSQRTSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VSQRTSD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VSQRTSD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VSQRTSD takes 3 or 4 operands") + } + // VSQRTSD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x51) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSQRTSD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x51) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VSQRTSD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x51) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VSQRTSD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xff ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x51) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VSQRTSD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x51) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSQRTSD") + } + return p +} + +// VSQRTSS performs "Compute Square Root of Scalar Single-Precision Floating-Point Value". 
+// +// Mnemonic : VSQRTSS +// Supported forms : (5 forms) +// +// * VSQRTSS xmm, xmm, xmm [AVX] +// * VSQRTSS m32, xmm, xmm [AVX] +// * VSQRTSS m32, xmm, xmm{k}{z} [AVX512F] +// * VSQRTSS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VSQRTSS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VSQRTSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VSQRTSS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VSQRTSS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VSQRTSS takes 3 or 4 operands") + } + // VSQRTSS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x51) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSQRTSS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x51) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VSQRTSS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x51) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VSQRTSS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7e ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x51) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VSQRTSS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x51) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSQRTSS") + } + return p +} + +// VSTMXCSR performs "Store MXCSR Register State". +// +// Mnemonic : VSTMXCSR +// Supported forms : (1 form) +// +// * VSTMXCSR m32 [AVX] +// +func (self *Program) VSTMXCSR(v0 interface{}) *Instruction { + p := self.alloc("VSTMXCSR", 1, Operands { v0 }) + // VSTMXCSR m32 + if isM32(v0) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, 0, addr(v[0]), 0) + m.emit(0xae) + m.mrsd(3, addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VSTMXCSR") + } + return p +} + +// VSUBPD performs "Subtract Packed Double-Precision Floating-Point Values". 
+// +// Mnemonic : VSUBPD +// Supported forms : (11 forms) +// +// * VSUBPD xmm, xmm, xmm [AVX] +// * VSUBPD m128, xmm, xmm [AVX] +// * VSUBPD ymm, ymm, ymm [AVX] +// * VSUBPD m256, ymm, ymm [AVX] +// * VSUBPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VSUBPD {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VSUBPD zmm, zmm, zmm{k}{z} [AVX512F] +// * VSUBPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSUBPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSUBPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VSUBPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VSUBPD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VSUBPD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VSUBPD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VSUBPD takes 3 or 4 operands") + } + // VSUBPD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSUBPD m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VSUBPD ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSUBPD m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VSUBPD m512/m64bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VSUBPD {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xfd ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VSUBPD zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSUBPD m128/m64bcst, xmm, xmm{k}{z} + if len(vv) == 0 && 
isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VSUBPD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSUBPD m256/m64bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VSUBPD ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSUBPD") + } + return p +} + +// VSUBPS performs "Subtract Packed Single-Precision Floating-Point Values". 
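+//
+// Same operand shape as VSUBPD, on single-precision elements. In the
+// m512/m32bcst-style forms it is the broadcast flavour of the memory operand
+// that drives bcode(v[0]) and hence the EVEX.b bit in the prefix; a plain
+// register sketch (YMM constants assumed):
+//
+//     p.VSUBPS(YMM1, YMM2, YMM0)    // assumed constants; YMM0 = YMM2 - YMM1, per float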
+// +// Mnemonic : VSUBPS +// Supported forms : (11 forms) +// +// * VSUBPS xmm, xmm, xmm [AVX] +// * VSUBPS m128, xmm, xmm [AVX] +// * VSUBPS ymm, ymm, ymm [AVX] +// * VSUBPS m256, ymm, ymm [AVX] +// * VSUBPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VSUBPS {er}, zmm, zmm, zmm{k}{z} [AVX512F] +// * VSUBPS zmm, zmm, zmm{k}{z} [AVX512F] +// * VSUBPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSUBPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VSUBPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VSUBPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VSUBPS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VSUBPS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VSUBPS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VSUBPS takes 3 or 4 operands") + } + // VSUBPS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSUBPS m128, xmm, xmm + if len(vv) == 0 && isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VSUBPS ymm, ymm, ymm + if len(vv) == 0 && isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSUBPS m256, ymm, ymm + if len(vv) == 0 && isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VSUBPS m512/m32bcst, zmm, zmm{k}{z} + if len(vv) == 0 && isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VSUBPS {er}, zmm, zmm, zmm{k}{z} + if len(vv) == 1 && isER(v0) && isZMM(v1) && isZMM(v2) && isZMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7c ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VSUBPS zmm, zmm, zmm{k}{z} + if len(vv) == 0 && isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSUBPS m128/m32bcst, xmm, xmm{k}{z} + if len(vv) == 0 && 
isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VSUBPS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSUBPS m256/m32bcst, ymm, ymm{k}{z} + if len(vv) == 0 && isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VSUBPS ymm, ymm, ymm{k}{z} + if len(vv) == 0 && isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSUBPS") + } + return p +} + +// VSUBSD performs "Subtract Scalar Double-Precision Floating-Point Values". 
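+//
+// Scalar counterpart of VSUBPD: only the low qword is computed, and the
+// upper bits of the destination come from the middle source. Sketch, not
+// generator output (XMM register constants assumed):
+//
+//     p.VSUBSD(XMM2, XMM1, XMM0)    // assumed constants; lo(XMM0) = lo(XMM1) - lo(XMM2), upper bits from XMM1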
+// +// Mnemonic : VSUBSD +// Supported forms : (5 forms) +// +// * VSUBSD xmm, xmm, xmm [AVX] +// * VSUBSD m64, xmm, xmm [AVX] +// * VSUBSD m64, xmm, xmm{k}{z} [AVX512F] +// * VSUBSD {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VSUBSD xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VSUBSD(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VSUBSD", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VSUBSD", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VSUBSD takes 3 or 4 operands") + } + // VSUBSD xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSUBSD m64, xmm, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(3, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VSUBSD m64, xmm, xmm{k}{z} + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x87, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 8) + }) + } + // VSUBSD {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0xff ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VSUBSD xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xff ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSUBSD") + } + return p +} + +// VSUBSS performs "Subtract Scalar Single-Precision Floating-Point Values". 
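+//
+// Across these VSUB* encoders the first argument to m.vex2 appears to pack
+// the SIMD prefix selector together with the vector length: 2 here selects
+// the F3 prefix of scalar single subtraction, against 3 (F2) for VSUBSD,
+// with 0 and 1 used by the packed 128-bit forms and 4 and 5 by their 256-bit
+// counterparts.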
+// +// Mnemonic : VSUBSS +// Supported forms : (5 forms) +// +// * VSUBSS xmm, xmm, xmm [AVX] +// * VSUBSS m32, xmm, xmm [AVX] +// * VSUBSS m32, xmm, xmm{k}{z} [AVX512F] +// * VSUBSS {er}, xmm, xmm, xmm{k}{z} [AVX512F] +// * VSUBSS xmm, xmm, xmm{k}{z} [AVX512F] +// +func (self *Program) VSUBSS(v0 interface{}, v1 interface{}, v2 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VSUBSS", 3, Operands { v0, v1, v2 }) + case 1 : p = self.alloc("VSUBSS", 4, Operands { v0, v1, v2, vv[0] }) + default : panic("instruction VSUBSS takes 3 or 4 operands") + } + // VSUBSS xmm, xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VSUBSS m32, xmm, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(2, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VSUBSS m32, xmm, xmm{k}{z} + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x06, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), 0) + m.emit(0x5c) + m.mrsd(lcode(v[2]), addr(v[0]), 4) + }) + } + // VSUBSS {er}, xmm, xmm, xmm{k}{z} + if len(vv) == 1 && isER(v0) && isEVEXXMM(v1) && isEVEXXMM(v2) && isXMMkz(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[3]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[3]) << 4))) + m.emit(0x7e ^ (hlcode(v[2]) << 3)) + m.emit((zcode(v[3]) << 7) | (vcode(v[0]) << 5) | (0x08 ^ (ecode(v[2]) << 3)) | kcode(v[3]) | 0x10) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[3]) << 3 | lcode(v[1])) + }) + } + // VSUBSS xmm, xmm, xmm{k}{z} + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7e ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x5c) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VSUBSS") + } + return p +} + +// VTESTPD performs "Packed Double-Precision Floating-Point Bit Test". 
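+//
+// Unlike the arithmetic instructions above, VTESTPD (and VTESTPS below)
+// writes no vector result: it tests the sign bits of the packed elements and
+// sets ZF and CF in EFLAGS for a following conditional branch. Sketch, not
+// generator output (XMM register constants assumed):
+//
+//     p.VTESTPD(XMM1, XMM0)    // assumed constants; ZF set when the ANDed sign bits are all zero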
+// +// Mnemonic : VTESTPD +// Supported forms : (4 forms) +// +// * VTESTPD xmm, xmm [AVX] +// * VTESTPD m128, xmm [AVX] +// * VTESTPD ymm, ymm [AVX] +// * VTESTPD m256, ymm [AVX] +// +func (self *Program) VTESTPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VTESTPD", 2, Operands { v0, v1 }) + // VTESTPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VTESTPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VTESTPD ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x0f) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VTESTPD m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x0f) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VTESTPD") + } + return p +} + +// VTESTPS performs "Packed Single-Precision Floating-Point Bit Test". +// +// Mnemonic : VTESTPS +// Supported forms : (4 forms) +// +// * VTESTPS xmm, xmm [AVX] +// * VTESTPS m128, xmm [AVX] +// * VTESTPS ymm, ymm [AVX] +// * VTESTPS m256, ymm [AVX] +// +func (self *Program) VTESTPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("VTESTPS", 2, Operands { v0, v1 }) + // VTESTPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x79) + m.emit(0x0e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VTESTPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x01, hcode(v[1]), addr(v[0]), 0) + m.emit(0x0e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VTESTPS ymm, ymm + if isYMM(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xc4) + m.emit(0xe2 ^ (hcode(v[1]) << 7) ^ (hcode(v[0]) << 5)) + m.emit(0x7d) + m.emit(0x0e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VTESTPS m256, ymm + if isM256(v0) && isYMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex3(0xc4, 0b10, 0x05, hcode(v[1]), addr(v[0]), 0) + m.emit(0x0e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for VTESTPS") + } + return p +} + +// VUCOMISD performs "Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS". 
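+//
+// A minimal sketch, not generator output (XMM register constants assumed):
+// the low doubles are compared and the result is reported through ZF, PF and
+// CF (PF flags an unordered result, so unsigned-style branches such as JA or
+// JB follow naturally); the three-operand variant takes a leading {sae}
+// suppress-all-exceptions operand:
+//
+//     p.VUCOMISD(XMM1, XMM0)    // assumed constants; compares the low double of XMM0 against XMM1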
+// +// Mnemonic : VUCOMISD +// Supported forms : (5 forms) +// +// * VUCOMISD xmm, xmm [AVX] +// * VUCOMISD m64, xmm [AVX] +// * VUCOMISD m64, xmm [AVX512F] +// * VUCOMISD {sae}, xmm, xmm [AVX512F] +// * VUCOMISD xmm, xmm [AVX512F] +// +func (self *Program) VUCOMISD(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VUCOMISD", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VUCOMISD", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VUCOMISD takes 2 or 3 operands") + } + // VUCOMISD xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), v[0], 0) + m.emit(0x2e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VUCOMISD m64, xmm + if len(vv) == 0 && isM64(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VUCOMISD m64, xmm + if len(vv) == 0 && isM64(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2e) + m.mrsd(lcode(v[1]), addr(v[0]), 8) + }) + } + // VUCOMISD {sae}, xmm, xmm + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd) + m.emit(0x18) + m.emit(0x2e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VUCOMISD xmm, xmm + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0xfd) + m.emit(0x48) + m.emit(0x2e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VUCOMISD") + } + return p +} + +// VUCOMISS performs "Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS". 
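+//
+// As with VUCOMISD, the {sae} form hard-codes 0x18 as the third EVEX payload
+// byte: that sets the EVEX.b bit, which on a register operand requests
+// suppress-all-exceptions rather than broadcasting.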
+// +// Mnemonic : VUCOMISS +// Supported forms : (5 forms) +// +// * VUCOMISS xmm, xmm [AVX] +// * VUCOMISS m32, xmm [AVX] +// * VUCOMISS m32, xmm [AVX512F] +// * VUCOMISS {sae}, xmm, xmm [AVX512F] +// * VUCOMISS xmm, xmm [AVX512F] +// +func (self *Program) VUCOMISS(v0 interface{}, v1 interface{}, vv ...interface{}) *Instruction { + var p *Instruction + switch len(vv) { + case 0 : p = self.alloc("VUCOMISS", 2, Operands { v0, v1 }) + case 1 : p = self.alloc("VUCOMISS", 3, Operands { v0, v1, vv[0] }) + default : panic("instruction VUCOMISS takes 2 or 3 operands") + } + // VUCOMISS xmm, xmm + if len(vv) == 0 && isXMM(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), v[0], 0) + m.emit(0x2e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // VUCOMISS m32, xmm + if len(vv) == 0 && isM32(v0) && isXMM(v1) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[1]), addr(v[0]), 0) + m.emit(0x2e) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // VUCOMISS m32, xmm + if len(vv) == 0 && isM32(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[1]), addr(v[0]), 0, 0, 0, 0) + m.emit(0x2e) + m.mrsd(lcode(v[1]), addr(v[0]), 4) + }) + } + // VUCOMISS {sae}, xmm, xmm + if len(vv) == 1 && isSAE(v0) && isEVEXXMM(v1) && isEVEXXMM(vv[0]) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[1]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c) + m.emit(0x18) + m.emit(0x2e) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[1])) + }) + } + // VUCOMISS xmm, xmm + if len(vv) == 0 && isEVEXXMM(v0) && isEVEXXMM(v1) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[1]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[1]) << 4))) + m.emit(0x7c) + m.emit(0x48) + m.emit(0x2e) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VUCOMISS") + } + return p +} + +// VUNPCKHPD performs "Unpack and Interleave High Packed Double-Precision Floating-Point Values". 
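+//
+// A minimal sketch, not generator output (XMM register constants assumed):
+// per 128-bit lane the high qwords of the two sources are interleaved, with
+// the middle (vvvv) source supplying the lower qword of each pair
+// (VUNPCKHPS below does the same with single-precision elements):
+//
+//     p.VUNPCKHPD(XMM1, XMM2, XMM0)    // assumed constants; lo(XMM0) = hi(XMM2), hi(XMM0) = hi(XMM1)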
+// +// Mnemonic : VUNPCKHPD +// Supported forms : (10 forms) +// +// * VUNPCKHPD xmm, xmm, xmm [AVX] +// * VUNPCKHPD m128, xmm, xmm [AVX] +// * VUNPCKHPD ymm, ymm, ymm [AVX] +// * VUNPCKHPD m256, ymm, ymm [AVX] +// * VUNPCKHPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VUNPCKHPD zmm, zmm, zmm{k}{z} [AVX512F] +// * VUNPCKHPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VUNPCKHPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VUNPCKHPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VUNPCKHPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VUNPCKHPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VUNPCKHPD", 3, Operands { v0, v1, v2 }) + // VUNPCKHPD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKHPD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VUNPCKHPD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKHPD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VUNPCKHPD m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VUNPCKHPD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKHPD m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VUNPCKHPD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // 
VUNPCKHPD m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VUNPCKHPD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VUNPCKHPD") + } + return p +} + +// VUNPCKHPS performs "Unpack and Interleave High Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VUNPCKHPS +// Supported forms : (10 forms) +// +// * VUNPCKHPS xmm, xmm, xmm [AVX] +// * VUNPCKHPS m128, xmm, xmm [AVX] +// * VUNPCKHPS ymm, ymm, ymm [AVX] +// * VUNPCKHPS m256, ymm, ymm [AVX] +// * VUNPCKHPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VUNPCKHPS zmm, zmm, zmm{k}{z} [AVX512F] +// * VUNPCKHPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VUNPCKHPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VUNPCKHPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VUNPCKHPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VUNPCKHPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VUNPCKHPS", 3, Operands { v0, v1, v2 }) + // VUNPCKHPS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKHPS m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VUNPCKHPS ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKHPS m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VUNPCKHPS m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VUNPCKHPS zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ 
(hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKHPS m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VUNPCKHPS xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKHPS m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x15) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VUNPCKHPS ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x15) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VUNPCKHPS") + } + return p +} + +// VUNPCKLPD performs "Unpack and Interleave Low Packed Double-Precision Floating-Point Values". 
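+//
+// Editor's example (sketch; XMM constants assumed as above): the low-lane
+// counterpart of VUNPCKHPD, again with the destination as the last operand:
+//
+//     p.VUNPCKLPD(XMM1, XMM2, XMM3)   // xmm3 = low qwords of xmm2 and xmm1, interleaved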
+// +// Mnemonic : VUNPCKLPD +// Supported forms : (10 forms) +// +// * VUNPCKLPD xmm, xmm, xmm [AVX] +// * VUNPCKLPD m128, xmm, xmm [AVX] +// * VUNPCKLPD ymm, ymm, ymm [AVX] +// * VUNPCKLPD m256, ymm, ymm [AVX] +// * VUNPCKLPD m512/m64bcst, zmm, zmm{k}{z} [AVX512F] +// * VUNPCKLPD zmm, zmm, zmm{k}{z} [AVX512F] +// * VUNPCKLPD m128/m64bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VUNPCKLPD xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VUNPCKLPD m256/m64bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VUNPCKLPD ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VUNPCKLPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VUNPCKLPD", 3, Operands { v0, v1, v2 }) + // VUNPCKLPD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKLPD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VUNPCKLPD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKLPD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VUNPCKLPD m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VUNPCKLPD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKLPD m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VUNPCKLPD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // 
VUNPCKLPD m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VUNPCKLPD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VUNPCKLPD") + } + return p +} + +// VUNPCKLPS performs "Unpack and Interleave Low Packed Single-Precision Floating-Point Values". +// +// Mnemonic : VUNPCKLPS +// Supported forms : (10 forms) +// +// * VUNPCKLPS xmm, xmm, xmm [AVX] +// * VUNPCKLPS m128, xmm, xmm [AVX] +// * VUNPCKLPS ymm, ymm, ymm [AVX] +// * VUNPCKLPS m256, ymm, ymm [AVX] +// * VUNPCKLPS m512/m32bcst, zmm, zmm{k}{z} [AVX512F] +// * VUNPCKLPS zmm, zmm, zmm{k}{z} [AVX512F] +// * VUNPCKLPS m128/m32bcst, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VUNPCKLPS xmm, xmm, xmm{k}{z} [AVX512F,AVX512VL] +// * VUNPCKLPS m256/m32bcst, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// * VUNPCKLPS ymm, ymm, ymm{k}{z} [AVX512F,AVX512VL] +// +func (self *Program) VUNPCKLPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VUNPCKLPS", 3, Operands { v0, v1, v2 }) + // VUNPCKLPS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKLPS m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VUNPCKLPS ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKLPS m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VUNPCKLPS m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VUNPCKLPS zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ 
(hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKLPS m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VUNPCKLPS xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VUNPCKLPS m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x14) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VUNPCKLPS ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512F) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x14) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VUNPCKLPS") + } + return p +} + +// VXORPD performs "Bitwise Logical XOR for Double-Precision Floating-Point Values". 
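+//
+// Editor's example (sketch; XMM constants assumed): XORing a register with
+// itself is the usual vector zeroing idiom. Note from the forms above that
+// the EVEX variants require AVX512DQ rather than plain AVX512F:
+//
+//     p.VXORPD(XMM3, XMM3, XMM3)   // xmm3 = 0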
+// +// Mnemonic : VXORPD +// Supported forms : (10 forms) +// +// * VXORPD xmm, xmm, xmm [AVX] +// * VXORPD m128, xmm, xmm [AVX] +// * VXORPD ymm, ymm, ymm [AVX] +// * VXORPD m256, ymm, ymm [AVX] +// * VXORPD m512/m64bcst, zmm, zmm{k}{z} [AVX512DQ] +// * VXORPD zmm, zmm, zmm{k}{z} [AVX512DQ] +// * VXORPD m128/m64bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VXORPD xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VXORPD m256/m64bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VXORPD ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VXORPD(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VXORPD", 3, Operands { v0, v1, v2 }) + // VXORPD xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x57) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VXORPD m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(1, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x57) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VXORPD ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x57) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VXORPD m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(5, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x57) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VXORPD m512/m64bcst, zmm, zmm{k}{z} + if isM512M64bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x57) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VXORPD zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x57) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VXORPD m128/m64bcst, xmm, xmm{k}{z} + if isM128M64bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x57) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VXORPD xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x57) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VXORPD m256/m64bcst, ymm, ymm{k}{z} + if isM256M64bcst(v0) 
&& isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x85, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x57) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VXORPD ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0xfd ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x57) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VXORPD") + } + return p +} + +// VXORPS performs "Bitwise Logical XOR for Single-Precision Floating-Point Values". +// +// Mnemonic : VXORPS +// Supported forms : (10 forms) +// +// * VXORPS xmm, xmm, xmm [AVX] +// * VXORPS m128, xmm, xmm [AVX] +// * VXORPS ymm, ymm, ymm [AVX] +// * VXORPS m256, ymm, ymm [AVX] +// * VXORPS m512/m32bcst, zmm, zmm{k}{z} [AVX512DQ] +// * VXORPS zmm, zmm, zmm{k}{z} [AVX512DQ] +// * VXORPS m128/m32bcst, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VXORPS xmm, xmm, xmm{k}{z} [AVX512DQ,AVX512VL] +// * VXORPS m256/m32bcst, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// * VXORPS ymm, ymm, ymm{k}{z} [AVX512DQ,AVX512VL] +// +func (self *Program) VXORPS(v0 interface{}, v1 interface{}, v2 interface{}) *Instruction { + p := self.alloc("VXORPS", 3, Operands { v0, v1, v2 }) + // VXORPS xmm, xmm, xmm + if isXMM(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x57) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VXORPS m128, xmm, xmm + if isM128(v0) && isXMM(v1) && isXMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x57) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VXORPS ymm, ymm, ymm + if isYMM(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), v[0], hlcode(v[1])) + m.emit(0x57) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VXORPS m256, ymm, ymm + if isM256(v0) && isYMM(v1) && isYMM(v2) { + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, hcode(v[2]), addr(v[0]), hlcode(v[1])) + m.emit(0x57) + m.mrsd(lcode(v[2]), addr(v[0]), 1) + }) + } + // VXORPS m512/m32bcst, zmm, zmm{k}{z} + if isM512M32bcst(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b10, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x57) + m.mrsd(lcode(v[2]), addr(v[0]), 64) + }) + } + // VXORPS zmm, zmm, zmm{k}{z} + if isZMM(v0) && isZMM(v1) && isZMMkz(v2) { + self.require(ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x40) + m.emit(0x57) + m.emit(0xc0 | 
lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VXORPS m128/m32bcst, xmm, xmm{k}{z} + if isM128M32bcst(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b00, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x57) + m.mrsd(lcode(v[2]), addr(v[0]), 16) + }) + } + // VXORPS xmm, xmm, xmm{k}{z} + if isEVEXXMM(v0) && isEVEXXMM(v1) && isXMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x00) + m.emit(0x57) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + // VXORPS m256/m32bcst, ymm, ymm{k}{z} + if isM256M32bcst(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.evex(0b01, 0x04, 0b01, ehcode(v[2]), addr(v[0]), vcode(v[1]), kcode(v[2]), zcode(v[2]), bcode(v[0])) + m.emit(0x57) + m.mrsd(lcode(v[2]), addr(v[0]), 32) + }) + } + // VXORPS ymm, ymm, ymm{k}{z} + if isEVEXYMM(v0) && isEVEXYMM(v1) && isYMMkz(v2) { + self.require(ISA_AVX512VL | ISA_AVX512DQ) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x62) + m.emit(0xf1 ^ ((hcode(v[2]) << 7) | (ehcode(v[0]) << 5) | (ecode(v[2]) << 4))) + m.emit(0x7c ^ (hlcode(v[1]) << 3)) + m.emit((zcode(v[2]) << 7) | (0x08 ^ (ecode(v[1]) << 3)) | kcode(v[2]) | 0x20) + m.emit(0x57) + m.emit(0xc0 | lcode(v[2]) << 3 | lcode(v[0])) + }) + } + if p.len == 0 { + panic("invalid operands for VXORPS") + } + return p +} + +// VZEROALL performs "Zero All YMM Registers". +// +// Mnemonic : VZEROALL +// Supported forms : (1 form) +// +// * VZEROALL [AVX] +// +func (self *Program) VZEROALL() *Instruction { + p := self.alloc("VZEROALL", 0, Operands { }) + // VZEROALL + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(4, 0, nil, 0) + m.emit(0x77) + }) + return p +} + +// VZEROUPPER performs "Zero Upper Bits of YMM Registers". +// +// Mnemonic : VZEROUPPER +// Supported forms : (1 form) +// +// * VZEROUPPER [AVX] +// +func (self *Program) VZEROUPPER() *Instruction { + p := self.alloc("VZEROUPPER", 0, Operands { }) + // VZEROUPPER + self.require(ISA_AVX) + p.domain = DomainAVX + p.add(0, func(m *_Encoding, v []interface{}) { + m.vex2(0, 0, nil, 0) + m.emit(0x77) + }) + return p +} + +// XADDB performs "Exchange and Add". 
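+//
+// Editor's example (sketch; assumes CL is exported alongside the AL constant
+// used below): the sum lands in the last operand and that operand's old value
+// is written back to the first, which is what makes XADD usable as a
+// fetch-and-add primitive:
+//
+//     p.XADDB(CL, AL)   // al = al + cl, cl = old al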
+// +// Mnemonic : XADD +// Supported forms : (2 forms) +// +// * XADDB r8, r8 +// * XADDB r8, m8 +// +func (self *Program) XADDB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XADDB", 2, Operands { v0, v1 }) + // XADDB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x0f) + m.emit(0xc0) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // XADDB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x0f) + m.emit(0xc0) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XADDB") + } + return p +} + +// XADDL performs "Exchange and Add". +// +// Mnemonic : XADD +// Supported forms : (2 forms) +// +// * XADDL r32, r32 +// * XADDL r32, m32 +// +func (self *Program) XADDL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XADDL", 2, Operands { v0, v1 }) + // XADDL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xc1) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // XADDL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xc1) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XADDL") + } + return p +} + +// XADDQ performs "Exchange and Add". +// +// Mnemonic : XADD +// Supported forms : (2 forms) +// +// * XADDQ r64, r64 +// * XADDQ r64, m64 +// +func (self *Program) XADDQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XADDQ", 2, Operands { v0, v1 }) + // XADDQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x0f) + m.emit(0xc1) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // XADDQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x0f) + m.emit(0xc1) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XADDQ") + } + return p +} + +// XADDW performs "Exchange and Add". +// +// Mnemonic : XADD +// Supported forms : (2 forms) +// +// * XADDW r16, r16 +// * XADDW r16, m16 +// +func (self *Program) XADDW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XADDW", 2, Operands { v0, v1 }) + // XADDW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x0f) + m.emit(0xc1) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + } + // XADDW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x0f) + m.emit(0xc1) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XADDW") + } + return p +} + +// XCHGB performs "Exchange Register/Memory with Register". 
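+//
+// Editor's example (sketch; the CL constant is an assumption): note that the
+// memory forms of XCHG assert the LOCK signal implicitly on x86, so no LOCK
+// prefix is ever emitted here:
+//
+//     p.XCHGB(AL, CL)   // swap al and cl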
+// +// Mnemonic : XCHG +// Supported forms : (3 forms) +// +// * XCHGB r8, r8 +// * XCHGB m8, r8 +// * XCHGB r8, m8 +// +func (self *Program) XCHGB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XCHGB", 2, Operands { v0, v1 }) + // XCHGB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x86) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x86) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // XCHGB m8, r8 + if isM8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1])) + m.emit(0x86) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // XCHGB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x86) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XCHGB") + } + return p +} + +// XCHGL performs "Exchange Register/Memory with Register". +// +// Mnemonic : XCHG +// Supported forms : (5 forms) +// +// * XCHGL r32, eax +// * XCHGL eax, r32 +// * XCHGL r32, r32 +// * XCHGL m32, r32 +// * XCHGL r32, m32 +// +func (self *Program) XCHGL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XCHGL", 2, Operands { v0, v1 }) + // XCHGL r32, eax + if isReg32(v0) && v1 == EAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[0], false) + m.emit(0x90 | lcode(v[0])) + }) + } + // XCHGL eax, r32 + if v0 == EAX && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x90 | lcode(v[1])) + }) + } + // XCHGL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x87) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x87) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // XCHGL m32, r32 + if isM32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x87) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // XCHGL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x87) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XCHGL") + } + return p +} + +// XCHGQ performs "Exchange Register/Memory with Register". 
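+//
+// Editor's example (sketch; RBX is assumed to be exported like RAX): when
+// either operand is RAX, the encoder below picks the short 0x90+r form
+// instead of 0x87 /r:
+//
+//     p.XCHGQ(RBX, RAX)   // swap rbx and rax; two bytes, 48 93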
+// +// Mnemonic : XCHG +// Supported forms : (5 forms) +// +// * XCHGQ r64, rax +// * XCHGQ rax, r64 +// * XCHGQ r64, r64 +// * XCHGQ m64, r64 +// * XCHGQ r64, m64 +// +func (self *Program) XCHGQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XCHGQ", 2, Operands { v0, v1 }) + // XCHGQ r64, rax + if isReg64(v0) && v1 == RAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0])) + m.emit(0x90 | lcode(v[0])) + }) + } + // XCHGQ rax, r64 + if v0 == RAX && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x90 | lcode(v[1])) + }) + } + // XCHGQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x87) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x87) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // XCHGQ m64, r64 + if isM64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x87) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // XCHGQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x87) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XCHGQ") + } + return p +} + +// XCHGW performs "Exchange Register/Memory with Register". +// +// Mnemonic : XCHG +// Supported forms : (5 forms) +// +// * XCHGW r16, ax +// * XCHGW ax, r16 +// * XCHGW r16, r16 +// * XCHGW m16, r16 +// * XCHGW r16, m16 +// +func (self *Program) XCHGW(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XCHGW", 2, Operands { v0, v1 }) + // XCHGW r16, ax + if isReg16(v0) && v1 == AX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[0], false) + m.emit(0x90 | lcode(v[0])) + }) + } + // XCHGW ax, r16 + if v0 == AX && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(0, v[1], false) + m.emit(0x90 | lcode(v[1])) + }) + } + // XCHGW r16, r16 + if isReg16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x87) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x87) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // XCHGW m16, r16 + if isM16(v0) && isReg16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x87) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // XCHGW r16, m16 + if isReg16(v0) && isM16(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x87) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XCHGW") + } + return p +} + +// XGETBV performs "Get Value of Extended Control Register". 
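+//
+// Editor's note: XGETBV reads the extended control register selected by ECX
+// into EDX:EAX, so the caller is expected to load ECX first (0 selects XCR0,
+// the XSAVE feature-enable mask):
+//
+//     p.XGETBV()   // edx:eax = XCR[ecx]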
+// +// Mnemonic : XGETBV +// Supported forms : (1 form) +// +// * XGETBV +// +func (self *Program) XGETBV() *Instruction { + p := self.alloc("XGETBV", 0, Operands { }) + // XGETBV + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x0f) + m.emit(0x01) + m.emit(0xd0) + }) + return p +} + +// XLATB performs "Table Look-up Translation". +// +// Mnemonic : XLATB +// Supported forms : (2 forms) +// +// * XLATB +// * XLATB +// +func (self *Program) XLATB() *Instruction { + p := self.alloc("XLATB", 0, Operands { }) + // XLATB + p.domain = DomainMisc + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0xd7) + }) + // XLATB + p.domain = DomainMisc + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48) + m.emit(0xd7) + }) + return p +} + +// XORB performs "Logical Exclusive OR". +// +// Mnemonic : XOR +// Supported forms : (6 forms) +// +// * XORB imm8, al +// * XORB imm8, r8 +// * XORB r8, r8 +// * XORB m8, r8 +// * XORB imm8, m8 +// * XORB r8, m8 +// +func (self *Program) XORB(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XORB", 2, Operands { v0, v1 }) + // XORB imm8, al + if isImm8(v0) && v1 == AL { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x34) + m.imm1(toImmAny(v[0])) + }) + } + // XORB imm8, r8 + if isImm8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], isReg8REX(v[1])) + m.emit(0x80) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // XORB r8, r8 + if isReg8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x30) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], isReg8REX(v[0]) || isReg8REX(v[1])) + m.emit(0x32) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // XORB m8, r8 + if isM8(v0) && isReg8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), isReg8REX(v[1])) + m.emit(0x32) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // XORB imm8, m8 + if isImm8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x80) + m.mrsd(6, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // XORB r8, m8 + if isReg8(v0) && isM8(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), isReg8REX(v[0])) + m.emit(0x30) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XORB") + } + return p +} + +// XORL performs "Logical Exclusive OR". 
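+//
+// Editor's example: XORing a register with itself is the canonical 32-bit
+// zeroing idiom; it also sets ZF and clears CF and OF. Both the 31 /r and
+// 33 /r byte sequences are registered as candidates below:
+//
+//     p.XORL(EAX, EAX)   // eax = 0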
+// +// Mnemonic : XOR +// Supported forms : (8 forms) +// +// * XORL imm32, eax +// * XORL imm8, r32 +// * XORL imm32, r32 +// * XORL r32, r32 +// * XORL m32, r32 +// * XORL imm8, m32 +// * XORL imm32, m32 +// * XORL r32, m32 +// +func (self *Program) XORL(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XORL", 2, Operands { v0, v1 }) + // XORL imm32, eax + if isImm32(v0) && v1 == EAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x35) + m.imm4(toImmAny(v[0])) + }) + } + // XORL imm8, r32 + if isImm8Ext(v0, 4) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x83) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // XORL imm32, r32 + if isImm32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, v[1], false) + m.emit(0x81) + m.emit(0xf0 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // XORL r32, r32 + if isReg32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), v[1], false) + m.emit(0x31) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x33) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // XORL m32, r32 + if isM32(v0) && isReg32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x33) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // XORL imm8, m32 + if isImm8Ext(v0, 4) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x83) + m.mrsd(6, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // XORL imm32, m32 + if isImm32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(0, addr(v[1]), false) + m.emit(0x81) + m.mrsd(6, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // XORL r32, m32 + if isReg32(v0) && isM32(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[0]), addr(v[1]), false) + m.emit(0x31) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XORL") + } + return p +} + +// XORPD performs "Bitwise Logical XOR for Double-Precision Floating-Point Values". +// +// Mnemonic : XORPD +// Supported forms : (2 forms) +// +// * XORPD xmm, xmm [SSE2] +// * XORPD m128, xmm [SSE2] +// +func (self *Program) XORPD(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XORPD", 2, Operands { v0, v1 }) + // XORPD xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x57) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // XORPD m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE2) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x66) + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x57) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XORPD") + } + return p +} + +// XORPS performs "Bitwise Logical XOR for Single-Precision Floating-Point Values". 
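+//
+// Editor's example (sketch; XMM constants assumed): unlike XORPD there is no
+// 0x66 prefix, so this is the shortest non-VEX way to zero a vector register;
+// the destination is the second operand:
+//
+//     p.XORPS(XMM0, XMM0)   // xmm0 = 0, encoded as 0f 57 c0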
+// +// Mnemonic : XORPS +// Supported forms : (2 forms) +// +// * XORPS xmm, xmm [SSE] +// * XORPS m128, xmm [SSE] +// +func (self *Program) XORPS(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XORPS", 2, Operands { v0, v1 }) + // XORPS xmm, xmm + if isXMM(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), v[0], false) + m.emit(0x0f) + m.emit(0x57) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // XORPS m128, xmm + if isM128(v0) && isXMM(v1) { + self.require(ISA_SSE) + p.domain = DomainMMXSSE + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexo(hcode(v[1]), addr(v[0]), false) + m.emit(0x0f) + m.emit(0x57) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XORPS") + } + return p +} + +// XORQ performs "Logical Exclusive OR". +// +// Mnemonic : XOR +// Supported forms : (8 forms) +// +// * XORQ imm32, rax +// * XORQ imm8, r64 +// * XORQ imm32, r64 +// * XORQ r64, r64 +// * XORQ m64, r64 +// * XORQ imm8, m64 +// * XORQ imm32, m64 +// * XORQ r64, m64 +// +func (self *Program) XORQ(v0 interface{}, v1 interface{}) *Instruction { + p := self.alloc("XORQ", 2, Operands { v0, v1 }) + // XORQ imm32, rax + if isImm32(v0) && v1 == RAX { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48) + m.emit(0x35) + m.imm4(toImmAny(v[0])) + }) + } + // XORQ imm8, r64 + if isImm8Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x83) + m.emit(0xf0 | lcode(v[1])) + m.imm1(toImmAny(v[0])) + }) + } + // XORQ imm32, r64 + if isImm32Ext(v0, 8) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1])) + m.emit(0x81) + m.emit(0xf0 | lcode(v[1])) + m.imm4(toImmAny(v[0])) + }) + } + // XORQ r64, r64 + if isReg64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[0]) << 2 | hcode(v[1])) + m.emit(0x31) + m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1])) + }) + p.add(0, func(m *_Encoding, v []interface{}) { + m.emit(0x48 | hcode(v[1]) << 2 | hcode(v[0])) + m.emit(0x33) + m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0])) + }) + } + // XORQ m64, r64 + if isM64(v0) && isReg64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[1]), addr(v[0])) + m.emit(0x33) + m.mrsd(lcode(v[1]), addr(v[0]), 1) + }) + } + // XORQ imm8, m64 + if isImm8Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x83) + m.mrsd(6, addr(v[1]), 1) + m.imm1(toImmAny(v[0])) + }) + } + // XORQ imm32, m64 + if isImm32Ext(v0, 8) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, 0, addr(v[1])) + m.emit(0x81) + m.mrsd(6, addr(v[1]), 1) + m.imm4(toImmAny(v[0])) + }) + } + // XORQ r64, m64 + if isReg64(v0) && isM64(v1) { + p.domain = DomainGeneric + p.add(0, func(m *_Encoding, v []interface{}) { + m.rexm(1, hcode(v[0]), addr(v[1])) + m.emit(0x31) + m.mrsd(lcode(v[0]), addr(v[1]), 1) + }) + } + if p.len == 0 { + panic("invalid operands for XORQ") + } + return p +} + +// XORW performs "Logical Exclusive OR". 
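+//
+// Editor's example: every 16-bit form below carries the 0x66 operand-size
+// prefix, as in this zeroing of AX:
+//
+//     p.XORW(AX, AX)   // ax = 0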
+//
+// Mnemonic : XOR
+// Supported forms : (8 forms)
+//
+// * XORW imm16, ax
+// * XORW imm8, r16
+// * XORW imm16, r16
+// * XORW r16, r16
+// * XORW m16, r16
+// * XORW imm8, m16
+// * XORW imm16, m16
+// * XORW r16, m16
+//
+func (self *Program) XORW(v0 interface{}, v1 interface{}) *Instruction {
+ p := self.alloc("XORW", 2, Operands { v0, v1 })
+ // XORW imm16, ax
+ if isImm16(v0) && v1 == AX {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.emit(0x35)
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // XORW imm8, r16
+ if isImm8Ext(v0, 2) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x83)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // XORW imm16, r16
+ if isImm16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, v[1], false)
+ m.emit(0x81)
+ m.emit(0xf0 | lcode(v[1]))
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // XORW r16, r16
+ if isReg16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), v[1], false)
+ m.emit(0x31)
+ m.emit(0xc0 | lcode(v[0]) << 3 | lcode(v[1]))
+ })
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), v[0], false)
+ m.emit(0x33)
+ m.emit(0xc0 | lcode(v[1]) << 3 | lcode(v[0]))
+ })
+ }
+ // XORW m16, r16
+ if isM16(v0) && isReg16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[1]), addr(v[0]), false)
+ m.emit(0x33)
+ m.mrsd(lcode(v[1]), addr(v[0]), 1)
+ })
+ }
+ // XORW imm8, m16
+ if isImm8Ext(v0, 2) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x83)
+ m.mrsd(6, addr(v[1]), 1)
+ m.imm1(toImmAny(v[0]))
+ })
+ }
+ // XORW imm16, m16
+ if isImm16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(0, addr(v[1]), false)
+ m.emit(0x81)
+ m.mrsd(6, addr(v[1]), 1)
+ m.imm2(toImmAny(v[0]))
+ })
+ }
+ // XORW r16, m16
+ if isReg16(v0) && isM16(v1) {
+ p.domain = DomainGeneric
+ p.add(0, func(m *_Encoding, v []interface{}) {
+ m.emit(0x66)
+ m.rexo(hcode(v[0]), addr(v[1]), false)
+ m.emit(0x31)
+ m.mrsd(lcode(v[0]), addr(v[1]), 1)
+ })
+ }
+ if p.len == 0 {
+ panic("invalid operands for XORW")
+ }
+ return p
+}